summaryrefslogtreecommitdiffstats
path: root/man7
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:40:15 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 19:40:15 +0000
commit399644e47874bff147afb19c89228901ac39340e (patch)
tree1c4c0b733f4c16b5783b41bebb19194a9ef62ad1 /man7
parentInitial commit. (diff)
downloadmanpages-399644e47874bff147afb19c89228901ac39340e.tar.xz
manpages-399644e47874bff147afb19c89228901ac39340e.zip
Adding upstream version 6.05.01.upstream/6.05.01
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'man7')
-rw-r--r--man7/address_families.7390
-rw-r--r--man7/aio.7446
-rw-r--r--man7/armscii-8.7120
-rw-r--r--man7/arp.7306
-rw-r--r--man7/ascii.7169
-rw-r--r--man7/attributes.7865
-rw-r--r--man7/boot.7230
-rw-r--r--man7/bootparam.7664
-rw-r--r--man7/bpf-helpers.75128
-rw-r--r--man7/capabilities.71872
-rw-r--r--man7/cgroup_namespaces.7248
-rw-r--r--man7/cgroups.71914
-rw-r--r--man7/charsets.7335
-rw-r--r--man7/complex.783
-rw-r--r--man7/cp1251.7166
-rw-r--r--man7/cp1252.7156
-rw-r--r--man7/cpuset.71504
-rw-r--r--man7/credentials.7379
-rw-r--r--man7/ddp.7245
-rw-r--r--man7/environ.7354
-rw-r--r--man7/epoll.7610
-rw-r--r--man7/fanotify.71455
-rw-r--r--man7/feature_test_macros.7937
-rw-r--r--man7/fifo.770
-rw-r--r--man7/futex.7121
-rw-r--r--man7/glibc.71
-rw-r--r--man7/glob.7205
-rw-r--r--man7/hier.7654
-rw-r--r--man7/hostname.797
-rw-r--r--man7/icmp.7196
-rw-r--r--man7/inode.7481
-rw-r--r--man7/inotify.71100
-rw-r--r--man7/intro.723
-rw-r--r--man7/ip.71524
-rw-r--r--man7/ipc_namespaces.766
-rw-r--r--man7/ipv6.7416
-rw-r--r--man7/iso-8859-1.71
-rw-r--r--man7/iso-8859-10.71
-rw-r--r--man7/iso-8859-11.71
-rw-r--r--man7/iso-8859-13.71
-rw-r--r--man7/iso-8859-14.71
-rw-r--r--man7/iso-8859-15.71
-rw-r--r--man7/iso-8859-16.71
-rw-r--r--man7/iso-8859-2.71
-rw-r--r--man7/iso-8859-3.71
-rw-r--r--man7/iso-8859-4.71
-rw-r--r--man7/iso-8859-5.71
-rw-r--r--man7/iso-8859-6.71
-rw-r--r--man7/iso-8859-7.71
-rw-r--r--man7/iso-8859-8.71
-rw-r--r--man7/iso-8859-9.71
-rw-r--r--man7/iso_8859-1.7150
-rw-r--r--man7/iso_8859-10.7146
-rw-r--r--man7/iso_8859-11.7143
-rw-r--r--man7/iso_8859-13.7146
-rw-r--r--man7/iso_8859-14.7146
-rw-r--r--man7/iso_8859-15.7149
-rw-r--r--man7/iso_8859-16.7147
-rw-r--r--man7/iso_8859-2.7151
-rw-r--r--man7/iso_8859-3.7139
-rw-r--r--man7/iso_8859-4.7146
-rw-r--r--man7/iso_8859-5.7151
-rw-r--r--man7/iso_8859-6.7102
-rw-r--r--man7/iso_8859-7.7150
-rw-r--r--man7/iso_8859-8.7114
-rw-r--r--man7/iso_8859-9.7146
-rw-r--r--man7/iso_8859_1.71
-rw-r--r--man7/iso_8859_10.71
-rw-r--r--man7/iso_8859_11.71
-rw-r--r--man7/iso_8859_13.71
-rw-r--r--man7/iso_8859_14.71
-rw-r--r--man7/iso_8859_15.71
-rw-r--r--man7/iso_8859_16.71
-rw-r--r--man7/iso_8859_2.71
-rw-r--r--man7/iso_8859_3.71
-rw-r--r--man7/iso_8859_4.71
-rw-r--r--man7/iso_8859_5.71
-rw-r--r--man7/iso_8859_6.71
-rw-r--r--man7/iso_8859_7.71
-rw-r--r--man7/iso_8859_8.71
-rw-r--r--man7/iso_8859_9.71
-rw-r--r--man7/kernel_lockdown.7109
-rw-r--r--man7/keyrings.7901
-rw-r--r--man7/koi8-r.7169
-rw-r--r--man7/koi8-u.7175
-rw-r--r--man7/landlock.7586
-rw-r--r--man7/latin1.71
-rw-r--r--man7/latin10.71
-rw-r--r--man7/latin2.71
-rw-r--r--man7/latin3.71
-rw-r--r--man7/latin4.71
-rw-r--r--man7/latin5.71
-rw-r--r--man7/latin6.71
-rw-r--r--man7/latin7.71
-rw-r--r--man7/latin8.71
-rw-r--r--man7/latin9.71
-rw-r--r--man7/libc.7115
-rw-r--r--man7/locale.7379
-rw-r--r--man7/mailaddr.7134
-rw-r--r--man7/man-pages.71227
-rw-r--r--man7/man.7507
-rw-r--r--man7/math_error.7246
-rw-r--r--man7/mount_namespaces.71371
-rw-r--r--man7/mq_overview.7389
-rw-r--r--man7/namespaces.7417
-rw-r--r--man7/netdevice.7421
-rw-r--r--man7/netlink.7609
-rw-r--r--man7/network_namespaces.762
-rw-r--r--man7/nptl.7112
-rw-r--r--man7/numa.7170
-rw-r--r--man7/operator.754
-rw-r--r--man7/packet.7694
-rw-r--r--man7/path_resolution.7264
-rw-r--r--man7/persistent-keyring.7124
-rw-r--r--man7/pid_namespaces.7388
-rw-r--r--man7/pipe.7407
-rw-r--r--man7/pkeys.7237
-rw-r--r--man7/posixoptions.71014
-rw-r--r--man7/precedence.71
-rw-r--r--man7/process-keyring.755
-rw-r--r--man7/pthreads.7937
-rw-r--r--man7/pty.7158
-rw-r--r--man7/queue.7138
-rw-r--r--man7/random.7213
-rw-r--r--man7/raw.7281
-rw-r--r--man7/regex.7293
-rw-r--r--man7/rtld-audit.7606
-rw-r--r--man7/rtnetlink.7558
-rw-r--r--man7/sched.7992
-rw-r--r--man7/sem_overview.7139
-rw-r--r--man7/session-keyring.7113
-rw-r--r--man7/shm_overview.7104
-rw-r--r--man7/sigevent.7120
-rw-r--r--man7/signal-safety.7341
-rw-r--r--man7/signal.71019
-rw-r--r--man7/sock_diag.7825
-rw-r--r--man7/socket.71266
-rw-r--r--man7/spufs.7767
-rw-r--r--man7/standards.7303
-rw-r--r--man7/string_copying.7816
-rw-r--r--man7/suffixes.7265
-rw-r--r--man7/svipc.71
-rw-r--r--man7/symlink.7564
-rw-r--r--man7/system_data_types.7320
-rw-r--r--man7/sysvipc.799
-rw-r--r--man7/tcp.71563
-rw-r--r--man7/termio.745
-rw-r--r--man7/thread-keyring.750
-rw-r--r--man7/time.7218
-rw-r--r--man7/time_namespaces.7345
-rw-r--r--man7/tis-620.71
-rw-r--r--man7/udp.7312
-rw-r--r--man7/udplite.7137
-rw-r--r--man7/unicode.7246
-rw-r--r--man7/units.7108
-rw-r--r--man7/unix.71205
-rw-r--r--man7/uri.7761
-rw-r--r--man7/url.71
-rw-r--r--man7/urn.71
-rw-r--r--man7/user-keyring.781
-rw-r--r--man7/user-session-keyring.792
-rw-r--r--man7/user_namespaces.71469
-rw-r--r--man7/utf-8.7211
-rw-r--r--man7/utf8.71
-rw-r--r--man7/uts_namespaces.746
-rw-r--r--man7/vdso.7612
-rw-r--r--man7/vsock.7232
-rw-r--r--man7/x25.7122
-rw-r--r--man7/xattr.7180
169 files changed, 56310 insertions, 0 deletions
diff --git a/man7/address_families.7 b/man7/address_families.7
new file mode 100644
index 0000000..4a75b72
--- /dev/null
+++ b/man7/address_families.7
@@ -0,0 +1,390 @@
+.\" Copyright (c) 2018 by Eugene Syromyatnikov <evgsyr@gmail.com>,
+.\" and Copyright (c) 2018 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH address_families 7 2023-01-22 "Linux man-pages 6.05.01"
+.SH NAME
+address_families \- socket address families (domains)
+.SH SYNOPSIS
+.nf
+.BR "#include <sys/types.h>" " /* See NOTES */"
+.B #include <sys/socket.h>
+.PP
+.BI "int socket(int " domain ", int " type ", int " protocol );
+.fi
+.SH DESCRIPTION
+The
+.I domain
+argument of
+.BR socket (2)
+specifies a communication domain; this selects the protocol
+family which will be used for communication.
+These families are defined in
+.IR <sys/socket.h> .
+The formats currently understood by the Linux kernel include:
+.TP
+.BR AF_UNIX ", " AF_LOCAL
+Local communication.
+For further information, see
+.BR unix (7).
+.TP
+.B AF_INET
+IPv4 Internet protocols.
+For further information, see
+.BR ip (7).
+.TP
+.B AF_AX25
+Amateur radio AX.25 protocol.
+For further information, see
+.BR ax25 (4).
+.\" Part of ax25-tools
+.TP
+.B AF_IPX
+IPX \- Novell protocols.
+.TP
+.B AF_APPLETALK
+AppleTalk.
+For further information, see
+.BR ddp (7).
+.TP
+.B AF_NETROM
+AX.25 packet layer protocol.
+For further information, see
+.BR netrom (4),
+.\" Part of ax25-tools package
+.UR https://www.tldp.org/HOWTO/AX25-HOWTO/x61.html
+.I The Packet Radio Protocols and Linux
+.UE
+and the
+.IR AX.25 ", " NET/ROM ", and " "ROSE network programming"
+chapters of the
+.UR https://www.tldp.org/HOWTO/AX25-HOWTO/x2107.html
+.I Linux Amateur Radio AX.25 HOWTO
+.UE .
+.TP
+.B AF_BRIDGE
+Can't be used for creating sockets;
+mostly used for bridge links in
+.BR rtnetlink (7)
+protocol commands.
+.TP
+.B AF_ATMPVC
+Access to raw ATM Permanent Virtual Circuits (PVCs).
+For further information, see the
+.UR https://www.tldp.org/HOWTO/text/ATM-Linux-HOWTO
+.I ATM on Linux HOWTO
+.UE .
+.TP
+.B AF_X25
+ITU-T X.25 / ISO-8208 protocol.
+For further information, see
+.BR x25 (7).
+.TP
+.B AF_INET6
+IPv6 Internet protocols.
+For further information, see
+.BR ipv6 (7).
+.TP
+.B AF_ROSE
+RATS (Radio Amateur Telecommunications Society)
+Open Systems environment (ROSE) AX.25 packet layer protocol.
+For further information, see the resources listed for
+.BR AF_NETROM .
+.TP
+.B AF_DECnet
+DECnet protocol sockets.
+See
+.I Documentation/networking/decnet.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_NETBEUI
+Reserved for "802.2LLC project"; never used.
+.TP
+.B AF_SECURITY
+This was a short-lived (between Linux 2.1.30 and 2.1.99pre2) protocol family
+for firewall upcalls.
+.TP
+.B AF_KEY
+Key management protocol, originally developed for usage with IPsec
+(since Linux 2.1.38).
+This has no relation to
+.BR keyctl (2)
+and the in-kernel key storage facility.
+See
+.UR https://tools.ietf.org/html/rfc2367
+RFC 2367
+.I PF_KEY Key Management API, Version 2
+.UE
+for details.
+.TP
+.B AF_NETLINK
+Kernel user interface device.
+For further information, see
+.BR netlink (7).
+.TP
+.B AF_PACKET
+Low-level packet interface.
+For further information, see
+.BR packet (7).
+.\" .TP
+.\" .B AF_ASH
+.\" Asynchronous Serial Host protocol (?)
+.\" Notes from Eugene Syromyatnikov:
+.\" I haven't found any concrete information about this one;
+.\" it never was implemented in Linux, at least, judging by historical
+.\" repos. There is also this file (and its variations):
+.\" https://github.com/ecki/net-tools/blob/master/lib/ash.c
+.\" ( https://github.com/ecki/net-tools/commits/master/lib/ash.c )
+.\" it mentions "NET-2 distribution" (BSD Net/2?), but, again, I failed
+.\" to find any mentions of "ash" protocol there.
+.\" (for the reference:
+.\" ftp://pdp11.org.ru/pub/unix-archive/Distributions/UCB/Net2/net2.tar.gz )
+.\" Another source that mentions it is
+.\" https://www.silabs.com/documents/public/user-guides/ug101-uart-gateway-protocol-reference.pdf
+.\" https://www.silabs.com/documents/public/user-guides/ug115-ashv3-protocol-reference.pdf
+.\" but I doubt that it's related, as former files use 64-byte addresses and
+.\" "Hamming-encode of hops", and that barely combines with a protocol
+.\" that is mainly used over serial connection.
+.TP
+.B AF_ECONET
+.\" commit: 349f29d841dbae854bd7367be7c250401f974f47
+Acorn Econet protocol (removed in Linux 3.5).
+See the
+.UR http://www.8bs.com/othrdnld/manuals/econet.shtml
+Econet documentation
+.UE
+for details.
+.TP
+.B AF_ATMSVC
+Access to ATM Switched Virtual Circuits (SVCs).
+See the
+.UR https://www.tldp.org/HOWTO/text/ATM-Linux-HOWTO
+.I ATM on Linux HOWTO
+.UE
+for details.
+.TP
+.B AF_RDS
+.\" commit: 639b321b4d8f4e412bfbb2a4a19bfebc1e68ace4
+Reliable Datagram Sockets (RDS) protocol (since Linux 2.6.30).
+RDS over RDMA has no relation to
+.B AF_SMC
+or
+.BR AF_XDP .
+For further information, see
+.\" rds-tools: https://github.com/oracle/rds-tools/blob/master/rds.7
+.\" rds-tools: https://github.com/oracle/rds-tools/blob/master/rds-rdma.7
+.BR rds (7),
+.BR rds\-rdma (7),
+and
+.I Documentation/networking/rds.txt
+in the Linux kernel source tree.
+.TP
+.B AF_IRDA
+.\" commits: 1ca163afb6fd569b, d64c2a76123f0300
+Socket interface over IrDA
+(moved to staging in Linux 4.14, removed in Linux 4.17).
+.\" irda-utils: https://sourceforge.net/p/irda/code/HEAD/tree/tags/IRDAUTILS_0_9_18/irda-utils/man/irda.7.gz?format=raw
+For further information, see
+.BR irda (7).
+.TP
+.B AF_PPPOX
+Generic PPP transport layer, for setting up L2 tunnels
+(L2TP and PPPoE).
+See
+.I Documentation/networking/l2tp.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_WANPIPE
+.\" commits: ce0ecd594d78710422599918a608e96dd1ee6024
+Legacy protocol for wide area network (WAN) connectivity
+that was used by Sangoma WAN cards (called "WANPIPE");
+removed in Linux 2.6.21.
+.TP
+.B AF_LLC
+.\" linux-history commit: 34beb106cde7da233d4df35dd3d6cf4fee937caa
+Logical link control (IEEE 802.2 LLC) protocol, upper part
+of data link layer of ISO/OSI networking protocol stack
+(since Linux 2.4);
+has no relation to
+.BR AF_PACKET .
+See chapter
+.I 13.5.3. Logical Link Control
+in
+.I Understanding Linux Network Internals
+(O'Reilly Media, 2006)
+and
+.I IEEE Standards for Local Area Networks: Logical Link Control
+(The Institute of Electrical and Electronics Engineers, Inc.,
+New York, New York, 1985)
+for details.
+See also
+.UR https://wiki.linuxfoundation.org/networking/llc
+some historical notes
+.UE
+regarding its development.
+.TP
+.B AF_IB
+.\" commits: 8d36eb01da5d371f..ce117ffac2e93334
+InfiniBand native addressing (since Linux 3.11).
+.TP
+.B AF_MPLS
+.\" commits: 0189197f441602acdca3f97750d392a895b778fd
+Multiprotocol Label Switching (since Linux 4.1);
+mostly used for configuring MPLS routing via
+.BR netlink (7),
+as it doesn't expose the ability to create sockets to user space.
+.TP
+.B AF_CAN
+.\" commits: 8dbde28d9711475a..5423dd67bd0108a1
+Controller Area Network automotive bus protocol (since Linux 2.6.25).
+See
+.I Documentation/networking/can.rst
+in the Linux kernel source tree for details.
+.TP
+.B AF_TIPC
+.\" commits: b97bf3fd8f6a16966d4f18983b2c40993ff937d4
+TIPC, "cluster domain sockets" protocol (since Linux 2.6.16).
+See
+.UR http://tipc.io/programming.html
+.I TIPC Programmer's Guide
+.UE
+and the
+.UR http://tipc.io/protocol.html
+protocol description
+.UE
+for details.
+.TP
+.B AF_BLUETOOTH
+.\" commits: 8d36eb01da5d371f..ce117ffac2e93334
+Bluetooth low-level socket protocol (since Linux 3.11).
+See
+.UR https://git.kernel.org\:/pub/scm\:/bluetooth/bluez.git\:/tree/doc/mgmt-api.txt
+.I Bluetooth Management API overview
+.UE
+and
+.UR https://people.csail.mit.edu/albert/bluez-intro/
+.I An Introduction to Bluetooth Programming
+by Albert Huang
+.UE
+for details.
+.TP
+.B AF_IUCV
+.\" commit: eac3731bd04c7131478722a3c148b78774553116
+IUCV (inter-user communication vehicle) z/VM protocol
+for hypervisor-guest interaction (since Linux 2.6.21);
+has no relation to
+.B AF_VSOCK
+and/or
+.BR AF_SMC .
+See
+.UR https://www.ibm.com\:/support\:/knowledgecenter\:/en/SSB27U_6.4.0\:/com.ibm.zvm.v640.hcpb4\:/iucv.htm
+.I IUCV protocol overview
+.UE
+for details.
+.TP
+.B AF_RXRPC
+.\" commit: 17926a79320afa9b95df6b977b40cca6d8713cea
+.\" http://people.redhat.com/~dhowells/rxrpc/
+.\" https://www.infradead.org/~dhowells/kafs/af_rxrpc_client.html
+.\" http://workshop.openafs.org/afsbpw09/talks/thu_2/kafs.pdf
+.\" http://pages.cs.wisc.edu/~remzi/OSTEP/dist-afs.pdf
+.\" http://web.mit.edu/kolya/afs/rx/rx-spec
+Rx, Andrew File System remote procedure call protocol
+(since Linux 2.6.22).
+See
+.I Documentation/networking/rxrpc.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_ISDN
+.\" commit: 1b2b03f8e514e4f68e293846ba511a948b80243c
+New "modular ISDN" driver interface protocol (since Linux 2.6.27).
+See the
+.UR http://www.misdn.eu/wiki/Main_Page/
+mISDN wiki
+.UE
+for details.
+.TP
+.B AF_PHONET
+.\" commit: 4b07b3f69a8471cdc142c51461a331226fef248a
+Nokia cellular modem IPC/RPC interface (since Linux 2.6.31).
+See
+.I Documentation/networking/phonet.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_IEEE802154
+.\" commit: 9ec7671603573ede31207eb5b0b3e1aa211b2854
+IEEE 802.15.4 WPAN (wireless personal area network) raw packet protocol
+(since Linux 2.6.31).
+See
+.I Documentation/networking/ieee802154.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_CAIF
+.\" commit: 529d6dad5bc69de14cdd24831e2a14264e93daa4
+.\" https://lwn.net/Articles/371017/
+.\" http://read.pudn.com/downloads157/doc/comm/698729/Misc/caif/Com%20CPU%20to%20Appl%20CPU%20Interface%20DESCRIPTION_LZN901%202002_revR1C.pdf
+.\" http://read.pudn.com/downloads157/doc/comm/698729/Misc/caif/Com%20CPU%20to%20Appl%20CPU%20Interface%20PROTOCOL%20SPECIFICATION_LZN901%201708_revR1A.pdf
+Ericsson's Communication CPU to Application CPU interface (CAIF) protocol
+(since Linux 2.6.36).
+See
+.I Documentation/networking/caif/Linux\-CAIF.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_ALG
+Interface to kernel crypto API (since Linux 2.6.38).
+See
+.I Documentation/crypto/userspace\-if.rst
+in the Linux kernel source tree for details.
+.TP
+.B AF_VSOCK
+.\" commit: d021c344051af91f42c5ba9fdedc176740cbd238
+VMware VSockets protocol for hypervisor-guest interaction (since Linux 3.9);
+has no relation to
+.B AF_IUCV
+and
+.BR AF_SMC .
+For further information, see
+.BR vsock (7).
+.TP
+.B AF_KCM
+.\" commit: 03c8efc1ffeb6b82a22c1af8dd908af349563314
+KCM (kernel connection multiplexer) interface (since Linux 4.6).
+See
+.I Documentation/networking/kcm.txt
+in the Linux kernel source tree for details.
+.TP
+.B AF_QIPCRTR
+.\" commit: bdabad3e363d825ddf9679dd431cca0b2c30f881
+Qualcomm IPC router interface protocol (since Linux 4.7).
+.TP
+.B AF_SMC
+.\" commit: f3a3e248f3f7cd9a4bed334022704d7e7fc781bf
+SMC-R (shared memory communications over RDMA) protocol (since Linux 4.11),
+and SMC-D (shared memory communications, direct memory access) protocol
+for intra-node z/VM guest interaction (since Linux 4.19);
+has no relation to
+.BR AF_RDS ", " AF_IUCV
+or
+.BR AF_VSOCK .
+See
+.UR https://tools.ietf.org/html/rfc7609
+RFC 7609
+.I IBM's Shared Memory Communications over RDMA (SMC-R) Protocol
+.UE
+for details regarding SMC-R.
+See
+.UR https://www-01.ibm.com\:/software/network\:/commserver\:/SMC-D/index.html
+.I SMC-D Reference Information
+.UE
+for details regarding SMC-D.
+.TP
+.B AF_XDP
+.\" commit: c0c77d8fb787cfe0c3fca689c2a30d1dad4eaba7
+XDP (express data path) interface (since Linux 4.18).
+See
+.I Documentation/networking/af_xdp.rst
+in the Linux kernel source tree for details.
+.SH SEE ALSO
+.BR socket (2),
+.BR socket (7)
diff --git a/man7/aio.7 b/man7/aio.7
new file mode 100644
index 0000000..64c0db1
--- /dev/null
+++ b/man7/aio.7
@@ -0,0 +1,446 @@
+.\" Copyright (c) 2010 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH AIO 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+aio \- POSIX asynchronous I/O overview
+.SH DESCRIPTION
+The POSIX asynchronous I/O (AIO) interface allows applications
+to initiate one or more I/O operations that are performed
+asynchronously (i.e., in the background).
+The application can elect to be notified of completion of
+the I/O operation in a variety of ways:
+by delivery of a signal, by instantiation of a thread,
+or no notification at all.
+.PP
+The POSIX AIO interface consists of the following functions:
+.TP
+.BR aio_read (3)
+Enqueue a read request.
+This is the asynchronous analog of
+.BR read (2).
+.TP
+.BR aio_write (3)
+Enqueue a write request.
+This is the asynchronous analog of
+.BR write (2).
+.TP
+.BR aio_fsync (3)
+Enqueue a sync request for the I/O operations on a file descriptor.
+This is the asynchronous analog of
+.BR fsync (2)
+and
+.BR fdatasync (2).
+.TP
+.BR aio_error (3)
+Obtain the error status of an enqueued I/O request.
+.TP
+.BR aio_return (3)
+Obtain the return status of a completed I/O request.
+.TP
+.BR aio_suspend (3)
+Suspend the caller until one or more of a specified set of
+I/O requests completes.
+.TP
+.BR aio_cancel (3)
+Attempt to cancel outstanding I/O requests on a specified
+file descriptor.
+.TP
+.BR lio_listio (3)
+Enqueue multiple I/O requests using a single function call.
+.PP
+The
+.I aiocb
+("asynchronous I/O control block") structure defines
+parameters that control an I/O operation.
+An argument of this type is employed with all of the functions listed above.
+This structure has the following form:
+.PP
+.in +4n
+.EX
+#include <aio.h>
+\&
+struct aiocb {
+ /* The order of these fields is implementation\-dependent */
+\&
+ int aio_fildes; /* File descriptor */
+ off_t aio_offset; /* File offset */
+ volatile void *aio_buf; /* Location of buffer */
+ size_t aio_nbytes; /* Length of transfer */
+ int aio_reqprio; /* Request priority */
+ struct sigevent aio_sigevent; /* Notification method */
+ int aio_lio_opcode; /* Operation to be performed;
+ lio_listio() only */
+\&
+ /* Various implementation\-internal fields not shown */
+};
+\&
+/* Operation codes for \[aq]aio_lio_opcode\[aq]: */
+\&
+enum { LIO_READ, LIO_WRITE, LIO_NOP };
+.EE
+.in
+.PP
+The fields of this structure are as follows:
+.TP
+.I aio_fildes
+The file descriptor on which the I/O operation is to be performed.
+.TP
+.I aio_offset
+This is the file offset at which the I/O operation is to be performed.
+.TP
+.I aio_buf
+This is the buffer used to transfer data for a read or write operation.
+.TP
+.I aio_nbytes
+This is the size of the buffer pointed to by
+.IR aio_buf .
+.TP
+.I aio_reqprio
+This field specifies a value that is subtracted
+from the calling thread's real-time priority in order to
+determine the priority for execution of this I/O request (see
+.BR pthread_setschedparam (3)).
+The specified value must be between 0 and the value returned by
+.IR sysconf(_SC_AIO_PRIO_DELTA_MAX) .
+This field is ignored for file synchronization operations.
+.TP
+.I aio_sigevent
+This field is a structure that specifies how the caller is
+to be notified when the asynchronous I/O operation completes.
+Possible values for
+.I aio_sigevent.sigev_notify
+are
+.BR SIGEV_NONE ,
+.BR SIGEV_SIGNAL ,
+and
+.BR SIGEV_THREAD .
+See
+.BR sigevent (7)
+for further details.
+.TP
+.I aio_lio_opcode
+The type of operation to be performed; used only for
+.BR lio_listio (3).
+.PP
+In addition to the standard functions listed above,
+the GNU C library provides the following extension to the POSIX AIO API:
+.TP
+.BR aio_init (3)
+Set parameters for tuning the behavior of the glibc POSIX AIO implementation.
+.SH ERRORS
+.TP
+.B EINVAL
+The
+.I aio_reqprio
+field of the
+.I aiocb
+structure was less than 0,
+or was greater than the limit returned by the call
+.IR sysconf(_SC_AIO_PRIO_DELTA_MAX) .
+.SH STANDARDS
+POSIX.1-2008.
+.SH HISTORY
+POSIX.1-2001.
+glibc 2.1.
+.SH NOTES
+It is a good idea to zero out the control block buffer before use (see
+.BR memset (3)).
+The control block buffer and the buffer pointed to by
+.I aio_buf
+must not be changed while the I/O operation is in progress.
+These buffers must remain valid until the I/O operation completes.
+.PP
+Simultaneous asynchronous read or write operations using the same
+.I aiocb
+structure yield undefined results.
+.PP
+The current Linux POSIX AIO implementation is provided in user space by glibc.
+This has a number of limitations, most notably that maintaining multiple
+threads to perform I/O operations is expensive and scales poorly.
+Work has been in progress for some time on a kernel
+state-machine-based implementation of asynchronous I/O
+(see
+.BR io_submit (2),
+.BR io_setup (2),
+.BR io_cancel (2),
+.BR io_destroy (2),
+.BR io_getevents (2)),
+but this implementation hasn't yet matured to the point where
+the POSIX AIO implementation can be completely
+reimplemented using the kernel system calls.
+.\" http://lse.sourceforge.net/io/aio.html
+.\" http://lse.sourceforge.net/io/aionotes.txt
+.\" http://lwn.net/Articles/148755/
+.SH EXAMPLES
+The program below opens each of the files named in its command-line
+arguments and queues a request on the resulting file descriptor using
+.BR aio_read (3).
+The program then loops,
+periodically monitoring each of the I/O operations
+that is still in progress using
+.BR aio_error (3).
+Each of the I/O requests is set up to provide notification by delivery
+of a signal.
+After all I/O requests have completed,
+the program retrieves their status using
+.BR aio_return (3).
+.PP
+The
+.B SIGQUIT
+signal (generated by typing control-\e) causes the program to request
+cancelation of each of the outstanding requests using
+.BR aio_cancel (3).
+.PP
+Here is an example of what we might see when running this program.
+In this example, the program queues two requests to standard input,
+and these are satisfied by two lines of input containing
+"abc" and "x".
+.PP
+.in +4n
+.EX
+$ \fB./a.out /dev/stdin /dev/stdin\fP
+opened /dev/stdin on descriptor 3
+opened /dev/stdin on descriptor 4
+aio_error():
+ for request 0 (descriptor 3): In progress
+ for request 1 (descriptor 4): In progress
+\fBabc\fP
+I/O completion signal received
+aio_error():
+ for request 0 (descriptor 3): I/O succeeded
+ for request 1 (descriptor 4): In progress
+aio_error():
+ for request 1 (descriptor 4): In progress
+\fBx\fP
+I/O completion signal received
+aio_error():
+ for request 1 (descriptor 4): I/O succeeded
+All I/O requests completed
+aio_return():
+ for request 0 (descriptor 3): 4
+ for request 1 (descriptor 4): 2
+.EE
+.in
+.SS Program source
+\&
+.EX
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <aio.h>
+#include <signal.h>
+\&
+#define BUF_SIZE 20 /* Size of buffers for read operations */
+\&
+#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
+\&
+struct ioRequest { /* Application\-defined structure for tracking
+ I/O requests */
+ int reqNum;
+ int status;
+ struct aiocb *aiocbp;
+};
+\&
+static volatile sig_atomic_t gotSIGQUIT = 0;
+ /* On delivery of SIGQUIT, we attempt to
+ cancel all outstanding I/O requests */
+\&
+static void /* Handler for SIGQUIT */
+quitHandler(int sig)
+{
+ gotSIGQUIT = 1;
+}
+\&
+#define IO_SIGNAL SIGUSR1 /* Signal used to notify I/O completion */
+\&
+static void /* Handler for I/O completion signal */
+aioSigHandler(int sig, siginfo_t *si, void *ucontext)
+{
+ if (si\->si_code == SI_ASYNCIO) {
+ write(STDOUT_FILENO, "I/O completion signal received\en", 31);
+\&
+ /* The corresponding ioRequest structure would be available as
+ struct ioRequest *ioReq = si\->si_value.sival_ptr;
+ and the file descriptor would then be available via
+ ioReq\->aiocbp\->aio_fildes */
+ }
+}
+\&
+int
+main(int argc, char *argv[])
+{
+ struct sigaction sa;
+ int s;
+ int numReqs; /* Total number of queued I/O requests */
+ int openReqs; /* Number of I/O requests still in progress */
+\&
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <pathname> <pathname>...\en",
+ argv[0]);
+ exit(EXIT_FAILURE);
+ }
+\&
+ numReqs = argc \- 1;
+\&
+ /* Allocate our arrays. */
+\&
+ struct ioRequest *ioList = calloc(numReqs, sizeof(*ioList));
+ if (ioList == NULL)
+ errExit("calloc");
+\&
+ struct aiocb *aiocbList = calloc(numReqs, sizeof(*aiocbList));
+ if (aiocbList == NULL)
+ errExit("calloc");
+\&
+ /* Establish handlers for SIGQUIT and the I/O completion signal. */
+\&
+ sa.sa_flags = SA_RESTART;
+ sigemptyset(&sa.sa_mask);
+\&
+ sa.sa_handler = quitHandler;
+ if (sigaction(SIGQUIT, &sa, NULL) == \-1)
+ errExit("sigaction");
+\&
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sa.sa_sigaction = aioSigHandler;
+ if (sigaction(IO_SIGNAL, &sa, NULL) == \-1)
+ errExit("sigaction");
+\&
+ /* Open each file specified on the command line, and queue
+ a read request on the resulting file descriptor. */
+\&
+ for (size_t j = 0; j < numReqs; j++) {
+ ioList[j].reqNum = j;
+ ioList[j].status = EINPROGRESS;
+ ioList[j].aiocbp = &aiocbList[j];
+\&
+ ioList[j].aiocbp\->aio_fildes = open(argv[j + 1], O_RDONLY);
+ if (ioList[j].aiocbp\->aio_fildes == \-1)
+ errExit("open");
+ printf("opened %s on descriptor %d\en", argv[j + 1],
+ ioList[j].aiocbp\->aio_fildes);
+\&
+ ioList[j].aiocbp\->aio_buf = malloc(BUF_SIZE);
+ if (ioList[j].aiocbp\->aio_buf == NULL)
+ errExit("malloc");
+\&
+ ioList[j].aiocbp\->aio_nbytes = BUF_SIZE;
+ ioList[j].aiocbp\->aio_reqprio = 0;
+ ioList[j].aiocbp\->aio_offset = 0;
+ ioList[j].aiocbp\->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ ioList[j].aiocbp\->aio_sigevent.sigev_signo = IO_SIGNAL;
+ ioList[j].aiocbp\->aio_sigevent.sigev_value.sival_ptr =
+ &ioList[j];
+\&
+ s = aio_read(ioList[j].aiocbp);
+ if (s == \-1)
+ errExit("aio_read");
+ }
+\&
+ openReqs = numReqs;
+\&
+ /* Loop, monitoring status of I/O requests. */
+\&
+ while (openReqs > 0) {
+ sleep(3); /* Delay between each monitoring step */
+\&
+ if (gotSIGQUIT) {
+\&
+ /* On receipt of SIGQUIT, attempt to cancel each of the
+ outstanding I/O requests, and display status returned
+ from the cancelation requests. */
+\&
+ printf("got SIGQUIT; canceling I/O requests: \en");
+\&
+ for (size_t j = 0; j < numReqs; j++) {
+ if (ioList[j].status == EINPROGRESS) {
+ printf(" Request %zu on descriptor %d:", j,
+ ioList[j].aiocbp\->aio_fildes);
+ s = aio_cancel(ioList[j].aiocbp\->aio_fildes,
+ ioList[j].aiocbp);
+ if (s == AIO_CANCELED)
+ printf("I/O canceled\en");
+ else if (s == AIO_NOTCANCELED)
+ printf("I/O not canceled\en");
+ else if (s == AIO_ALLDONE)
+ printf("I/O all done\en");
+ else
+ perror("aio_cancel");
+ }
+ }
+\&
+ gotSIGQUIT = 0;
+ }
+\&
+ /* Check the status of each I/O request that is still
+ in progress. */
+\&
+ printf("aio_error():\en");
+ for (size_t j = 0; j < numReqs; j++) {
+ if (ioList[j].status == EINPROGRESS) {
+ printf(" for request %zu (descriptor %d): ",
+ j, ioList[j].aiocbp\->aio_fildes);
+ ioList[j].status = aio_error(ioList[j].aiocbp);
+\&
+ switch (ioList[j].status) {
+ case 0:
+ printf("I/O succeeded\en");
+ break;
+ case EINPROGRESS:
+ printf("In progress\en");
+ break;
+ case ECANCELED:
+ printf("Canceled\en");
+ break;
+ default:
+ perror("aio_error");
+ break;
+ }
+\&
+ if (ioList[j].status != EINPROGRESS)
+ openReqs\-\-;
+ }
+ }
+ }
+\&
+ printf("All I/O requests completed\en");
+\&
+ /* Check status return of all I/O requests. */
+\&
+ printf("aio_return():\en");
+ for (size_t j = 0; j < numReqs; j++) {
+ ssize_t s;
+\&
+ s = aio_return(ioList[j].aiocbp);
+ printf(" for request %zu (descriptor %d): %zd\en",
+ j, ioList[j].aiocbp\->aio_fildes, s);
+ }
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.ad l
+.nh
+.BR io_cancel (2),
+.BR io_destroy (2),
+.BR io_getevents (2),
+.BR io_setup (2),
+.BR io_submit (2),
+.BR aio_cancel (3),
+.BR aio_error (3),
+.BR aio_init (3),
+.BR aio_read (3),
+.BR aio_return (3),
+.BR aio_write (3),
+.BR lio_listio (3)
+.PP
+"Asynchronous I/O Support in Linux 2.5",
+Bhattacharya, Pratt, Pulavarty, and Morgan,
+Proceedings of the Linux Symposium, 2003,
+.UR https://www.kernel.org/doc/ols/2003/ols2003\-pages\-351\-366.pdf
+.UE
diff --git a/man7/armscii-8.7 b/man7/armscii-8.7
new file mode 100644
index 0000000..2ef36ab
--- /dev/null
+++ b/man7/armscii-8.7
@@ -0,0 +1,120 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis <edimitro at tee.gr>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ARMSCII-8 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+armscii-8 \- Armenian character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The Armenian Standard Code for Information Interchange,
+8-bit coded character set.
+.SS ArmSCII-8 characters
+The following table displays the characters in ArmSCII-8 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+242 162 A2 և ARMENIAN SMALL LIGATURE ECH YIWN
+243 163 A3 ։ ARMENIAN FULL STOP
+244 164 A4 ) RIGHT PARENTHESIS
+245 165 A5 ( LEFT PARENTHESIS
+246 166 A6 » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+247 167 A7 « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+250 168 A8 — EM DASH
+251 169 A9 . FULL STOP
+252 170 AA ՝ ARMENIAN COMMA
+253 171 AB , COMMA
+254 172 AC - HYPHEN-MINUS
+255 173 AD ֊ ARMENIAN HYPHEN
+256 174 AE … HORIZONTAL ELLIPSIS
+257 175 AF ՜ ARMENIAN EXCLAMATION MARK
+260 176 B0 ՛ ARMENIAN EMPHASIS MARK
+261 177 B1 ՞ ARMENIAN QUESTION MARK
+262 178 B2 Ա ARMENIAN CAPITAL LETTER AYB
+263 179 B3 ա ARMENIAN SMALL LETTER AYB
+264 180 B4 Բ ARMENIAN CAPITAL LETTER BEN
+265 181 B5 բ ARMENIAN SMALL LETTER BEN
+266 182 B6 Գ ARMENIAN CAPITAL LETTER GIM
+267 183 B7 գ ARMENIAN SMALL LETTER GIM
+270 184 B8 Դ ARMENIAN CAPITAL LETTER DA
+271 185 B9 դ ARMENIAN SMALL LETTER DA
+272 186 BA Ե ARMENIAN CAPITAL LETTER ECH
+273 187 BB ե ARMENIAN SMALL LETTER ECH
+274 188 BC Զ ARMENIAN CAPITAL LETTER ZA
+275 189 BD զ ARMENIAN SMALL LETTER ZA
+276 190 BE Է ARMENIAN CAPITAL LETTER EH
+277 191 BF է ARMENIAN SMALL LETTER EH
+300 192 C0 Ը ARMENIAN CAPITAL LETTER ET
+301 193 C1 ը ARMENIAN SMALL LETTER ET
+302 194 C2 Թ ARMENIAN CAPITAL LETTER TO
+303 195 C3 թ ARMENIAN SMALL LETTER TO
+304 196 C4 Ժ ARMENIAN CAPITAL LETTER ZHE
+305 197 C5 ժ ARMENIAN SMALL LETTER ZHE
+306 198 C6 Ի ARMENIAN CAPITAL LETTER INI
+307 199 C7 ի ARMENIAN SMALL LETTER INI
+310 200 C8 Լ ARMENIAN CAPITAL LETTER LIWN
+311 201 C9 լ ARMENIAN SMALL LETTER LIWN
+312 202 CA Խ ARMENIAN CAPITAL LETTER XEH
+313 203 CB խ ARMENIAN SMALL LETTER XEH
+314 204 CC Ծ ARMENIAN CAPITAL LETTER CA
+315 205 CD ծ ARMENIAN SMALL LETTER CA
+316 206 CE Կ ARMENIAN CAPITAL LETTER KEN
+317 207 CF կ ARMENIAN SMALL LETTER KEN
+320 208 D0 Հ ARMENIAN CAPITAL LETTER HO
+321 209 D1 հ ARMENIAN SMALL LETTER HO
+322 210 D2 Ձ ARMENIAN CAPITAL LETTER JA
+323 211 D3 ձ ARMENIAN SMALL LETTER JA
+324 212 D4 Ղ ARMENIAN CAPITAL LETTER GHAD
+325 213 D5 ղ ARMENIAN SMALL LETTER GHAD
+326 214 D6 Ճ ARMENIAN CAPITAL LETTER CHEH
+327 215 D7 ճ ARMENIAN SMALL LETTER CHEH
+330 216 D8 Մ ARMENIAN CAPITAL LETTER MEN
+331 217 D9 մ ARMENIAN SMALL LETTER MEN
+332 218 DA Յ ARMENIAN CAPITAL LETTER YI
+333 219 DB յ ARMENIAN SMALL LETTER YI
+334 220 DC Ն ARMENIAN CAPITAL LETTER NOW
+335 221 DD ն ARMENIAN SMALL LETTER NOW
+336 222 DE Շ ARMENIAN CAPITAL LETTER SHA
+337 223 DF շ ARMENIAN SMALL LETTER SHA
+340 224 E0 Ո ARMENIAN CAPITAL LETTER VO
+341 225 E1 ո ARMENIAN SMALL LETTER VO
+342 226 E2 Չ ARMENIAN CAPITAL LETTER CHA
+343 227 E3 չ ARMENIAN SMALL LETTER CHA
+344 228 E4 Պ ARMENIAN CAPITAL LETTER PEH
+345 229 E5 պ ARMENIAN SMALL LETTER PEH
+346 230 E6 Ջ ARMENIAN CAPITAL LETTER JHEH
+347 231 E7 ջ ARMENIAN SMALL LETTER JHEH
+350 232 E8 Ռ ARMENIAN CAPITAL LETTER RA
+351 233 E9 ռ ARMENIAN SMALL LETTER RA
+352 234 EA Ս ARMENIAN CAPITAL LETTER SEH
+353 235 EB ս ARMENIAN SMALL LETTER SEH
+354 236 EC Վ ARMENIAN CAPITAL LETTER VEW
+355 237 ED վ ARMENIAN SMALL LETTER VEW
+356 238 EE Տ ARMENIAN CAPITAL LETTER TIWN
+357 239 EF տ ARMENIAN SMALL LETTER TIWN
+360 240 F0 Ր ARMENIAN CAPITAL LETTER REH
+361 241 F1 ր ARMENIAN SMALL LETTER REH
+362 242 F2 Ց ARMENIAN CAPITAL LETTER CO
+363 243 F3 ց ARMENIAN SMALL LETTER CO
+364 244 F4 Ւ ARMENIAN CAPITAL LETTER YIWN
+365 245 F5 ւ ARMENIAN SMALL LETTER YIWN
+366 246 F6 Փ ARMENIAN CAPITAL LETTER PIWR
+367 247 F7 փ ARMENIAN SMALL LETTER PIWR
+370 248 F8 Ք ARMENIAN CAPITAL LETTER KEH
+371 249 F9 ք ARMENIAN SMALL LETTER KEH
+372 250 FA Օ ARMENIAN CAPITAL LETTER OH
+373 251 FB օ ARMENIAN SMALL LETTER OH
+374 252 FC Ֆ ARMENIAN CAPITAL LETTER FEH
+375 253 FD ֆ ARMENIAN SMALL LETTER FEH
+376 254 FE ՚ ARMENIAN APOSTROPHE
+.TE
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/arp.7 b/man7/arp.7
new file mode 100644
index 0000000..a4ca6a6
--- /dev/null
+++ b/man7/arp.7
@@ -0,0 +1,306 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Matthew Wilcox <willy@bofh.ai>.
+.\"
+.\" Modified June 1999 Andi Kleen
+.\" $Id: arp.7,v 1.10 2000/04/27 19:31:38 ak Exp $
+.\"
+.TH arp 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+arp \- Linux ARP kernel module
+.SH DESCRIPTION
+This kernel protocol module implements the Address Resolution
+Protocol defined in RFC\ 826.
+It is used to convert between Layer2 hardware addresses
+and IPv4 protocol addresses on directly connected networks.
+The user normally doesn't interact directly with this module except to
+configure it;
+instead it provides a service for other protocols in the kernel.
+.PP
+A user process can receive ARP packets by using
+.BR packet (7)
+sockets.
+There is also a mechanism for managing the ARP cache
+in user-space by using
+.BR netlink (7)
+sockets.
+The ARP table can also be controlled via
+.BR ioctl (2)
+on any
+.B AF_INET
+socket.
+.PP
+The ARP module maintains a cache of mappings between hardware addresses
+and protocol addresses.
+The cache has a limited size so old and less
+frequently used entries are garbage-collected.
+Entries which are marked
+as permanent are never deleted by the garbage-collector.
+The cache can
+be directly manipulated by the use of ioctls and its behavior can be
+tuned by the
+.I /proc
+interfaces described below.
+.PP
+When there is no positive feedback for an existing mapping after some
+time (see the
+.I /proc
+interfaces below), a neighbor cache entry is considered stale.
+Positive feedback can be gotten from a higher layer; for example from
+a successful TCP ACK.
+Other protocols can signal forward progress
+using the
+.B MSG_CONFIRM
+flag to
+.BR sendmsg (2).
+When there is no forward progress, ARP tries to reprobe.
+It first tries to ask a local arp daemon
+.B app_solicit
+times for an updated MAC address.
+If that fails and an old MAC address is known, a unicast probe is sent
+.B ucast_solicit
+times.
+If that fails too, it will broadcast a new ARP
+request to the network.
+Requests are sent only when there is data queued
+for sending.
+.PP
+Linux will automatically add a nonpermanent proxy arp entry when it
+receives a request for an address it forwards to and proxy arp is
+enabled on the receiving interface.
+When there is a reject route for the target, no proxy arp entry is added.
+.SS Ioctls
+Three ioctls are available on all
+.B AF_INET
+sockets.
+They take a pointer to a
+.I struct arpreq
+as their argument.
+.PP
+.in +4n
+.EX
+struct arpreq {
+ struct sockaddr arp_pa; /* protocol address */
+ struct sockaddr arp_ha; /* hardware address */
+ int arp_flags; /* flags */
+ struct sockaddr arp_netmask; /* netmask of protocol address */
+ char arp_dev[16];
+};
+.EE
+.in
+.PP
+.BR SIOCSARP ", " SIOCDARP " and " SIOCGARP
+respectively set, delete, and get an ARP mapping.
+Setting and deleting ARP maps are privileged operations and may
+be performed only by a process with the
+.B CAP_NET_ADMIN
+capability or an effective UID of 0.
+.PP
+.I arp_pa
+must be an
+.B AF_INET
+address and
+.I arp_ha
+must have the same type as the device which is specified in
+.IR arp_dev .
+.I arp_dev
+is a zero-terminated string which names a device.
+.RS
+.TS
+tab(:) allbox;
+c s
+l l.
+\fIarp_flags\fR
+flag:meaning
+ATF_COM:Lookup complete
+ATF_PERM:Permanent entry
+ATF_PUBL:Publish entry
+ATF_USETRAILERS:Trailers requested
+ATF_NETMASK:Use a netmask
+ATF_DONTPUB:Don't answer
+.TE
+.RE
+.PP
+If the
+.B ATF_NETMASK
+flag is set, then
+.I arp_netmask
+should be valid.
+Linux 2.2 does not support proxy network ARP entries, so this
+should be set to 0xffffffff, or 0 to remove an existing proxy arp entry.
+.B ATF_USETRAILERS
+is obsolete and should not be used.
+.SS /proc interfaces
+ARP supports a range of
+.I /proc
+interfaces to configure parameters on a global or per-interface basis.
+The interfaces can be accessed by reading or writing the
+.I /proc/sys/net/ipv4/neigh/*/*
+files.
+Each interface in the system has its own directory in
+.IR /proc/sys/net/ipv4/neigh/ .
+The setting in the "default" directory is used for all newly created
+devices.
+Unless otherwise specified, time-related interfaces are specified
+in seconds.
+.TP
+.IR anycast_delay " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of jiffies to delay before replying to an
+IPv6 neighbor solicitation message.
+Anycast support is not yet implemented.
+Defaults to 1 second.
+.TP
+.IR app_solicit " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of probes to send to the user space ARP daemon via
+netlink before dropping back to multicast probes (see
+.IR mcast_solicit ).
+Defaults to 0.
+.TP
+.IR base_reachable_time " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+Once a neighbor has been found, the entry is considered to be valid
+for at least a random value between
+.IR base_reachable_time "/2 and 3*" base_reachable_time /2.
+An entry's validity will be extended if it receives positive feedback
+from higher level protocols.
+Defaults to 30 seconds.
+This file is now obsolete in favor of
+.IR base_reachable_time_ms .
+.TP
+.IR base_reachable_time_ms " (since Linux 2.6.12)"
+As for
+.IR base_reachable_time ,
+but measures time in milliseconds.
+Defaults to 30000 milliseconds.
+.TP
+.IR delay_first_probe_time " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+Delay before first probe after it has been decided that a neighbor
+is stale.
+Defaults to 5 seconds.
+.TP
+.IR gc_interval " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+How frequently the garbage collector for neighbor entries
+should attempt to run.
+Defaults to 30 seconds.
+.TP
+.IR gc_stale_time " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+Determines how often to check for stale neighbor entries.
+When a neighbor entry is considered stale, it is resolved again before
+sending data to it.
+Defaults to 60 seconds.
+.TP
+.IR gc_thresh1 " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The minimum number of entries to keep in the ARP cache.
+The garbage collector will not run if there are fewer than
+this number of entries in the cache.
+Defaults to 128.
+.TP
+.IR gc_thresh2 " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The soft maximum number of entries to keep in the ARP cache.
+The garbage collector will allow the number of entries to exceed
+this for 5 seconds before collection will be performed.
+Defaults to 512.
+.TP
+.IR gc_thresh3 " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The hard maximum number of entries to keep in the ARP cache.
+The garbage collector will always run if there are more than
+this number of entries in the cache.
+Defaults to 1024.
+.TP
+.IR locktime " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The minimum number of jiffies to keep an ARP entry in the cache.
+This prevents ARP cache thrashing if there is more than one potential
+mapping (generally due to network misconfiguration).
+Defaults to 1 second.
+.TP
+.IR mcast_solicit " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of attempts to resolve an address by
+multicast/broadcast before marking the entry as unreachable.
+Defaults to 3.
+.TP
+.IR proxy_delay " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+When an ARP request for a known proxy-ARP address is received, delay up to
+.I proxy_delay
+jiffies before replying.
+This is used to prevent network flooding in some cases.
+Defaults to 0.8 seconds.
+.TP
+.IR proxy_qlen " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of packets which may be queued to proxy-ARP addresses.
+Defaults to 64.
+.TP
+.IR retrans_time " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The number of jiffies to delay before retransmitting a request.
+Defaults to 1 second.
+This file is now obsolete in favor of
+.IR retrans_time_ms .
+.TP
+.IR retrans_time_ms " (since Linux 2.6.12)"
+The number of milliseconds to delay before retransmitting a request.
+Defaults to 1000 milliseconds.
+.TP
+.IR ucast_solicit " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of attempts to send unicast probes before asking
+the ARP daemon (see
+.IR app_solicit ).
+Defaults to 3.
+.TP
+.IR unres_qlen " (since Linux 2.2)"
+.\" Precisely: 2.1.79
+The maximum number of packets which may be queued for each unresolved
+address by other network layers.
+Defaults to 3.
+.SH VERSIONS
+The
+.I struct arpreq
+changed in Linux 2.0 to include the
+.I arp_dev
+member and the ioctl numbers changed at the same time.
+Support for the old ioctls was dropped in Linux 2.2.
+.PP
+Support for proxy arp entries for networks (netmask not equal to 0xffffffff)
+was dropped in Linux 2.2.
+It is replaced by automatic proxy arp setup by
+the kernel for all reachable hosts on other interfaces (when
+forwarding and proxy arp are enabled for the interface).
+.PP
+The
+.I neigh/*
+interfaces did not exist before Linux 2.2.
+.SH BUGS
+Some timer settings are specified in jiffies, which is architecture-
+and kernel version-dependent; see
+.BR time (7).
+.PP
+There is no way to signal positive feedback from user space.
+This means connection-oriented protocols implemented in user space
+will generate excessive ARP traffic, because ndisc will regularly
+reprobe the MAC address.
+The same problem applies to some kernel protocols (e.g., NFS over UDP).
+.PP
+This man page mashes together functionality that is IPv4-specific
+with functionality that is shared between IPv4 and IPv6.
+.SH SEE ALSO
+.BR capabilities (7),
+.BR ip (7),
+.BR arpd (8)
+.PP
+RFC\ 826 for a description of ARP.
+RFC\ 2461 for a description of IPv6 neighbor discovery and the base
+algorithms used.
+Linux 2.2+ IPv4 ARP uses the IPv6 algorithms when applicable.
diff --git a/man7/ascii.7 b/man7/ascii.7
new file mode 100644
index 0000000..13f5578
--- /dev/null
+++ b/man7/ascii.7
@@ -0,0 +1,169 @@
+'\" t
+.\" Copyright (c) 1993 Michael Haardt (michael@moria.de)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Created 1993-04-02 by Michael Haardt (michael@moria.de)
+.\" Modified 1993-07-24 by Rik Faith (faith@cs.unc.edu)
+.\" Modified 1994-05-15 by Daniel Quinlan (quinlan@yggdrasil.com)
+.\" Modified 1994-11-22 by Daniel Quinlan (quinlan@yggdrasil.com)
+.\" Modified 1995-07-11 by Daniel Quinlan (quinlan@yggdrasil.com)
+.\" Modified 1996-12-18 by Michael Haardt and aeb
+.\" Modified 1999-05-31 by Dimitri Papadopoulos (dpo@club-internet.fr)
+.\" Modified 1999-08-08 by Michael Haardt (michael@moria.de)
+.\" Modified 2004-04-01 by aeb
+.\"
+.TH ascii 7 2023-05-02 "Linux man-pages 6.05.01"
+.SH NAME
+ascii \- ASCII character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+ASCII is the American Standard Code for Information Interchange.
+It is a 7-bit code.
+Many 8-bit codes (e.g., ISO 8859-1) contain ASCII as their lower half.
+The international counterpart of ASCII is known as ISO 646-IRV.
+.PP
+The following table contains the 128 ASCII characters.
+.PP
+C program \f(CW\[aq]\eX\[aq]\fP escapes are noted.
+.PP
+.EX
+.TS
+l l l l | l l l l.
+Oct Dec Hex Char Oct Dec Hex Char
+_
+000 0 00 NUL \[aq]\e0\[aq] (null character) 100 64 40 @
+001 1 01 SOH (start of heading) 101 65 41 A
+002 2 02 STX (start of text) 102 66 42 B
+003 3 03 ETX (end of text) 103 67 43 C
+004 4 04 EOT (end of transmission) 104 68 44 D
+005 5 05 ENQ (enquiry) 105 69 45 E
+006 6 06 ACK (acknowledge) 106 70 46 F
+007 7 07 BEL \[aq]\ea\[aq] (bell) 107 71 47 G
+010 8 08 BS \[aq]\eb\[aq] (backspace) 110 72 48 H
+011 9 09 HT \[aq]\et\[aq] (horizontal tab) 111 73 49 I
+012 10 0A LF \[aq]\en\[aq] (new line) 112 74 4A J
+013 11 0B VT \[aq]\ev\[aq] (vertical tab) 113 75 4B K
+014 12 0C FF \[aq]\ef\[aq] (form feed) 114 76 4C L
+015 13 0D CR \[aq]\er\[aq] (carriage ret) 115 77 4D M
+016 14 0E SO (shift out) 116 78 4E N
+017 15 0F SI (shift in) 117 79 4F O
+020 16 10 DLE (data link escape) 120 80 50 P
+021 17 11 DC1 (device control 1) 121 81 51 Q
+022 18 12 DC2 (device control 2) 122 82 52 R
+023 19 13 DC3 (device control 3) 123 83 53 S
+024 20 14 DC4 (device control 4) 124 84 54 T
+025 21 15 NAK (negative ack.) 125 85 55 U
+026 22 16 SYN (synchronous idle) 126 86 56 V
+027 23 17 ETB (end of trans. blk) 127 87 57 W
+030 24 18 CAN (cancel) 130 88 58 X
+031 25 19 EM (end of medium) 131 89 59 Y
+032 26 1A SUB (substitute) 132 90 5A Z
+033 27 1B ESC (escape) 133 91 5B [
+034 28 1C FS (file separator) 134 92 5C \e \[aq]\e\e\[aq]
+035 29 1D GS (group separator) 135 93 5D ]
+036 30 1E RS (record separator) 136 94 5E \[ha]
+037 31 1F US (unit separator) 137 95 5F \&_
+040 32 20 SPACE 140 96 60 \`
+041 33 21 ! 141 97 61 a
+042 34 22 " 142 98 62 b
+043 35 23 # 143 99 63 c
+044 36 24 $ 144 100 64 d
+045 37 25 % 145 101 65 e
+046 38 26 & 146 102 66 f
+047 39 27 \[aq] 147 103 67 g
+050 40 28 ( 150 104 68 h
+051 41 29 ) 151 105 69 i
+052 42 2A * 152 106 6A j
+053 43 2B + 153 107 6B k
+054 44 2C , 154 108 6C l
+055 45 2D \- 155 109 6D m
+056 46 2E . 156 110 6E n
+057 47 2F / 157 111 6F o
+060 48 30 0 160 112 70 p
+061 49 31 1 161 113 71 q
+062 50 32 2 162 114 72 r
+063 51 33 3 163 115 73 s
+064 52 34 4 164 116 74 t
+065 53 35 5 165 117 75 u
+066 54 36 6 166 118 76 v
+067 55 37 7 167 119 77 w
+070 56 38 8 170 120 78 x
+071 57 39 9 171 121 79 y
+072 58 3A : 172 122 7A z
+073 59 3B ; 173 123 7B {
+074 60 3C < 174 124 7C |
+075 61 3D = 175 125 7D }
+076 62 3E > 176 126 7E \[ti]
+077 63 3F ? 177 127 7F DEL
+.TE
+.EE
+.SS Tables
+For convenience, below are more compact tables in hex and decimal.
+.PP
+.EX
+ 2 3 4 5 6 7 30 40 50 60 70 80 90 100 110 120
+ ------------- ---------------------------------
+0: 0 @ P \` p 0: ( 2 < F P Z d n x
+1: ! 1 A Q a q 1: ) 3 = G Q [ e o y
+2: " 2 B R b r 2: * 4 > H R \e f p z
+3: # 3 C S c s 3: ! + 5 ? I S ] g q {
+4: $ 4 D T d t 4: " , 6 @ J T \[ha] h r |
+5: % 5 E U e u 5: # \- 7 A K U _ i s }
+6: & 6 F V f v 6: $ . 8 B L V \` j t \[ti]
+7: \[aq] 7 G W g w 7: % / 9 C M W a k u DEL
+8: ( 8 H X h x 8: & 0 : D N X b l v
+9: ) 9 I Y i y 9: \[aq] 1 ; E O Y c m w
+A: * : J Z j z
+B: + ; K [ k {
+C: , < L \e l |
+D: \- = M ] m }
+E: . > N \[ha] n \[ti]
+F: / ? O _ o DEL
+.EE
+.SH NOTES
+.SS History
+/etc/ascii (VII) appears in the UNIX Programmer's Manual.
+.PP
+On older terminals, the underscore code is displayed as a left arrow,
+called backarrow, the caret is displayed as an up-arrow and the vertical
+bar has a hole in the middle.
+.PP
+Uppercase and lowercase characters differ by just one bit and the
+ASCII character 2 differs from the double quote by just one bit, too.
+That made it much easier to encode characters mechanically or with a
+non-microcontroller-based electronic keyboard and that pairing was found
+on old teletypes.
+.PP
+The ASCII standard was published by the United States of America
+Standards Institute (USASI) in 1968.
+.\"
+.\" ASA was the American Standards Association and X3 was an ASA sectional
+.\" committee on computers and data processing. Its name changed to
+.\" American National Standards Committee X3 (ANSC-X3) and now it is known
+.\" as Accredited Standards Committee X3 (ASC X3). It is accredited by ANSI
+.\" and administered by ITI. The subcommittee X3.2 worked on coded
+.\" character sets; the task group working on ASCII appears to have been
+.\" designated X3.2.4. In 1966, ASA became the United States of America
+.\" Standards Institute (USASI) and published ASCII in 1968. It became the
+.\" American National Standards Institute (ANSI) in 1969 and is the
+.\" U.S. member body of ISO; private and nonprofit.
+.\"
+.SH SEE ALSO
+.BR charsets (7),
+.BR iso_8859\-1 (7),
+.BR iso_8859\-2 (7),
+.BR iso_8859\-3 (7),
+.BR iso_8859\-4 (7),
+.BR iso_8859\-5 (7),
+.BR iso_8859\-6 (7),
+.BR iso_8859\-7 (7),
+.BR iso_8859\-8 (7),
+.BR iso_8859\-9 (7),
+.BR iso_8859\-10 (7),
+.BR iso_8859\-11 (7),
+.BR iso_8859\-13 (7),
+.BR iso_8859\-14 (7),
+.BR iso_8859\-15 (7),
+.BR iso_8859\-16 (7),
+.BR utf\-8 (7)
diff --git a/man7/attributes.7 b/man7/attributes.7
new file mode 100644
index 0000000..b32fe54
--- /dev/null
+++ b/man7/attributes.7
@@ -0,0 +1,865 @@
+.\" Copyright (c) 2014, Red Hat, Inc
+.\" Written by Alexandre Oliva <aoliva@redhat.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.TH attributes 7 2023-03-18 "Linux man-pages 6.05.01"
+.SH NAME
+attributes \- POSIX safety concepts
+.SH DESCRIPTION
+.\"
+.\"
+.IR Note :
+the text of this man page is based on the material taken from
+the "POSIX Safety Concepts" section of the GNU C Library manual.
+Further details on the topics described here can be found in that
+manual.
+.PP
+Various function manual pages include a section ATTRIBUTES
+that describes the safety of calling the function in various contexts.
+This section annotates functions with the following safety markings:
+.TP
+.I MT-Safe
+.I MT-Safe
+or
+Thread-Safe functions are safe to call in the presence
+of other threads.
+MT, in MT-Safe, stands for Multi Thread.
+.IP
+Being MT-Safe does not imply a function is atomic, nor that it uses any
+of the memory synchronization mechanisms POSIX exposes to users.
+It is even possible that calling MT-Safe functions in sequence
+does not yield an MT-Safe combination.
+For example, having a thread call two MT-Safe
+functions one right after the other does not guarantee behavior
+equivalent to atomic execution of a combination of both functions,
+since concurrent calls in other threads may interfere in a destructive way.
+.IP
+Whole-program optimizations that could inline functions across library
+interfaces may expose unsafe reordering, and so performing inlining
+across the GNU C Library interface is not recommended.
+The documented
+MT-Safety status is not guaranteed under whole-program optimization.
+However, functions defined in user-visible headers are designed to be
+safe for inlining.
+.\" .TP
+.\" .I AS-Safe
+.\" .I AS-Safe
+.\" or Async-Signal-Safe functions are safe to call from
+.\" asynchronous signal handlers.
+.\" AS, in AS-Safe, stands for Asynchronous Signal.
+.\"
+.\" Many functions that are AS-Safe may set
+.\" .IR errno ,
+.\" or modify the floating-point environment,
+.\" because their doing so does not make them
+.\" unsuitable for use in signal handlers.
+.\" However, programs could misbehave should asynchronous signal handlers
+.\" modify this thread-local state,
+.\" and the signal handling machinery cannot be counted on to
+.\" preserve it.
+.\" Therefore, signal handlers that call functions that may set
+.\" .I errno
+.\" or modify the floating-point environment
+.\" .I must
+.\" save their original values, and restore them before returning.
+.\" .TP
+.\" .I AC-Safe
+.\" .I AC-Safe
+.\" or Async-Cancel-Safe functions are safe to call when
+.\" asynchronous cancelation is enabled.
+.\" AC in AC-Safe stands for Asynchronous Cancelation.
+.\"
+.\" The POSIX standard defines only three functions to be AC-Safe, namely
+.\" .BR pthread_cancel (3),
+.\" .BR pthread_setcancelstate (3),
+.\" and
+.\" .BR pthread_setcanceltype (3).
+.\" At present the GNU C Library provides no
+.\" guarantees beyond these three functions,
+.\" but does document which functions are presently AC-Safe.
+.\" This documentation is provided for use
+.\" by the GNU C Library developers.
+.\"
+.\" Just like signal handlers, cancelation cleanup routines must configure
+.\" the floating point environment they require.
+.\" The routines cannot assume a floating point environment,
+.\" particularly when asynchronous cancelation is enabled.
+.\" If the configuration of the floating point
+.\" environment cannot be performed atomically then it is also possible that
+.\" the environment encountered is internally inconsistent.
+.TP
+.I MT-Unsafe \" ", " AS-Unsafe ", " AC-Unsafe
+.I MT-Unsafe \" ", " AS-Unsafe ", " AC-Unsafe
+functions are not safe to call in multithreaded programs.
+.\" functions are not
+.\" safe to call within the safety contexts described above.
+.\" Calling them
+.\" within such contexts invokes undefined behavior.
+.\"
+.\" Functions not explicitly documented as safe in a safety context should
+.\" be regarded as Unsafe.
+.\" .TP
+.\" .I Preliminary
+.\" .I Preliminary
+.\" safety properties are documented, indicating these
+.\" properties may
+.\" .I not
+.\" be counted on in future releases of
+.\" the GNU C Library.
+.\"
+.\" Such preliminary properties are the result of an assessment of the
+.\" properties of our current implementation,
+.\" rather than of what is mandated and permitted
+.\" by current and future standards.
+.\"
+.\" Although we strive to abide by the standards, in some cases our
+.\" implementation is safe even when the standard does not demand safety,
+.\" and in other cases our implementation does not meet the standard safety
+.\" requirements.
+.\" The latter are most likely bugs; the former, when marked
+.\" as
+.\" .IR Preliminary ,
+.\" should not be counted on: future standards may
+.\" require changes that are not compatible with the additional safety
+.\" properties afforded by the current implementation.
+.\"
+.\" Furthermore,
+.\" the POSIX standard does not offer a detailed definition of safety.
+.\" We assume that, by "safe to call", POSIX means that,
+.\" as long as the program does not invoke undefined behavior,
+.\" the "safe to call" function behaves as specified,
+.\" and does not cause other functions to deviate from their specified behavior.
+.\" We have chosen to use its loose
+.\" definitions of safety, not because they are the best definitions to use,
+.\" but because choosing them harmonizes this manual with POSIX.
+.\"
+.\" Please keep in mind that these are preliminary definitions and annotations,
+.\" and certain aspects of the definitions are still under
+.\" discussion and might be subject to clarification or change.
+.\"
+.\" Over time,
+.\" we envision evolving the preliminary safety notes into stable commitments,
+.\" as stable as those of our interfaces.
+.\" As we do, we will remove the
+.\" .I Preliminary
+.\" keyword from safety notes.
+.\" As long as the keyword remains, however,
+.\" they are not to be regarded as a promise of future behavior.
+.PP
+Other keywords that appear in safety notes are defined in subsequent sections.
+.\"
+.\"
+.\" .SS Unsafe features
+.\" Functions that are unsafe to call in certain contexts are annotated with
+.\" keywords that document their features that make them unsafe to call.
+.\" AS-Unsafe features in this section indicate the functions are never safe
+.\" to call when asynchronous signals are enabled.
+.\" AC-Unsafe features
+.\" indicate they are never safe to call when asynchronous cancelation is
+.\" .\" enabled.
+.\" There are no MT-Unsafe marks in this section.
+.\" .TP
+.\" .\" .I code
+.\" Functions marked with
+.\" .I lock
+.\" as an AS-Unsafe feature may be
+.\" .\" interrupted by a signal while holding a non-recursive lock.
+.\" If the signal handler calls another such function that takes the same lock,
+.\" the result is a deadlock.
+.\"
+.\" Functions annotated with
+.\" .I lock
+.\" as an AC-Unsafe feature may, if canceled asynchronously,
+.\" fail to release a lock that would have been released if their execution
+.\" had not been interrupted by asynchronous thread cancelation.
+.\" Once a lock is left taken,
+.\" attempts to take that lock will block indefinitely.
+.\" .TP
+.\" .I corrupt
+.\" Functions marked with
+.\" .\" .I corrupt
+.\" as an AS-Unsafe feature may corrupt
+.\" data structures and misbehave when they interrupt,
+.\" or are interrupted by, another such function.
+.\" Unlike functions marked with
+.\" .IR lock ,
+.\" these take recursive locks to avoid MT-Safety problems,
+.\" but this is not enough to stop a signal handler from observing
+.\" a partially-updated data structure.
+.\" Further corruption may arise from the interrupted function's
+.\" failure to notice updates made by signal handlers.
+.\"
+.\" Functions marked with
+.\" .I corrupt
+.\" as an AC-Unsafe feature may leave
+.\" data structures in a corrupt, partially updated state.
+.\" Subsequent uses of the data structure may misbehave.
+.\"
+.\" .\" A special case, probably not worth documenting separately, involves
+.\" .\" reallocing, or even freeing pointers. Any case involving free could
+.\" .\" be easily turned into an ac-safe leak by resetting the pointer before
+.\" .\" releasing it; I don't think we have any case that calls for this sort
+.\" .\" of fixing. Fixing the realloc cases would require a new interface:
+.\" .\" instead of @code{ptr=realloc(ptr,size)} we'd have to introduce
+.\" .\" @code{acsafe_realloc(&ptr,size)} that would modify ptr before
+.\" .\" releasing the old memory. The ac-unsafe realloc could be implemented
+.\" .\" in terms of an internal interface with this semantics (say
+.\" .\" __acsafe_realloc), but since realloc can be overridden, the function
+.\" .\" we call to implement realloc should not be this internal interface,
+.\" .\" but another internal interface that calls __acsafe_realloc if realloc
+.\" .\" was not overridden, and calls the overridden realloc with async
+.\" .\" cancel disabled. --lxoliva
+.\" .TP
+.\" .I heap
+.\" Functions marked with
+.\" .I heap
+.\" may call heap memory management functions from the
+.\" .BR malloc (3)/ free (3)
+.\" family of functions and are only as safe as those functions.
+.\" This note is thus equivalent to:
+.\"
+.\" | AS-Unsafe lock | AC-Unsafe lock fd mem |
+.\" .\" @sampsafety{@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}}
+.\" .\"
+.\" .\" Check for cases that should have used plugin instead of or in
+.\" .\" addition to this. Then, after rechecking gettext, adjust i18n if
+.\" .\" needed.
+.\" .TP
+.\" .I dlopen
+.\" Functions marked with
+.\" .I dlopen
+.\" use the dynamic loader to load
+.\" shared libraries into the current execution image.
+.\" This involves opening files, mapping them into memory,
+.\" allocating additional memory, resolving symbols,
+.\" applying relocations and more,
+.\" all of this while holding internal dynamic loader locks.
+.\"
+.\" The locks are enough for these functions to be AS-Unsafe and AC-Unsafe,
+.\" but other issues may arise.
+.\" At present this is a placeholder for all
+.\" potential safety issues raised by
+.\" .BR dlopen (3).
+.\"
+.\" .\" dlopen runs init and fini sections of the module; does this mean
+.\" .\" dlopen always implies plugin?
+.\" .TP
+.\" .I plugin
+.\" Functions annotated with
+.\" .I plugin
+.\" may run code from plugins that
+.\" may be external to the GNU C Library.
+.\" Such plugin functions are assumed to be
+.\" MT-Safe, AS-Unsafe and AC-Unsafe.
+.\" Examples of such plugins are stack unwinding libraries,
+.\" name service switch (NSS) and character set conversion (iconv) back-ends.
+.\"
+.\" Although the plugins mentioned as examples are all brought in by means
+.\" of dlopen, the
+.\" .I plugin
+.\" keyword does not imply any direct
+.\" involvement of the dynamic loader or the
+.\" .I libdl
+.\" interfaces,
+.\" those are covered by
+.\" .IR dlopen .
+.\" For example, if one function loads a module and finds the addresses
+.\" of some of its functions,
+.\" while another just calls those already-resolved functions,
+.\" the former will be marked with
+.\" .IR dlopen ,
+.\" whereas the latter will get the
+.\" .IR plugin .
+.\" When a single function takes all of these actions, then it gets both marks.
+.\" .TP
+.\" .I i18n
+.\" Functions marked with
+.\" .I i18n
+.\" may call internationalization
+.\" functions of the
+.\" .BR gettext (3)
+.\" family and will be only as safe as those
+.\" functions.
+.\" This note is thus equivalent to:
+.\"
+.\" | MT-Safe env | AS-Unsafe corrupt heap dlopen | AC-Unsafe corrupt |
+.\"
+.\" .\" @sampsafety{@mtsafe{@mtsenv{}}@asunsafe{@asucorrupt{} @ascuheap{} @ascudlopen{}}@acunsafe{@acucorrupt{}}}
+.\" .TP
+.\" .I timer
+.\" Functions marked with
+.\" .I timer
+.\" use the
+.\" .BR alarm (3)
+.\" function or
+.\" similar to set a time-out for a system call or a long-running operation.
+.\" In a multi-threaded program, there is a risk that the time-out signal
+.\" will be delivered to a different thread,
+.\" thus failing to interrupt the intended thread.
+.\" Besides being MT-Unsafe, such functions are always
+.\" AS-Unsafe, because calling them in signal handlers may interfere with
+.\" timers set in the interrupted code, and AC-Unsafe,
+.\" because there is no safe way to guarantee an earlier timer
+.\" will be reset in case of asynchronous cancelation.
+.\"
+.\"
+.SS Conditionally safe features
+For some features that make functions unsafe to call in certain contexts,
+there are known ways to avoid the safety problem other than
+refraining from calling the function altogether.
+The keywords that follow refer to such features,
+and each of their definitions indicates
+how the whole program needs to be constrained in order to remove the
+safety problem indicated by the keyword.
+Only when all the reasons that
+make a function unsafe are observed and addressed,
+by applying the documented constraints,
+does the function become safe to call in a context.
+.TP
+.I init
+Functions marked with
+.I init
+as an MT-Unsafe feature perform
+MT-Unsafe initialization when they are first called.
+.IP
+Calling such a function at least once in single-threaded mode removes
+this specific cause for the function to be regarded as MT-Unsafe.
+If no other cause for that remains,
+the function can then be safely called after other threads are started.
+.\"
+.\" Functions marked with
+.\" .I init
+.\" as an AS-Unsafe or AC-Unsafe feature use the GNU C Library internal
+.\" .I libc_once
+.\" machinery or similar to initialize internal data structures.
+.\"
+.\" If a signal handler interrupts such an initializer,
+.\" and calls any function that also performs
+.\" .I libc_once
+.\" initialization, it will deadlock if the thread library has been loaded.
+.\"
+.\" Furthermore, if an initializer is partially complete before it is canceled
+.\" or interrupted by a signal whose handler requires the same initialization,
+.\" some or all of the initialization may be performed more than once,
+.\" leaking resources or even resulting in corrupt internal data.
+.\"
+.\" Applications that need to call functions marked with
+.\" .I init
+.\" as an AS-Unsafe or AC-Unsafe feature should ensure
+.\" the initialization is performed
+.\" before configuring signal handlers or enabling cancelation,
+.\" so that the AS-Safety and AC-Safety issues related with
+.\" .I libc_once
+.\" do not arise.
+.\"
+.\" .\" We may have to extend the annotations to cover conditions in which
+.\" .\" initialization may or may not occur, since an initial call in a safe
+.\" .\" context is no use if the initialization doesn't take place at that
+.\" .\" time: it doesn't remove the risk for later calls.
+.TP
+.I race
+Functions annotated with
+.I race
+as an MT-Safety issue operate on
+objects in ways that may cause data races or similar forms of
+destructive interference out of concurrent execution.
+In some cases,
+the objects are passed to the functions by users;
+in others, they are used by the functions to return values to users;
+in others, they are not even exposed to users.
+.\"
+.\" We consider access to objects passed as (indirect) arguments to
+.\" functions to be data race free.
+.\" The assurance of data race free objects
+.\" is the caller's responsibility.
+.\" We will not mark a function as MT-Unsafe or AS-Unsafe
+.\" if it misbehaves when users fail to take the measures required by
+.\" POSIX to avoid data races when dealing with such objects.
+.\" As a general rule, if a function is documented as reading from
+.\" an object passed (by reference) to it, or modifying it,
+.\" users ought to use memory synchronization primitives
+.\" to avoid data races just as they would should they perform
+.\" the accesses themselves rather than by calling the library function.
+.\" Standard I/O
+.\" .RI ( "FILE *" )
+.\" streams are the exception to the general rule,
+.\" in that POSIX mandates the library to guard against data races
+.\" in many functions that manipulate objects of this specific opaque type.
+.\" We regard this as a convenience provided to users,
+.\" rather than as a general requirement whose expectations
+.\" should extend to other types.
+.\"
+.\" In order to remind users that guarding certain arguments is their
+.\" responsibility, we will annotate functions that take objects of certain
+.\" types as arguments.
+.\" We draw the line for objects passed by users as follows:
+.\" objects whose types are exposed to users,
+.\" and that users are expected to access directly,
+.\" such as memory buffers, strings,
+.\" and various user-visible structured types, do
+.\" .I not
+.\" give reason for functions to be annotated with
+.\" .IR race .
+.\" It would be noisy and redundant with the general requirement,
+.\" and not many would be surprised by the library's lack of internal
+.\" guards when accessing objects that can be accessed directly by users.
+.\"
+.\" As for objects that are opaque or opaque-like,
+.\" in that they are to be manipulated only by passing them
+.\" to library functions (e.g.,
+.\" .IR FILE ,
+.\" .IR DIR ,
+.\" .IR obstack ,
+.\" .IR iconv_t ),
+.\" there might be additional expectations as to internal coordination
+.\" of access by the library.
+.\" We will annotate, with
+.\" .I race
+.\" followed by a colon and the argument name,
+.\" functions that take such objects but that do not take
+.\" care of synchronizing access to them by default.
+.\" For example,
+.\" .I FILE
+.\" stream
+.\" .I unlocked
+.\" functions
+.\" .RB ( unlocked_stdio (3))
+.\" will be annotated,
+.\" but those that perform implicit locking on
+.\" .I FILE
+.\" streams by default will not,
+.\" even though the implicit locking may be disabled on a per-stream basis.
+.\"
+.\" In either case, we will not regard as MT-Unsafe functions that may
+.\" access user-supplied objects in unsafe ways should users fail to ensure
+.\" the accesses are well defined.
+.\" The notion prevails that users are expected to safeguard against
+.\" data races any user-supplied objects that the library accesses
+.\" on their behalf.
+.\"
+.\" .\" The above describes @mtsrace; @mtasurace is described below.
+.\"
+.\" This user responsibility does not apply, however,
+.\" to objects controlled by the library itself,
+.\" such as internal objects and static buffers used
+.\" to return values from certain calls.
+.\" When the library doesn't guard them against concurrent uses,
+.\" these cases are regarded as MT-Unsafe and AS-Unsafe (although the
+.\" .I race
+.\" mark under AS-Unsafe will be omitted
+.\" as redundant with the one under MT-Unsafe).
+.\" As in the case of user-exposed objects,
+.\" the mark may be followed by a colon and an identifier.
+.\" The identifier groups all functions that operate on a
+.\" certain unguarded object; users may avoid the MT-Safety issues related
+.\" with unguarded concurrent access to such internal objects by creating a
+.\" non-recursive mutex related with the identifier,
+.\" and always holding the mutex when calling any function marked
+.\" as racy on that identifier,
+.\" as they would have to should the identifier be
+.\" an object under user control.
+.\" The non-recursive mutex avoids the MT-Safety issue,
+.\" but it trades one AS-Safety issue for another,
+.\" so use in asynchronous signals remains undefined.
+.\"
+.\" When the identifier relates to a static buffer used to hold return values,
+.\" the mutex must be held for as long as the buffer remains in use
+.\" by the caller.
+.\" Many functions that return pointers to static buffers offer reentrant
+.\" variants that store return values in caller-supplied buffers instead.
+.\" In some cases, such as
+.\" .BR tmpnam (3),
+.\" the variant is chosen not by calling an alternate entry point,
+.\" but by passing a non-NULL pointer to the buffer in which the
+.\" returned values are to be stored.
+.\" These variants are generally preferable in multi-threaded programs,
+.\" although some of them are not MT-Safe because of other internal buffers,
+.\" also documented with
+.\" .I race
+.\" notes.
+.TP
+.I const
+Functions marked with
+.I const
+as an MT-Safety issue non-atomically
+modify internal objects that are better regarded as constant,
+because a substantial portion of the GNU C Library accesses them without
+synchronization.
+Unlike
+.IR race ,
+which causes both readers and
+writers of internal objects to be regarded as MT-Unsafe,\" and AS-Unsafe,
+this mark is applied to writers only.
+Writers remain\" equally
+MT-Unsafe\" and AS-Unsafe
+to call,
+but the then-mandatory constness of objects they
+modify enables readers to be regarded as MT-Safe\" and AS-Safe
+(as long as no other reasons for them to be unsafe remain),
+since the lack of synchronization is not a problem when the
+objects are effectively constant.
+.IP
+The identifier that follows the
+.I const
+mark will appear by itself as a safety note in readers.
+Programs that wish to work around this safety issue,
+so as to call writers, may use a non-recursive
+read-write lock
+associated with the identifier, and guard
+.I all
+calls to functions marked with
+.I const
+followed by the identifier with a write lock, and
+.I all
+calls to functions marked with the identifier
+by itself with a read lock.
+.\" The non-recursive locking removes the MT-Safety problem,
+.\" but it trades one AS-Safety problem for another,
+.\" so use in asynchronous signals remains undefined.
+.\"
+.\" .\" But what if, instead of marking modifiers with const:id and readers
+.\" .\" with just id, we marked writers with race:id and readers with ro:id?
+.\" .\" Instead of having to define each instance of 'id', we'd have a
+.\" .\" general pattern governing all such 'id's, wherein race:id would
+.\" .\" suggest the need for an exclusive/write lock to make the function
+.\" .\" safe, whereas ro:id would indicate 'id' is expected to be read-only,
+.\" .\" but if any modifiers are called (while holding an exclusive lock),
+.\" .\" then ro:id-marked functions ought to be guarded with a read lock for
+.\" .\" safe operation. ro:env or ro:locale, for example, seems to convey
+.\" .\" more clearly the expectations and the meaning, than just env or
+.\" .\" locale.
+.TP
+.I sig
+Functions marked with
+.I sig
+as an MT-Safety issue
+.\" (that implies an identical AS-Safety issue, omitted for brevity)
+may temporarily install a signal handler for internal purposes,
+which may interfere with other uses of the signal,
+identified after a colon.
+.IP
+This safety problem can be worked around by ensuring that no other uses
+of the signal will take place for the duration of the call.
+Holding a non-recursive mutex while calling all functions that use the same
+temporary signal,
+blocking that signal before the call, and resetting its
+handler afterwards are recommended.
+.\"
+.\" There is no safe way to guarantee the original signal handler is
+.\" restored in case of asynchronous cancelation,
+.\" therefore so-marked functions are also AC-Unsafe.
+.\"
+.\" .\" fixme: at least deferred cancelation should get it right, and would
+.\" .\" obviate the restoring bit below, and the qualifier above.
+.\"
+.\" Besides the measures recommended to work around the
+.\" MT-Safety and AS-Safety problem,
+.\" in order to avert the cancelation problem,
+.\" disabling asynchronous cancelation
+.\" .I and
+.\" installing a cleanup handler to restore the signal to the desired state
+.\" and to release the mutex are recommended.
+.TP
+.I term
+Functions marked with
+.I term
+as an MT-Safety issue may change the
+terminal settings in the recommended way, namely: call
+.BR tcgetattr (3),
+modify some flags, and then call
+.BR tcsetattr (3);
+this creates a window in which changes made by other threads are lost.
+Thus, functions marked with
+.I term
+are MT-Unsafe.
+.\" The same window enables changes made by asynchronous signals to be lost.
+.\" These functions are also AS-Unsafe,
+.\" but the corresponding mark is omitted as redundant.
+.IP
+It is thus advisable for applications using the terminal to avoid
+concurrent and reentrant interactions with it,
+by not using it in signal handlers or blocking signals that might use it,
+and holding a lock while calling these functions and interacting
+with the terminal.
+This lock should also be used for mutual exclusion with
+functions marked with
+.IR race:tcattr(fd) ,
+where
+.I fd
+is a file descriptor for the controlling terminal.
+The caller may use a single mutex for simplicity,
+or use one mutex per terminal,
+even if referenced by different file descriptors.
+.\"
+.\" Functions marked with
+.\" .I term
+.\" as an AC-Safety issue are supposed to
+.\" restore terminal settings to their original state,
+.\" after temporarily changing them, but they may fail to do so if canceled.
+.\"
+.\" .\" fixme: at least deferred cancelation should get it right, and would
+.\" .\" obviate the restoring bit below, and the qualifier above.
+.\"
+.\" Besides the measures recommended to work around the
+.\" MT-Safety and AS-Safety problem,
+.\" in order to avert the cancelation problem,
+.\" disabling asynchronous cancelation
+.\" .I and
+.\" installing a cleanup handler to
+.\" restore the terminal settings to the original state and to release the
+.\" mutex are recommended.
+.\"
+.\"
+.SS Other safety remarks
+Additional keywords may be attached to functions,
+indicating features that do not make a function unsafe to call,
+but that may need to be taken into account in certain classes of programs:
+.TP
+.I locale
+Functions annotated with
+.I locale
+as an MT-Safety issue read from
+the locale object without any form of synchronization.
+Functions
+annotated with
+.I locale
+called concurrently with locale changes may
+behave in ways that do not correspond to any of the locales active
+during their execution, but an unpredictable mix thereof.
+.IP
+We do not mark these functions as MT-Unsafe,\" or AS-Unsafe,
+however,
+because functions that modify the locale object are marked with
+.I const:locale
+and regarded as unsafe.
+Being unsafe, the latter are not to be called when multiple threads
+are running or asynchronous signals are enabled,
+and so the locale can be considered effectively constant
+in these contexts,
+which makes the former safe.
+.\" Should the locking strategy suggested under @code{const} be used,
+.\" failure to guard locale uses is not as fatal as data races in
+.\" general: unguarded uses will @emph{not} follow dangling pointers or
+.\" access uninitialized, unmapped or recycled memory. Each access will
+.\" read from a consistent locale object that is or was active at some
+.\" point during its execution. Without synchronization, however, it
+.\" cannot even be assumed that, after a change in locale, earlier
+.\" locales will no longer be used, even after the newly-chosen one is
+.\" used in the thread. Nevertheless, even though unguarded reads from
+.\" the locale will not violate type safety, functions that access the
+.\" locale multiple times may invoke all sorts of undefined behavior
+.\" because of the unexpected locale changes.
+.TP
+.I env
+Functions marked with
+.I env
+as an MT-Safety issue access the
+environment with
+.BR getenv (3)
+or similar, without any guards to ensure
+safety in the presence of concurrent modifications.
+.IP
+We do not mark these functions as MT-Unsafe,\" or AS-Unsafe,
+however,
+because functions that modify the environment are all marked with
+.I const:env
+and regarded as unsafe.
+Being unsafe, the latter are not to be called when multiple threads
+are running or asynchronous signals are enabled,
+and so the environment can be considered
+effectively constant in these contexts,
+which makes the former safe.
+.TP
+.I hostid
+The function marked with
+.I hostid
+as an MT-Safety issue reads from the system-wide data structures that
+hold the "host ID" of the machine.
+These data structures cannot generally be modified atomically.
+Since it is expected that the "host ID" will not normally change,
+the function that reads from it
+.RB ( gethostid (3))
+is regarded as safe,
+whereas the function that modifies it
+.RB ( sethostid (3))
+is marked with
+.IR const:hostid ,
+indicating it may require special care if it is to be called.
+In this specific case,
+the special care amounts to system-wide
+(not merely intra-process) coordination.
+.TP
+.I sigintr
+Functions marked with
+.I sigintr
+as an MT-Safety issue access the
+GNU C Library
+.I _sigintr
+internal data structure without any guards to ensure
+safety in the presence of concurrent modifications.
+.IP
+We do not mark these functions as MT-Unsafe,\" or AS-Unsafe,
+however,
+because functions that modify this data structure are all marked with
+.I const:sigintr
+and regarded as unsafe.
+Being unsafe,
+the latter are not to be called when multiple threads are
+running or asynchronous signals are enabled,
+and so the data structure can be considered
+effectively constant in these contexts,
+which makes the former safe.
+.\" .TP
+.\" .I fd
+.\" Functions annotated with
+.\" .I fd
+.\" as an AC-Safety issue may leak file
+.\" descriptors if asynchronous thread cancelation interrupts their
+.\" execution.
+.\"
+.\" Functions that allocate or deallocate file descriptors will generally be
+.\" marked as such.
+.\" Even if they attempted to protect the file descriptor
+.\" allocation and deallocation with cleanup regions,
+.\" allocating a new descriptor and storing its number where the cleanup region
+.\" could release it cannot be performed as a single atomic operation.
+.\" Similarly,
+.\" releasing the descriptor and taking it out of the data structure
+.\" normally responsible for releasing it cannot be performed atomically.
+.\" There will always be a window in which the descriptor cannot be released
+.\" because it was not stored in the cleanup handler argument yet,
+.\" or it was already taken out before releasing it.
+.\" .\" It cannot be taken out after release:
+.\" an open descriptor could mean either that the descriptor still
+.\" has to be closed,
+.\" or that it already did so but the descriptor was
+.\" reallocated by another thread or signal handler.
+.\"
+.\" Such leaks could be internally avoided, with some performance penalty,
+.\" by temporarily disabling asynchronous thread cancelation.
+.\" However,
+.\" since callers of allocation or deallocation functions would have to do
+.\" this themselves, to avoid the same sort of leak in their own layer,
+.\" it makes more sense for the library to assume they are taking care of it
+.\" than to impose a performance penalty that is redundant when the problem
+.\" is solved in upper layers, and insufficient when it is not.
+.\"
+.\" This remark by itself does not cause a function to be regarded as
+.\" AC-Unsafe.
+.\" However, cumulative effects of such leaks may pose a
+.\" problem for some programs.
+.\" If this is the case,
+.\" suspending asynchronous cancelation for the duration of calls
+.\" to such functions is recommended.
+.\" .TP
+.\" .I mem
+.\" Functions annotated with
+.\" .I mem
+.\" as an AC-Safety issue may leak
+.\" memory if asynchronous thread cancelation interrupts their execution.
+.\"
+.\" The problem is similar to that of file descriptors: there is no atomic
+.\" interface to allocate memory and store its address in the argument to a
+.\" cleanup handler,
+.\" or to release it and remove its address from that argument,
+.\" without at least temporarily disabling asynchronous cancelation,
+.\" which these functions do not do.
+.\"
+.\" This remark does not by itself cause a function to be regarded as
+.\" generally AC-Unsafe.
+.\" However, cumulative effects of such leaks may be
+.\" severe enough for some programs that disabling asynchronous cancelation
+.\" for the duration of calls to such functions may be required.
+.TP
+.I cwd
+Functions marked with
+.I cwd
+as an MT-Safety issue may temporarily
+change the current working directory during their execution,
+which may cause relative pathnames to be resolved in unexpected ways in
+other threads or within asynchronous signal or cancelation handlers.
+.IP
+This is not enough of a reason to mark so-marked functions as MT-Unsafe,
+.\" or AS-Unsafe,
+but when this behavior is optional (e.g.,
+.BR nftw (3)
+with
+.BR FTW_CHDIR ),
+avoiding the option may be a good alternative to
+using full pathnames or file descriptor-relative (e.g.,
+.BR openat (2))
+system calls.
+.\" .TP
+.\" .I !posix
+.\" This remark, as an MT-Safety, AS-Safety or AC-Safety
+.\" note to a function,
+.\" indicates the safety status of the function is known to differ
+.\" from the specified status in the POSIX standard.
+.\" For example, POSIX does not require a function to be Safe,
+.\" but our implementation is, or vice-versa.
+.\"
+.\" For the time being, the absence of this remark does not imply the safety
+.\" properties we documented are identical to those mandated by POSIX for
+.\" the corresponding functions.
+.TP
+.I :identifier
+Annotations may sometimes be followed by identifiers,
+intended to group several functions that, for example,
+access the data structures in an unsafe way, as in
+.I race
+and
+.IR const ,
+or to provide more specific information,
+such as naming a signal in a function marked with
+.IR sig .
+It is envisioned that it may be applied to
+.I lock
+and
+.I corrupt
+as well in the future.
+.IP
+In most cases, the identifier will name a set of functions,
+but it may name global objects or function arguments,
+or identifiable properties or logical components associated with them,
+with a notation such as, for example,
+.I :buf(arg)
+to denote a buffer associated with the argument
+.IR arg ,
+or
+.I :tcattr(fd)
+to denote the terminal attributes of a file descriptor
+.IR fd .
+.IP
+The most common use for identifiers is to provide logical groups of
+functions and arguments that need to be protected by the same
+synchronization primitive in order to ensure safe operation in a given
+context.
+.TP
+.I /condition
+Some safety annotations may be conditional,
+in that they only apply if a boolean expression involving arguments,
+global variables or even the underlying kernel evaluates to true.
+.\" Such conditions as
+.\" .I /hurd
+.\" or
+.\" .I /!linux!bsd
+.\" indicate the preceding marker only
+.\" applies when the underlying kernel is the HURD,
+.\" or when it is neither Linux nor a BSD kernel, respectively.
+For example,
+.I /!ps
+and
+.I /one_per_line
+indicate the preceding marker only applies when argument
+.I ps
+is NULL, or global variable
+.I one_per_line
+is nonzero.
+.IP
+When all marks that render a function unsafe are
+adorned with such conditions,
+and none of the named conditions hold,
+then the function can be regarded as safe.
+.SH SEE ALSO
+.BR pthreads (7),
+.BR signal\-safety (7)
diff --git a/man7/boot.7 b/man7/boot.7
new file mode 100644
index 0000000..f69e8c1
--- /dev/null
+++ b/man7/boot.7
@@ -0,0 +1,230 @@
+.\" Written by Oron Peled <oron@actcom.co.il>.
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.\" I tried to be as much generic in the description as possible:
+.\" - General boot sequence is applicable to almost any
+.\" OS/Machine (DOS/PC, Linux/PC, Solaris/SPARC, CMS/S390)
+.\" - kernel and init(1) is applicable to almost any UNIX/Linux
+.\" - boot scripts are applicable to SYSV-R4 based UNIX/Linux
+.\"
+.\" Modified 2004-11-03 patch from Martin Schulze <joey@infodrom.org>
+.\"
+.TH boot 7 2023-07-08 "Linux man-pages 6.05.01"
+.SH NAME
+boot \- System bootup process based on UNIX System V Release 4
+.SH DESCRIPTION
+The \fBbootup process\fR (or "\fBboot sequence\fR") varies in details
+among systems, but can be roughly divided into phases controlled by
+the following components:
+.IP (1) 5
+hardware
+.IP (2)
+operating system (OS) loader
+.IP (3)
+kernel
+.IP (4)
+root user-space process (\fIinit\fR and \fIinittab\fR)
+.IP (5)
+boot scripts
+.PP
+Each of these is described below in more detail.
+.SS Hardware
+After power-on or hard reset, control is given
+to a program stored in read-only memory (normally
+PROM); for historical reasons involving the personal
+computer, this program is often called "the \fBBIOS\fR".
+.PP
+This program normally performs a basic self-test of the
+machine and accesses nonvolatile memory to read
+further parameters.
+This memory in the PC is
+battery-backed CMOS memory, so most people
+refer to it as "the \fBCMOS\fR"; outside
+of the PC world, it is usually called "the \fBNVRAM\fR"
+(nonvolatile RAM).
+.PP
+The parameters stored in the NVRAM vary among
+systems, but as a minimum, they should specify
+which device can supply an OS loader, or at least which
+devices may be probed for one; such a device is known as "the
+\fBboot device\fR".
+The hardware boot stage loads the OS loader from a fixed position on
+the boot device, and then transfers control to it.
+.TP
+Note:
+The device from which the OS loader is read may be attached via a network,
+in which case the details of booting are further specified by protocols such as
+DHCP, TFTP, PXE, Etherboot, etc.
+.SS OS loader
+The main job of the OS loader is to locate the kernel
+on some device, load it, and run it.
+Most OS loaders allow
+interactive use, in order to enable specification of an alternative
+kernel (maybe a backup in case the one last compiled
+isn't functioning) and to pass optional parameters
+to the kernel.
+.PP
+In a traditional PC, the OS loader is located in the initial 512-byte block
+of the boot device; this block is known as "the \fBMBR\fR"
+(Master Boot Record).
+.PP
+In most systems, the OS loader is very
+limited due to various constraints.
+Even on non-PC systems,
+there are some limitations on the size and complexity
+of this loader, but the size limitation of the PC MBR
+(512 bytes, including the partition table) makes it
+almost impossible to squeeze much functionality into it.
+.PP
+Therefore, most systems split the role of loading the OS between
+a primary OS loader and a secondary OS loader; this secondary
+OS loader may be located within a larger portion of persistent
+storage, such as a disk partition.
+.PP
+In Linux, the OS loader is often
+.BR grub (8)
+(an alternative is
+.BR lilo (8)).
+.SS Kernel
+When the kernel is loaded, it initializes various components of
+the computer and operating system; each portion of software
+responsible for such a task is usually considered "a \fBdriver\fR" for
+the applicable component.
+The kernel starts the virtual memory
+swapper (it is a kernel process, called "kswapd" in a modern Linux
+kernel), and mounts some filesystem at the root path,
+.IR / .
+.PP
+Some of the parameters that may be passed to the kernel
+relate to these activities (for example, the default root filesystem
+can be overridden); for further information
+on Linux kernel parameters, read
+.BR bootparam (7).
+.PP
+Only then does the kernel create the initial userland
+process, which is given the number 1 as its
+.B PID
+(process ID).
+Traditionally, this process executes the
+program
+.IR /sbin/init ,
+to which are passed the parameters that haven't already been
+handled by the kernel.
+.SS Root user-space process
+.TP
+Note:
+The following description applies to an OS based on UNIX System V Release 4.
+However, a number of widely used systems have adopted a related but
+fundamentally different approach known as
+.BR systemd (1),
+for which the bootup process is detailed in its associated
+.BR bootup (7).
+.PP
+When
+.I /sbin/init
+starts, it reads
+.I /etc/inittab
+for further instructions.
+This file defines what should be run when the
+.I /sbin/init
+program is instructed to enter a particular run level, giving
+the administrator an easy way to establish an environment
+for some usage; each run level is associated with a set of services
+(for example, run level
+.B S
+is single-user mode,
+and run level
+.B 2
+entails running most network services).
+.PP
+The administrator may change the current run level via
+.BR init (1),
+and query the current run level via
+.BR runlevel (8).
+.PP
+However, since it is not convenient to manage individual services
+by editing this file,
+.I /etc/inittab
+only bootstraps a set of scripts
+that actually start/stop the individual services.
+.SS Boot scripts
+.TP
+Note:
+The following description applies to an OS based on UNIX System V Release 4.
+However, a number of widely used systems (Slackware Linux, FreeBSD, OpenBSD)
+have a somewhat different scheme for boot scripts.
+.PP
+For each managed service (mail, nfs server, cron, etc.), there is
+a single startup script located in a specific directory
+.RI ( /etc/init.d
+in most versions of Linux).
+Each of these scripts accepts as a single argument
+the word "start" (causing it to start the service) or the word
+\&"stop" (causing it to stop the service).
+The script may optionally
+accept other "convenience" parameters (e.g., "restart" to stop and then
+start, "status" to display the service status, etc.).
+Running the script
+without parameters displays the possible arguments.
+.SS Sequencing directories
+To make specific scripts start/stop at specific run levels and in a
+specific order, there are \fIsequencing directories\fR, normally
+of the form \fI/etc/rc[0\-6S].d\fR.
+In each of these directories,
+there are links (usually symbolic) to the scripts in the \fI/etc/init.d\fR
+directory.
+.PP
+A primary script (usually \fI/etc/rc\fR) is called from
+.BR inittab (5);
+this primary script calls each service's script via a link in the
+relevant sequencing directory.
+Each link whose name begins with \[aq]S\[aq] is called with
+the argument "start" (thereby starting the service).
+Each link whose name begins with \[aq]K\[aq] is called with
+the argument "stop" (thereby stopping the service).
+.PP
+To define the starting or stopping order within the same run level,
+the name of a link contains an \fBorder-number\fR.
+Also, for clarity, the name of a link usually
+ends with the name of the service to which it refers.
+For example,
+the link \fI/etc/rc2.d/S80sendmail\fR starts the
+.BR sendmail (8)
+service on
+run level 2.
+This happens after \fI/etc/rc2.d/S12syslog\fR is run
+but before \fI/etc/rc2.d/S90xfs\fR is run.
+.PP
+To manage these links is to manage the boot order and run levels;
+under many systems, there are tools to help with this task
+(e.g.,
+.BR chkconfig (8)).
+.SS Boot configuration
+A program that provides a service is often called a "\fBdaemon\fR".
+Usually, a daemon may receive various command-line options
+and parameters.
+To allow a system administrator to change these
+inputs without editing an entire boot script,
+some separate configuration file is used, and is located in a specific
+directory where an associated boot script may find it
+(\fI/etc/sysconfig\fR on older Red Hat systems).
+.PP
+In older UNIX systems, such a file contained the actual command-line
+options for a daemon, but in modern Linux systems (and also
+in HP-UX), it just contains shell variables.
+A boot script in \fI/etc/init.d\fR reads and includes its configuration
+file (that is, it "\fBsources\fR" its configuration file) and then uses
+the variable values.
+.SH FILES
+.IR /etc/init.d/ ,
+.IR /etc/rc[S0\-6].d/ ,
+.I /etc/sysconfig/
+.SH SEE ALSO
+.BR init (1),
+.BR systemd (1),
+.BR inittab (5),
+.BR bootparam (7),
+.BR bootup (7),
+.BR runlevel (8),
+.BR shutdown (8)
diff --git a/man7/bootparam.7 b/man7/bootparam.7
new file mode 100644
index 0000000..5514aca
--- /dev/null
+++ b/man7/bootparam.7
@@ -0,0 +1,664 @@
+.\" Copyright (c) 1995,1997 Paul Gortmaker and Andries Brouwer
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" This man page written 950814 by aeb, based on Paul Gortmaker's HOWTO
+.\" (dated v1.0.1, 15/08/95).
+.\" Major update, aeb, 970114.
+.\"
+.TH bootparam 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+bootparam \- introduction to boot time parameters of the Linux kernel
+.SH DESCRIPTION
+The Linux kernel accepts certain 'command-line options' or 'boot time
+parameters' at the moment it is started.
+In general, this is used to
+supply the kernel with information about hardware parameters that
+the kernel would not be able to determine on its own, or to avoid/override
+the values that the kernel would otherwise detect.
+.PP
+When the kernel is booted directly by the BIOS,
+you have no opportunity to specify any parameters.
+So, in order to take advantage of this possibility you have to
+use a boot loader that is able to pass parameters, such as GRUB.
+.SS The argument list
+The kernel command line is parsed into a list of strings
+(boot arguments) separated by spaces.
+Most of the boot arguments have the form:
+.PP
+.in +4n
+.EX
+name[=value_1][,value_2]...[,value_10]
+.EE
+.in
+.PP
+where 'name' is a unique keyword that is used to identify what part of
+the kernel the associated values (if any) are to be given to.
+Note the limit of 10 is real, as the present code handles only 10 comma
+separated parameters per keyword.
+(However, you can reuse the same
+keyword with up to an additional 10 parameters in unusually
+complicated situations, assuming the setup function supports it.)
+.PP
+Most of the sorting is coded in the kernel source file
+.IR init/main.c .
+First, the kernel
+checks to see if the argument is any of the special arguments 'root=',
+\&'nfsroot=', 'nfsaddrs=', 'ro', 'rw', 'debug', or 'init'.
+The meaning of these special arguments is described below.
+.PP
+Then it walks a list of setup functions
+to see if the specified argument string (such as 'foo') has
+been associated with a setup function ('foo_setup()') for a particular
+device or part of the kernel.
+If you passed the kernel the line
+foo=3,4,5,6 then the kernel would search the bootsetups array to see
+if 'foo' was registered.
+If it was, then it would call the setup
+function associated with 'foo' (foo_setup()) and hand it the arguments
+3, 4, 5, and 6 as given on the kernel command line.
+.PP
+Anything of the form 'foo=bar' that is not accepted as a setup function
+as described above is then interpreted as an environment variable to
+be set.
+A (useless?) example would be to use 'TERM=vt100' as a boot
+argument.
+.PP
+Any remaining arguments that were not picked up by the kernel and were
+not interpreted as environment variables are then passed on to PID 1,
+which is usually the
+.BR init (1)
+program.
+The most common argument that
+is passed to the
+.I init
+process is the word 'single' which instructs it
+to boot the computer in single user mode, and not launch all the usual
+daemons.
+Check the manual page for the version of
+.BR init (1)
+installed on
+your system to see what arguments it accepts.
+.SS General non-device-specific boot arguments
+.TP
+.B "'init=...'"
+This sets the initial command to be executed by the kernel.
+If this is not set, or cannot be found, the kernel will try
+.IR /sbin/init ,
+then
+.IR /etc/init ,
+then
+.IR /bin/init ,
+then
+.I /bin/sh
+and panic if all of this fails.
+.TP
+.B "'nfsaddrs=...'"
+This sets the NFS boot address to the given string.
+This boot address is used in case of a net boot.
+.TP
+.B "'nfsroot=...'"
+This sets the NFS root name to the given string.
+If this string
+does not begin with '/' or ',' or a digit, then it is prefixed by
+\&'/tftpboot/'.
+This root name is used in case of a net boot.
+.TP
+.B "'root=...'"
+This argument tells the kernel what device is to be used as the root
+filesystem while booting.
+The default of this setting is determined
+at compile time, and usually is the value of the root device of the
+system that the kernel was built on.
+To override this value, and
+select the second floppy drive as the root device, one would
+use 'root=/dev/fd1'.
+.IP
+The root device can be specified symbolically or numerically.
+A symbolic specification has the form
+.IR /dev/XXYN ,
+where XX designates
+the device type (e.g., 'hd' for ST-506 compatible hard disk, with Y in
+\&'a'\[en]'d'; 'sd' for SCSI compatible disk, with Y in 'a'\[en]'e'),
+Y the drive letter or
+number, and N the number (in decimal) of the partition on this device.
+.IP
+Note that this has nothing to do with the designation of these
+devices on your filesystem.
+The '/dev/' part is purely conventional.
+.IP
+The more awkward and less portable numeric specification of the above
+possible root devices in major/minor format is also accepted.
+(For example,
+.I /dev/sda3
+is major 8, minor 3, so you could use 'root=0x803' as an
+alternative.)
+.TP
+.B 'rootdelay='
+This parameter sets the delay (in seconds) to pause before attempting
+to mount the root filesystem.
+.TP
+.B 'rootflags=...'
+This parameter sets the mount option string for the root filesystem
+(see also
+.BR fstab (5)).
+.TP
+.B 'rootfstype=...'
+The 'rootfstype' option tells the kernel to mount the root filesystem as
+if it were of the type specified.
+This can be useful (for example) to
+mount an ext3 filesystem as ext2 and then remove the journal in the root
+filesystem, in fact reverting its format from ext3 to ext2 without the
+need to boot the box from alternate media.
+.TP
+.BR 'ro' " and " 'rw'
+The 'ro' option tells the kernel to mount the root filesystem
+as 'read-only' so that filesystem consistency check programs (fsck)
+can do their work on a quiescent filesystem.
+No processes can
+write to files on the filesystem in question until it is 'remounted'
+as read/write capable, for example, by 'mount \-w \-n \-o remount /'.
+(See also
+.BR mount (8).)
+.IP
+The 'rw' option tells the kernel to mount the root filesystem read/write.
+This is the default.
+.TP
+.B "'resume=...'"
+This tells the kernel the location of
+the suspend-to-disk data that you want the machine to resume from
+after hibernation.
+Usually, it is the same as your swap partition or file.
+Example:
+.IP
+.in +4n
+.EX
+resume=/dev/hda2
+.EE
+.in
+.TP
+.B "'reserve=...'"
+This is used to protect I/O port regions from probes.
+The form of the command is:
+.IP
+.in +4n
+.EX
+.BI reserve= iobase,extent[,iobase,extent]...
+.EE
+.in
+.IP
+In some machines it may be necessary to prevent device drivers from
+checking for devices (auto-probing) in a specific region.
+This may be
+because of hardware that reacts badly to the probing, or hardware
+that would be mistakenly identified, or merely
+hardware you don't want the kernel to initialize.
+.IP
+The reserve boot-time argument specifies an I/O port region that
+shouldn't be probed.
+A device driver will not probe a reserved region,
+unless another boot argument explicitly specifies that it do so.
+.IP
+For example, the boot line
+.IP
+.in +4n
+.EX
+reserve=0x300,32 blah=0x300
+.EE
+.in
+.IP
+keeps all device drivers except the driver for 'blah' from probing
+0x300\-0x31f.
+.TP
+.B "'panic=N'"
+By default, the kernel will not reboot after a panic, but this option
+will cause a kernel reboot after N seconds (if N is greater than zero).
+This panic timeout can also be set by
+.IP
+.in +4n
+.EX
+echo N > /proc/sys/kernel/panic
+.EE
+.in
+.TP
+.B "'reboot=[warm|cold][,[bios|hard]]'"
+Since Linux 2.0.22, a reboot is by default a cold reboot.
+One asks for the old default with 'reboot=warm'.
+(A cold reboot may be required to reset certain hardware,
+but might destroy not yet written data in a disk cache.
+A warm reboot may be faster.)
+By default, a reboot is hard, by asking the keyboard controller
+to pulse the reset line low, but there is at least one type
+of motherboard where that doesn't work.
+The option 'reboot=bios' will
+instead jump through the BIOS.
+.TP
+.BR 'nosmp' " and " 'maxcpus=N'
+(Only when __SMP__ is defined.)
+A command-line option of 'nosmp' or 'maxcpus=0' will disable SMP
+activation entirely; an option 'maxcpus=N' limits the maximum number
+of CPUs activated in SMP mode to N.
+.SS Boot arguments for use by kernel developers
+.TP
+.B "'debug'"
+Kernel messages are handed off to a daemon (e.g.,
+.BR klogd (8)
+or similar) so that they may be logged to disk.
+Messages with a priority above
+.I console_loglevel
+are also printed on the console.
+(For a discussion of log levels, see
+.BR syslog (2).)
+By default,
+.I console_loglevel
+is set to log messages at levels higher than
+.BR KERN_DEBUG .
+This boot argument will cause the kernel to also
+print messages logged at level
+.BR KERN_DEBUG .
+The console loglevel can also be set on a booted system via the
+.I /proc/sys/kernel/printk
+file (described in
+.BR syslog (2)),
+the
+.BR syslog (2)
+.B SYSLOG_ACTION_CONSOLE_LEVEL
+operation, or
+.BR dmesg (8).
+.TP
+.B "'profile=N'"
+It is possible to enable a kernel profiling function,
+if one wishes to find out where the kernel is spending its CPU cycles.
+Profiling is enabled by setting the variable
+.I prof_shift
+to a nonzero value.
+This is done either by specifying
+.B CONFIG_PROFILE
+at compile time, or by giving the 'profile=' option.
+Now the value that
+.I prof_shift
+gets will be N, when given, or
+.BR CONFIG_PROFILE_SHIFT ,
+when that is given, or 2, the default.
+The significance of this variable is that it
+gives the granularity of the profiling: each clock tick, if the
+system was executing kernel code, a counter is incremented:
+.IP
+.in +4n
+.EX
+profile[address >> prof_shift]++;
+.EE
+.in
+.IP
+The raw profiling information can be read from
+.IR /proc/profile .
+Probably you'll want to use a tool such as readprofile.c to digest it.
+Writing to
+.I /proc/profile
+will clear the counters.
+.SS Boot arguments for ramdisk use
+(Only if the kernel was compiled with
+.BR CONFIG_BLK_DEV_RAM .)
+In general it is a bad idea to use a ramdisk under Linux\[em]the
+system will use available memory more efficiently itself.
+But while booting,
+it is often useful to load the floppy contents into a
+ramdisk.
+One might also have a system in which first
+some modules (for filesystem or hardware) must be loaded
+before the main disk can be accessed.
+.IP
+In Linux 1.3.48, ramdisk handling was changed drastically.
+Earlier, the memory was allocated statically, and there was
+a 'ramdisk=N' parameter to tell its size.
+(This could also be set in the kernel image at compile time.)
+These days ram disks use the buffer cache, and grow dynamically.
+For a lot of information on the current ramdisk
+setup, see the kernel source file
+.I Documentation/blockdev/ramdisk.txt
+.RI ( Documentation/ramdisk.txt
+in older kernels).
+.IP
+There are four parameters, two boolean and two integral.
+.TP
+.B "'load_ramdisk=N'"
+If N=1, do load a ramdisk.
+If N=0, do not load a ramdisk.
+(This is the default.)
+.TP
+.B "'prompt_ramdisk=N'"
+If N=1, do prompt for insertion of the floppy.
+(This is the default.)
+If N=0, do not prompt.
+(Thus, this parameter is never needed.)
+.TP
+.BR 'ramdisk_size=N' " or (obsolete) " 'ramdisk=N'
+Set the maximal size of the ramdisk(s) to N kB.
+The default is 4096 (4\ MB).
+.TP
+.B "'ramdisk_start=N'"
+Sets the starting block number (the offset on the floppy where
+the ramdisk starts) to N.
+This is needed in case the ramdisk follows a kernel image.
+.TP
+.B "'noinitrd'"
+(Only if the kernel was compiled with
+.B CONFIG_BLK_DEV_RAM
+and
+.BR CONFIG_BLK_DEV_INITRD .)
+These days it is possible to compile the kernel to use initrd.
+When this feature is enabled, the boot process will load the kernel
+and an initial ramdisk; then the kernel converts initrd into
+a "normal" ramdisk, which is mounted read-write as root device;
+then
+.I /linuxrc
+is executed; afterward the "real" root filesystem is mounted,
+and the initrd filesystem is moved over to
+.IR /initrd ;
+finally
+the usual boot sequence (e.g., invocation of
+.IR /sbin/init )
+is performed.
+.IP
+For a detailed description of the initrd feature, see the kernel source file
+.I Documentation/admin\-guide/initrd.rst
+.\" commit 9d85025b0418163fae079c9ba8f8445212de8568
+(or
+.I Documentation/initrd.txt
+before Linux 4.10).
+.IP
+The 'noinitrd' option tells the kernel that although it was compiled for
+operation with initrd, it should not go through the above steps, but
+leave the initrd data under
+.IR /dev/initrd .
+(This device can be used only once: the data is freed as soon as
+the last process that used it has closed
+.IR /dev/initrd .)
+.SS Boot arguments for SCSI devices
+General notation for this section:
+.PP
+.I iobase
+-- the first I/O port that the SCSI host occupies.
+These are specified in hexadecimal notation,
+and usually lie in the range from 0x200 to 0x3ff.
+.PP
+.I irq
+-- the hardware interrupt that the card is configured to use.
+Valid values will be dependent on the card in question, but will
+usually be 5, 7, 9, 10, 11, 12, and 15.
+The other values are usually
+used for common peripherals like IDE hard disks, floppies, serial
+ports, and so on.
+.PP
+.I scsi\-id
+-- the ID that the host adapter uses to identify itself on the
+SCSI bus.
+Only some host adapters allow you to change this value, as
+most have it permanently specified internally.
+The usual default value
+is 7, but the Seagate and Future Domain TMC-950 boards use 6.
+.PP
+.I parity
+-- whether the SCSI host adapter expects the attached devices
+to supply a parity value with all information exchanges.
+Specifying a one indicates parity checking is enabled,
+and a zero disables parity checking.
+Again, not all adapters will support selection of parity
+behavior as a boot argument.
+.TP
+.B "'max_scsi_luns=...'"
+A SCSI device can have a number of 'subdevices' contained within
+itself.
+The most common example is one of the new SCSI CD-ROMs that
+handle more than one disk at a time.
+Each CD is addressed as a
+\&'Logical Unit Number' (LUN) of that particular device.
+But most
+devices, such as hard disks, tape drives, and such are only one device,
+and will be assigned to LUN zero.
+.IP
+Some poorly designed SCSI devices cannot handle being probed for
+LUNs not equal to zero.
+Therefore, if the compile-time flag
+.B CONFIG_SCSI_MULTI_LUN
+is not set, newer kernels will by default probe only LUN zero.
+.IP
+To specify the number of probed LUNs at boot, one enters
+\&'max_scsi_luns=n' as a boot arg, where n is a number between one and
+eight.
+To avoid problems as described above, one would use n=1 to
+avoid upsetting such broken devices.
+.TP
+.B "SCSI tape configuration"
+Some boot time configuration of the SCSI tape driver can be achieved
+by using the following:
+.IP
+.in +4n
+.EX
+.BI st= buf_size[,write_threshold[,max_bufs]]
+.EE
+.in
+.IP
+The first two numbers are specified in units of kB.
+The default
+.I buf_size
+is 32\ kB, and the maximum size that can be specified is a
+ridiculous 16384\ kB.
+The
+.I write_threshold
+is the value at which the buffer is committed to tape, with a
+default value of 30\ kB.
+The maximum number of buffers varies
+with the number of drives detected, and has a default of two.
+An example usage would be:
+.IP
+.in +4n
+.EX
+st=32,30,2
+.EE
+.in
+.IP
+Full details can be found in the file
+.I Documentation/scsi/st.txt
+(or
+.I drivers/scsi/README.st
+for older kernels) in the Linux kernel source.
+.SS Hard disks
+.TP
+.B "IDE Disk/CD-ROM Driver Parameters"
+The IDE driver accepts a number of parameters, which range from disk
+geometry specifications, to support for broken controller chips.
+Drive-specific options are specified by using 'hdX=' with X in 'a'\[en]'h'.
+.IP
+Non-drive-specific options are specified with the prefix 'hd='.
+Note that using a drive-specific prefix for a non-drive-specific option
+will still work, and the option will just be applied as expected.
+.IP
+Also note that 'hd=' can be used to refer to the next unspecified
+drive in the (a, ..., h) sequence.
+For the following discussions,
+the 'hd=' option will be cited for brevity.
+See the file
+.I Documentation/ide/ide.txt
+(or
+.I Documentation/ide.txt
+.\" Linux 2.0, 2.2, 2.4
+in older kernels, or
+.I drivers/block/README.ide
+in ancient kernels) in the Linux kernel source for more details.
+.TP
+.B "The 'hd=cyls,heads,sects[,wpcom[,irq]]' options"
+These options are used to specify the physical geometry of the disk.
+Only the first three values are required.
+The cylinder/head/sectors
+values will be those used by fdisk.
+The write precompensation value
+is ignored for IDE disks.
+The IRQ value specified will be the IRQ
+used for the interface that the drive resides on, and is not really a
+drive-specific parameter.
+.TP
+.B "The 'hd=serialize' option"
+The dual IDE interface CMD-640 chip is broken as designed such that
+when drives on the secondary interface are used at the same time as
+drives on the primary interface, it will corrupt your data.
+Using this
+option tells the driver to make sure that both interfaces are never
+used at the same time.
+.TP
+.B "The 'hd=noprobe' option"
+Do not probe for this drive.
+For example,
+.IP
+.in +4n
+.EX
+hdb=noprobe hdb=1166,7,17
+.EE
+.in
+.IP
+would disable the probe, but still specify the drive geometry so
+that it would be registered as a valid block device, and hence
+usable.
+.TP
+.B "The 'hd=nowerr' option"
+Some drives apparently have the
+.B WRERR_STAT
+bit stuck on permanently.
+This enables a work-around for these broken devices.
+.TP
+.B "The 'hd=cdrom' option"
+This tells the IDE driver that there is an ATAPI compatible CD-ROM
+attached in place of a normal IDE hard disk.
+In most cases the CD-ROM
+is identified automatically, but if it isn't then this may help.
+.TP
+.B "Standard ST-506 Disk Driver Options ('hd=')"
+The standard disk driver can accept geometry arguments for the disks
+similar to the IDE driver.
+Note however that it expects only three
+values (C/H/S); any more or any less and it will silently ignore you.
+Also, it accepts only 'hd=' as an argument, that is, 'hda='
+and so on are not valid here.
+The format is as follows:
+.IP
+.in +4n
+.EX
+hd=cyls,heads,sects
+.EE
+.in
+.IP
+If there are two disks installed, the above is repeated with the
+geometry parameters of the second disk.
+.SS Ethernet devices
+Different drivers make use of different parameters, but they all at
+least share having an IRQ, an I/O port base value, and a name.
+In its most generic form, it looks something like this:
+.PP
+.in +4n
+.EX
+ether=irq,iobase[,param_1[,...param_8]],name
+.EE
+.in
+.PP
+The first nonnumeric argument is taken as the name.
+The param_n values (if applicable) usually have different meanings for each
+different card/driver.
+Typical param_n values are used to specify
+things like shared memory address, interface selection, DMA channel
+and the like.
+.PP
+The most common use of this parameter is to force probing for a second
+ethercard, as the default is to probe only for one.
+This can be accomplished with a simple:
+.PP
+.in +4n
+.EX
+ether=0,0,eth1
+.EE
+.in
+.PP
+Note that the values of zero for the IRQ and I/O base in the above
+example tell the driver(s) to autoprobe.
+.PP
+The Ethernet-HowTo has extensive documentation on using multiple
+cards and on the card/driver-specific implementation
+of the param_n values where used.
+Interested readers should refer to
+the section in that document on their particular card.
+.SS The floppy disk driver
+There are many floppy driver options, and they are all listed in
+.I Documentation/blockdev/floppy.txt
+(or
+.I Documentation/floppy.txt
+in older kernels, or
+.I drivers/block/README.fd
+for ancient kernels) in the Linux kernel source.
+See that file for the details.
+.SS The sound driver
+The sound driver can also accept boot arguments to override the compiled-in
+values.
+This is not recommended, as it is rather complex.
+It is described in the Linux kernel source file
+.I Documentation/sound/oss/README.OSS
+.RI ( drivers/sound/Readme.linux
+in older kernel versions).
+It accepts
+a boot argument of the form:
+.PP
+.in +4n
+.EX
+sound=device1[,device2[,device3...[,device10]]]
+.EE
+.in
+.PP
+where each deviceN value is of the following format 0xTaaaId and the
+bytes are used as follows:
+.PP
+T \- device type: 1=FM, 2=SB, 3=PAS, 4=GUS, 5=MPU401, 6=SB16,
+7=SB16-MPU401
+.PP
+aaa \- I/O address in hex.
+.PP
+I \- interrupt line in hex (i.e., 10=a, 11=b, ...)
+.PP
+d \- DMA channel.
+.PP
+As you can see, it gets pretty messy, and you are better off compiling
+in your own personal values as recommended.
+Using a boot argument of
+\&'sound=0' will disable the sound driver entirely.
+.SS The line printer driver
+.TP
+.B "'lp='"
+.br
+Syntax:
+.IP
+.in +4n
+.EX
+lp=0
+lp=auto
+lp=reset
+lp=port[,port...]
+.EE
+.in
+.IP
+You can tell the printer driver what ports to use and what ports not
+to use.
+The latter comes in handy if you don't want the printer driver
+to claim all available parallel ports, so that other drivers
+(e.g., PLIP, PPA) can use them instead.
+.IP
+The format of the argument is multiple port names.
+For example,
+lp=none,parport0 would use the first parallel port for lp1, and
+disable lp0.
+To disable the printer driver entirely, one can use
+lp=0.
+.\" .SH AUTHORS
+.\" Linus Torvalds (and many others)
+.SH SEE ALSO
+.BR klogd (8),
+.BR mount (8)
+.PP
+For up-to-date information, see the kernel source file
+.IR Documentation/admin\-guide/kernel\-parameters.txt .
diff --git a/man7/bpf-helpers.7 b/man7/bpf-helpers.7
new file mode 100644
index 0000000..26ddf83
--- /dev/null
+++ b/man7/bpf-helpers.7
@@ -0,0 +1,5128 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "BPF-HELPERS" 7 "2023-04-11" "Linux v6.2"
+.SH NAME
+BPF-HELPERS \- list of eBPF helper functions
+.\" Copyright (C) All BPF authors and contributors from 2014 to present.
+.
+.\" See git log include/uapi/linux/bpf.h in kernel tree for details.
+.
+.\"
+.
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.
+.\"
+.
+.\" Please do not edit this file. It was generated from the documentation
+.
+.\" located in file include/uapi/linux/bpf.h of the Linux kernel sources
+.
+.\" (helpers description), and from scripts/bpf_doc.py in the same
+.
+.\" repository (header and footer).
+.
+.SH DESCRIPTION
+.sp
+The extended Berkeley Packet Filter (eBPF) subsystem consists of programs
+written in a pseudo\-assembly language, then attached to one of the several
+kernel hooks and run in reaction to specific events. This framework differs
+from the older, \(dqclassic\(dq BPF (or \(dqcBPF\(dq) in several aspects, one of them being
+the ability to call special functions (or \(dqhelpers\(dq) from within a program.
+These functions are restricted to a white\-list of helpers defined in the
+kernel.
+.sp
+These helpers are used by eBPF programs to interact with the system, or with
+the context in which they work. For instance, they can be used to print
+debugging messages, to get the time since the system was booted, to interact
+with eBPF maps, or to manipulate network packets. Since there are several eBPF
+program types, and they do not run in the same context, each program type
+can only call a subset of those helpers.
+.sp
+Due to eBPF conventions, a helper cannot have more than five arguments.
+.sp
+Internally, eBPF programs call directly into the compiled helper functions
+without requiring any foreign\-function interface. As a result, calling helpers
+introduces no overhead, thus offering excellent performance.
+.sp
+This document is an attempt to list and document the helpers available to eBPF
+developers. They are sorted by chronological order (the oldest helpers in the
+kernel at the top).
+.SH HELPERS
+.INDENT 0.0
+.TP
+.B \fBvoid *bpf_map_lookup_elem(struct bpf_map *\fP\fImap\fP\fB, const void *\fP\fIkey\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Perform a lookup in \fImap\fP for an entry associated to \fIkey\fP\&.
+.TP
+.B Return
+Map value associated to \fIkey\fP, or \fBNULL\fP if no entry was
+found.
+.UNINDENT
+.TP
+.B \fBlong bpf_map_update_elem(struct bpf_map *\fP\fImap\fP\fB, const void *\fP\fIkey\fP\fB, const void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Add or update the value of the entry associated to \fIkey\fP in
+\fImap\fP with \fIvalue\fP\&. \fIflags\fP is one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_NOEXIST\fP
+The entry for \fIkey\fP must not exist in the map.
+.TP
+.B \fBBPF_EXIST\fP
+The entry for \fIkey\fP must already exist in the map.
+.TP
+.B \fBBPF_ANY\fP
+No condition on the existence of the entry for \fIkey\fP\&.
+.UNINDENT
+.sp
+Flag value \fBBPF_NOEXIST\fP cannot be used for maps of types
+\fBBPF_MAP_TYPE_ARRAY\fP or \fBBPF_MAP_TYPE_PERCPU_ARRAY\fP (all
+elements always exist), the helper would return an error.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_map_delete_elem(struct bpf_map *\fP\fImap\fP\fB, const void *\fP\fIkey\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Delete entry with \fIkey\fP from \fImap\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For tracing programs, safely attempt to read \fIsize\fP bytes from
+kernel space address \fIunsafe_ptr\fP and store the data in \fIdst\fP\&.
+.sp
+Generally, use \fBbpf_probe_read_user\fP() or
+\fBbpf_probe_read_kernel\fP() instead.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_ktime_get_ns(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return the time elapsed since system boot, in nanoseconds.
+Does not include time the system was suspended.
+See: \fBclock_gettime\fP(\fBCLOCK_MONOTONIC\fP)
+.TP
+.B Return
+Current \fIktime\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_trace_printk(const char *\fP\fIfmt\fP\fB, u32\fP \fIfmt_size\fP\fB, ...)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is a \(dqprintk()\-like\(dq facility for debugging. It
+prints a message defined by format \fIfmt\fP (of size \fIfmt_size\fP)
+to file \fI/sys/kernel/debug/tracing/trace\fP from DebugFS, if
+available. It can take up to three additional \fBu64\fP
+arguments (as with all eBPF helpers, the total number of arguments is
+limited to five).
+.sp
+Each time the helper is called, it appends a line to the trace.
+Lines are discarded while \fI/sys/kernel/debug/tracing/trace\fP is
+open, use \fI/sys/kernel/debug/tracing/trace_pipe\fP to avoid this.
+The format of the trace is customizable, and the exact output
+one will get depends on the options set in
+\fI/sys/kernel/debug/tracing/trace_options\fP (see also the
+\fIREADME\fP file under the same directory). However, it usually
+defaults to something like:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+telnet\-470 [001] .N.. 419421.045894: 0x00000001: <fmt>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In the above:
+.INDENT 7.0
+.INDENT 3.5
+.INDENT 0.0
+.IP \(bu 2
+\fBtelnet\fP is the name of the current task.
+.IP \(bu 2
+\fB470\fP is the PID of the current task.
+.IP \(bu 2
+\fB001\fP is the CPU number on which the task is
+running.
+.IP \(bu 2
+In \fB\&.N..\fP, each character refers to a set of
+options (whether irqs are enabled, scheduling
+options, whether hard/softirqs are running, level of
+preempt_disabled respectively). \fBN\fP means that
+\fBTIF_NEED_RESCHED\fP and \fBPREEMPT_NEED_RESCHED\fP
+are set.
+.IP \(bu 2
+\fB419421.045894\fP is a timestamp.
+.IP \(bu 2
+\fB0x00000001\fP is a fake value used by BPF for the
+instruction pointer register.
+.IP \(bu 2
+\fB<fmt>\fP is the message formatted with \fIfmt\fP\&.
+.UNINDENT
+.UNINDENT
+.UNINDENT
+.sp
+The conversion specifiers supported by \fIfmt\fP are similar, but
+more limited than for printk(). They are \fB%d\fP, \fB%i\fP,
+\fB%u\fP, \fB%x\fP, \fB%ld\fP, \fB%li\fP, \fB%lu\fP, \fB%lx\fP, \fB%lld\fP,
+\fB%lli\fP, \fB%llu\fP, \fB%llx\fP, \fB%p\fP, \fB%s\fP\&. No modifier (size
+of field, padding with zeroes, etc.) is available, and the
+helper will return \fB\-EINVAL\fP (but print nothing) if it
+encounters an unknown specifier.
+.sp
+Also, note that \fBbpf_trace_printk\fP() is slow, and should
+only be used for debugging purposes. For this reason, a notice
+block (spanning several lines) is printed to kernel logs and
+states that the helper should not be used \(dqfor production use\(dq
+the first time this helper is used (or more precisely, when
+\fBtrace_printk\fP() buffers are allocated). For passing values
+to user space, perf events should be preferred.
+.TP
+.B Return
+The number of bytes written to the buffer, or a negative error
+in case of failure.
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_prandom_u32(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a pseudo\-random number.
+.sp
+From a security point of view, this helper uses its own
+pseudo\-random internal state, and cannot be used to infer the
+seed of other random functions in the kernel. However, it is
+essential to note that the generator used by the helper is not
+cryptographically secure.
+.TP
+.B Return
+A random 32\-bit unsigned value.
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_smp_processor_id(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the SMP (symmetric multiprocessing) processor id. Note that
+all programs run with migration disabled, which means that the
+SMP processor id is stable during all the execution of the
+program.
+.TP
+.B Return
+The SMP id of the processor running the program.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_store_bytes(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, const void *\fP\fIfrom\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Store \fIlen\fP bytes from address \fIfrom\fP into the packet
+associated to \fIskb\fP, at \fIoffset\fP\&. \fIflags\fP are a combination of
+\fBBPF_F_RECOMPUTE_CSUM\fP (automatically recompute the
+checksum for the packet after storing the bytes) and
+\fBBPF_F_INVALIDATE_HASH\fP (set \fIskb\fP\fB\->hash\fP, \fIskb\fP\fB\->swhash\fP and \fIskb\fP\fB\->l4hash\fP to 0).
+.sp
+A call to this helper may change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_l3_csum_replace(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, u64\fP \fIfrom\fP\fB, u64\fP \fIto\fP\fB, u64\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Recompute the layer 3 (e.g. IP) checksum for the packet
+associated to \fIskb\fP\&. Computation is incremental, so the helper
+must know the former value of the header field that was
+modified (\fIfrom\fP), the new value of this field (\fIto\fP), and the
+number of bytes (2 or 4) for this field, stored in \fIsize\fP\&.
+Alternatively, it is possible to store the difference between
+the previous and the new values of the header field in \fIto\fP, by
+setting \fIfrom\fP and \fIsize\fP to 0. For both methods, \fIoffset\fP
+indicates the location of the IP checksum within the packet.
+.sp
+This helper works in combination with \fBbpf_csum_diff\fP(),
+which does not update the checksum in\-place, but offers more
+flexibility and can handle sizes larger than 2 or 4 for the
+checksum to update.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_l4_csum_replace(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, u64\fP \fIfrom\fP\fB, u64\fP \fIto\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+packet associated to \fIskb\fP\&. Computation is incremental, so the
+helper must know the former value of the header field that was
+modified (\fIfrom\fP), the new value of this field (\fIto\fP), and the
+number of bytes (2 or 4) for this field, stored on the lowest
+four bits of \fIflags\fP\&. Alternatively, it is possible to store
+the difference between the previous and the new values of the
+header field in \fIto\fP, by setting \fIfrom\fP and the four lowest
+bits of \fIflags\fP to 0. For both methods, \fIoffset\fP indicates the
+location of the layer 4 checksum within the packet. In addition to
+the size of the field, actual flags can be combined (bitwise OR)
+into \fIflags\fP\&. With \fBBPF_F_MARK_MANGLED_0\fP, a null checksum is left
+untouched (unless \fBBPF_F_MARK_ENFORCE\fP is added as well), and
+for updates resulting in a null checksum the value is set to
+\fBCSUM_MANGLED_0\fP instead. Flag \fBBPF_F_PSEUDO_HDR\fP indicates
+the checksum is to be computed against a pseudo\-header.
+.sp
+This helper works in combination with \fBbpf_csum_diff\fP(),
+which does not update the checksum in\-place, but offers more
+flexibility and can handle sizes larger than 2 or 4 for the
+checksum to update.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_tail_call(void *\fP\fIctx\fP\fB, struct bpf_map *\fP\fIprog_array_map\fP\fB, u32\fP \fIindex\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This special helper is used to trigger a \(dqtail call\(dq, or in
+other words, to jump into another eBPF program. The same stack
+frame is used (but values on stack and in registers for the
+caller are not accessible to the callee). This mechanism allows
+for program chaining, either for raising the maximum number of
+available eBPF instructions, or to execute given programs in
+conditional blocks. For security reasons, there is an upper
+limit to the number of successive tail calls that can be
+performed.
+.sp
+Upon call of this helper, the program attempts to jump into a
+program referenced at index \fIindex\fP in \fIprog_array_map\fP, a
+special map of type \fBBPF_MAP_TYPE_PROG_ARRAY\fP, and passes
+\fIctx\fP, a pointer to the context.
+.sp
+If the call succeeds, the kernel immediately runs the first
+instruction of the new program. This is not a function call,
+and it never returns to the previous program. If the call
+fails, then the helper has no effect, and the caller continues
+to run its subsequent instructions. A call can fail if the
+destination program for the jump does not exist (i.e. \fIindex\fP
+is greater than or equal to the number of entries in \fIprog_array_map\fP), or
+if the maximum number of tail calls has been reached for this
+chain of programs. This limit is defined in the kernel by the
+macro \fBMAX_TAIL_CALL_CNT\fP (not accessible to user space),
+which is currently set to 33.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_clone_redirect(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIifindex\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Clone and redirect the packet associated to \fIskb\fP to another
+net device of index \fIifindex\fP\&. Both ingress and egress
+interfaces can be used for redirection. The \fBBPF_F_INGRESS\fP
+value in \fIflags\fP is used to make the distinction (ingress path
+is selected if the flag is present, egress path otherwise).
+This is the only flag supported for now.
+.sp
+In comparison with \fBbpf_redirect\fP() helper,
+\fBbpf_clone_redirect\fP() has the associated cost of
+duplicating the packet buffer, but this can be executed out of
+the eBPF program. Conversely, \fBbpf_redirect\fP() is more
+efficient, but it is handled through an action code where the
+redirection happens only after the eBPF program has returned.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_current_pid_tgid(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the current pid and tgid.
+.TP
+.B Return
+A 64\-bit integer containing the current tgid and pid, and
+created as such:
+\fIcurrent_task\fP\fB\->tgid << 32 |\fP
+\fIcurrent_task\fP\fB\->pid\fP\&.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_current_uid_gid(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the current uid and gid.
+.TP
+.B Return
+A 64\-bit integer containing the current GID and UID, and
+created as such: \fIcurrent_gid\fP \fB<< 32 |\fP \fIcurrent_uid\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_current_comm(void *\fP\fIbuf\fP\fB, u32\fP \fIsize_of_buf\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Copy the \fBcomm\fP attribute of the current task into \fIbuf\fP of
+\fIsize_of_buf\fP\&. The \fBcomm\fP attribute contains the name of
+the executable (excluding the path) for the current task. The
+\fIsize_of_buf\fP must be strictly positive. On success, the
+helper makes sure that the \fIbuf\fP is NUL\-terminated. On failure,
+it is filled with zeroes.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_cgroup_classid(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve the classid for the current task, i.e. for the net_cls
+cgroup to which \fIskb\fP belongs.
+.sp
+This helper can be used on TC egress path, but not on ingress.
+.sp
+The net_cls cgroup provides an interface to tag network packets
+based on a user\-provided identifier for all traffic coming from
+the tasks belonging to the related cgroup. See also the related
+kernel documentation, available from the Linux sources in file
+\fIDocumentation/admin\-guide/cgroup\-v1/net_cls.rst\fP\&.
+.sp
+The Linux kernel has two versions for cgroups: there are
+cgroups v1 and cgroups v2. Both are available to users, who can
+use a mixture of them, but note that the net_cls cgroup is for
+cgroup v1 only. This makes it incompatible with BPF programs
+run on cgroups, which is a cgroup\-v2\-only feature (a socket can
+only hold data for one version of cgroups at a time).
+.sp
+This helper is only available if the kernel was compiled with
+the \fBCONFIG_CGROUP_NET_CLASSID\fP configuration option set to
+\(dq\fBy\fP\(dq or to \(dq\fBm\fP\(dq.
+.TP
+.B Return
+The classid, or 0 for the default unconfigured classid.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_vlan_push(struct sk_buff *\fP\fIskb\fP\fB, __be16\fP \fIvlan_proto\fP\fB, u16\fP \fIvlan_tci\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Push a \fIvlan_tci\fP (VLAN tag control information) of protocol
+\fIvlan_proto\fP to the packet associated to \fIskb\fP, then update
+the checksum. Note that if \fIvlan_proto\fP is different from
+\fBETH_P_8021Q\fP and \fBETH_P_8021AD\fP, it is considered to
+be \fBETH_P_8021Q\fP\&.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_vlan_pop(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Pop a VLAN header from the packet associated to \fIskb\fP\&.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_get_tunnel_key(struct sk_buff *\fP\fIskb\fP\fB, struct bpf_tunnel_key *\fP\fIkey\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get tunnel metadata. This helper takes a pointer \fIkey\fP to an
+empty \fBstruct bpf_tunnel_key\fP of \fIsize\fP, that will be
+filled with tunnel metadata for the packet associated to \fIskb\fP\&.
+The \fIflags\fP can be set to \fBBPF_F_TUNINFO_IPV6\fP, which
+indicates that the tunnel is based on IPv6 protocol instead of
+IPv4.
+.sp
+The \fBstruct bpf_tunnel_key\fP is an object that generalizes the
+principal parameters used by various tunneling protocols into a
+single struct. This way, it can be used to easily make a
+decision based on the contents of the encapsulation header,
+\(dqsummarized\(dq in this struct. In particular, it holds the IP
+address of the remote end (IPv4 or IPv6, depending on the case)
+in \fIkey\fP\fB\->remote_ipv4\fP or \fIkey\fP\fB\->remote_ipv6\fP\&. Also,
+this struct exposes the \fIkey\fP\fB\->tunnel_id\fP, which is
+generally mapped to a VNI (Virtual Network Identifier), making
+it programmable together with the \fBbpf_skb_set_tunnel_key\fP() helper.
+.sp
+Let\(aqs imagine that the following code is part of a program
+attached to the TC ingress interface, on one end of a GRE
+tunnel, and is supposed to filter out all messages coming from
+remote ends with IPv4 address other than 10.0.0.1:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+int ret;
+struct bpf_tunnel_key key = {};
+
+ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+if (ret < 0)
+ return TC_ACT_SHOT; // drop packet
+
+if (key.remote_ipv4 != 0x0a000001)
+ return TC_ACT_SHOT; // drop packet
+
+return TC_ACT_OK; // accept packet
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+This interface can also be used with all encapsulation devices
+that can operate in \(dqcollect metadata\(dq mode: instead of having
+one network device per specific configuration, the \(dqcollect
+metadata\(dq mode only requires a single device where the
+configuration can be extracted from this helper.
+.sp
+This can be used together with various tunnels such as VXLAN,
+Geneve, GRE or IP in IP (IPIP).
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_set_tunnel_key(struct sk_buff *\fP\fIskb\fP\fB, struct bpf_tunnel_key *\fP\fIkey\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Populate tunnel metadata for packet associated to \fIskb\fP\&. The
+tunnel metadata is set to the contents of \fIkey\fP, of \fIsize\fP\&. The
+\fIflags\fP can be set to a combination of the following values:
+.INDENT 7.0
+.TP
+.B \fBBPF_F_TUNINFO_IPV6\fP
+Indicate that the tunnel is based on IPv6 protocol
+instead of IPv4.
+.TP
+.B \fBBPF_F_ZERO_CSUM_TX\fP
+For IPv4 packets, add a flag to tunnel metadata
+indicating that checksum computation should be skipped
+and checksum set to zeroes.
+.TP
+.B \fBBPF_F_DONT_FRAGMENT\fP
+Add a flag to tunnel metadata indicating that the
+packet should not be fragmented.
+.TP
+.B \fBBPF_F_SEQ_NUMBER\fP
+Add a flag to tunnel metadata indicating that a
+sequence number should be added to tunnel header before
+sending the packet. This flag was added for GRE
+encapsulation, but might be used with other protocols
+as well in the future.
+.UNINDENT
+.sp
+Here is a typical usage on the transmit path:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+struct bpf_tunnel_key key;
+ populate key ...
+bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+See also the description of the \fBbpf_skb_get_tunnel_key\fP()
+helper for additional information.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_perf_event_read(struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Read the value of a perf event counter. This helper relies on a
+\fImap\fP of type \fBBPF_MAP_TYPE_PERF_EVENT_ARRAY\fP\&. The nature of
+the perf event counter is selected when \fImap\fP is updated with
+perf event file descriptors. The \fImap\fP is an array whose size
+is the number of available CPUs, and each cell contains a value
+relative to one CPU. The value to retrieve is indicated by
+\fIflags\fP, that contains the index of the CPU to look up, masked
+with \fBBPF_F_INDEX_MASK\fP\&. Alternatively, \fIflags\fP can be set to
+\fBBPF_F_CURRENT_CPU\fP to indicate that the value for the
+current CPU should be retrieved.
+.sp
+Note that before Linux 4.13, only hardware perf events can be
+retrieved.
+.sp
+Also, be aware that the newer helper
+\fBbpf_perf_event_read_value\fP() is recommended over
+\fBbpf_perf_event_read\fP() in general. The latter has some ABI
+quirks where error and counter value are used as a return code
+(which is wrong to do since ranges may overlap). This issue is
+fixed with \fBbpf_perf_event_read_value\fP(), which at the same
+time provides more features over the \fBbpf_perf_event_read\fP() interface. Please refer to the description of
+\fBbpf_perf_event_read_value\fP() for details.
+.TP
+.B Return
+The value of the perf event counter read from the map, or a
+negative error code in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_redirect(u32\fP \fIifindex\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Redirect the packet to another net device of index \fIifindex\fP\&.
+This helper is somewhat similar to \fBbpf_clone_redirect\fP(), except that the packet is not cloned, which provides
+increased performance.
+.sp
+Except for XDP, both ingress and egress interfaces can be used
+for redirection. The \fBBPF_F_INGRESS\fP value in \fIflags\fP is used
+to make the distinction (ingress path is selected if the flag
+is present, egress path otherwise). Currently, XDP only
+supports redirection to the egress interface, and accepts no
+flag at all.
+.sp
+The same effect can also be attained with the more generic
+\fBbpf_redirect_map\fP(), which uses a BPF map to store the
+redirect target instead of providing it directly to the helper.
+.TP
+.B Return
+For XDP, the helper returns \fBXDP_REDIRECT\fP on success or
+\fBXDP_ABORTED\fP on error. For other program types, the values
+are \fBTC_ACT_REDIRECT\fP on success or \fBTC_ACT_SHOT\fP on
+error.
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_route_realm(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve the realm of the route, that is to say the
+\fBtclassid\fP field of the destination for the \fIskb\fP\&. The
+identifier retrieved is a user\-provided tag, similar to the
+one used with the net_cls cgroup (see description for
+\fBbpf_get_cgroup_classid\fP() helper), but here this tag is
+held by a route (a destination entry), not by a task.
+.sp
+Retrieving this identifier works with the clsact TC egress hook
+(see also \fBtc\-bpf(8)\fP), or alternatively on conventional
+classful egress qdiscs, but not on TC ingress path. In case of
+clsact TC egress hook, this has the advantage that, internally,
+the destination entry has not been dropped yet in the transmit
+path. Therefore, the destination entry does not need to be
+artificially held via \fBnetif_keep_dst\fP() for a classful
+qdisc until the \fIskb\fP is freed.
+.sp
+This helper is available only if the kernel was compiled with
+\fBCONFIG_IP_ROUTE_CLASSID\fP configuration option.
+.TP
+.B Return
+The realm of the route for the packet associated to \fIskb\fP, or 0
+if none was found.
+.UNINDENT
+.TP
+.B \fBlong bpf_perf_event_output(void *\fP\fIctx\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB, void *\fP\fIdata\fP\fB, u64\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Write raw \fIdata\fP blob into a special BPF perf event held by
+\fImap\fP of type \fBBPF_MAP_TYPE_PERF_EVENT_ARRAY\fP\&. This perf
+event must have the following attributes: \fBPERF_SAMPLE_RAW\fP
+as \fBsample_type\fP, \fBPERF_TYPE_SOFTWARE\fP as \fBtype\fP, and
+\fBPERF_COUNT_SW_BPF_OUTPUT\fP as \fBconfig\fP\&.
+.sp
+The \fIflags\fP are used to indicate the index in \fImap\fP for which
+the value must be put, masked with \fBBPF_F_INDEX_MASK\fP\&.
+Alternatively, \fIflags\fP can be set to \fBBPF_F_CURRENT_CPU\fP
+to indicate that the index of the current CPU core should be
+used.
+.sp
+The value to write, of \fIsize\fP, is passed through eBPF stack and
+pointed by \fIdata\fP\&.
+.sp
+The context of the program \fIctx\fP also needs to be passed to the
+helper.
+.sp
+On user space, a program willing to read the values needs to
+call \fBperf_event_open\fP() on the perf event (either for
+one or for all CPUs) and to store the file descriptor into the
+\fImap\fP\&. This must be done before the eBPF program can send data
+into it. An example is available in file
+\fIsamples/bpf/trace_output_user.c\fP in the Linux kernel source
+tree (the eBPF program counterpart is in
+\fIsamples/bpf/trace_output_kern.c\fP).
+.sp
+\fBbpf_perf_event_output\fP() achieves better performance
+than \fBbpf_trace_printk\fP() for sharing data with user
+space, and is much better suited for streaming data from eBPF
+programs.
+.sp
+Note that this helper is not restricted to tracing use cases
+and can be used with programs attached to TC or XDP as well,
+where it allows for passing data to user space listeners. Data
+can be:
+.INDENT 7.0
+.IP \(bu 2
+Only custom structs,
+.IP \(bu 2
+Only the packet payload, or
+.IP \(bu 2
+A combination of both.
+.UNINDENT
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_load_bytes(const void *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, void *\fP\fIto\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper was provided as an easy way to load data from a
+packet. It can be used to load \fIlen\fP bytes from \fIoffset\fP from
+the packet associated to \fIskb\fP, into the buffer pointed by
+\fIto\fP\&.
+.sp
+Since Linux 4.7, usage of this helper has mostly been replaced
+by \(dqdirect packet access\(dq, enabling packet data to be
+manipulated with \fIskb\fP\fB\->data\fP and \fIskb\fP\fB\->data_end\fP
+pointing respectively to the first byte of packet data and to
+the byte after the last byte of packet data. However, it
+remains useful if one wishes to read large quantities of data
+at once from a packet into the eBPF stack.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_stackid(void *\fP\fIctx\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Walk a user or a kernel stack and return its id. To achieve
+this, the helper needs \fIctx\fP, which is a pointer to the context
+on which the tracing program is executed, and a pointer to a
+\fImap\fP of type \fBBPF_MAP_TYPE_STACK_TRACE\fP\&.
+.sp
+The last argument, \fIflags\fP, holds the number of stack frames to
+skip (from 0 to 255), masked with
+\fBBPF_F_SKIP_FIELD_MASK\fP\&. The next bits can be used to set
+a combination of the following flags:
+.INDENT 7.0
+.TP
+.B \fBBPF_F_USER_STACK\fP
+Collect a user space stack instead of a kernel stack.
+.TP
+.B \fBBPF_F_FAST_STACK_CMP\fP
+Compare stacks by hash only.
+.TP
+.B \fBBPF_F_REUSE_STACKID\fP
+If two different stacks hash into the same \fIstackid\fP,
+discard the old one.
+.UNINDENT
+.sp
+The stack id retrieved is a 32 bit long integer handle which
+can be further combined with other data (including other stack
+ids) and used as a key into maps. This can be useful for
+generating a variety of graphs (such as flame graphs or off\-cpu
+graphs).
+.sp
+For walking a stack, this helper is an improvement over
+\fBbpf_probe_read\fP(), which can be used with unrolled loops
+but is not efficient and consumes a lot of eBPF instructions.
+Instead, \fBbpf_get_stackid\fP() can collect up to
+\fBPERF_MAX_STACK_DEPTH\fP both kernel and user frames. Note that
+this limit can be controlled with the \fBsysctl\fP program, and
+that it should be manually increased in order to profile long
+user stacks (such as stacks for Java programs). To do so, use:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+# sysctl kernel.perf_event_max_stack=<new value>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.TP
+.B Return
+The positive or null stack id on success, or a negative error
+in case of failure.
+.UNINDENT
+.TP
+.B \fBs64 bpf_csum_diff(__be32 *\fP\fIfrom\fP\fB, u32\fP \fIfrom_size\fP\fB, __be32 *\fP\fIto\fP\fB, u32\fP \fIto_size\fP\fB, __wsum\fP \fIseed\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Compute a checksum difference, from the raw buffer pointed by
+\fIfrom\fP, of length \fIfrom_size\fP (that must be a multiple of 4),
+towards the raw buffer pointed by \fIto\fP, of size \fIto_size\fP
+(same remark). An optional \fIseed\fP can be added to the value
+(this can be cascaded, the seed may come from a previous call
+to the helper).
+.sp
+This is flexible enough to be used in several ways:
+.INDENT 7.0
+.IP \(bu 2
+With \fIfrom_size\fP == 0, \fIto_size\fP > 0 and \fIseed\fP set to
+checksum, it can be used when pushing new data.
+.IP \(bu 2
+With \fIfrom_size\fP > 0, \fIto_size\fP == 0 and \fIseed\fP set to
+checksum, it can be used when removing data from a packet.
+.IP \(bu 2
+With \fIfrom_size\fP > 0, \fIto_size\fP > 0 and \fIseed\fP set to 0, it
+can be used to compute a diff. Note that \fIfrom_size\fP and
+\fIto_size\fP do not need to be equal.
+.UNINDENT
+.sp
+This helper can be used in combination with
+\fBbpf_l3_csum_replace\fP() and \fBbpf_l4_csum_replace\fP(), to
+which one can feed in the difference computed with
+\fBbpf_csum_diff\fP().
+.TP
+.B Return
+The checksum result, or a negative error code in case of
+failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_get_tunnel_opt(struct sk_buff *\fP\fIskb\fP\fB, void *\fP\fIopt\fP\fB, u32\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve tunnel options metadata for the packet associated to
+\fIskb\fP, and store the raw tunnel option data to the buffer \fIopt\fP
+of \fIsize\fP\&.
+.sp
+This helper can be used with encapsulation devices that can
+operate in \(dqcollect metadata\(dq mode (please refer to the related
+note in the description of \fBbpf_skb_get_tunnel_key\fP() for
+more details). A particular example where this can be used is
+in combination with the Geneve encapsulation protocol, where it
+allows for pushing (with \fBbpf_skb_set_tunnel_opt\fP() helper)
+and retrieving arbitrary TLVs (Type\-Length\-Value headers) from
+the eBPF program. This allows for full customization of these
+headers.
+.TP
+.B Return
+The size of the option data retrieved.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_set_tunnel_opt(struct sk_buff *\fP\fIskb\fP\fB, void *\fP\fIopt\fP\fB, u32\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set tunnel options metadata for the packet associated to \fIskb\fP
+to the option data contained in the raw buffer \fIopt\fP of \fIsize\fP\&.
+.sp
+See also the description of the \fBbpf_skb_get_tunnel_opt\fP()
+helper for additional information.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_change_proto(struct sk_buff *\fP\fIskb\fP\fB, __be16\fP \fIproto\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Change the protocol of the \fIskb\fP to \fIproto\fP\&. Currently
+supported are transition from IPv4 to IPv6, and from IPv6 to
+IPv4. The helper takes care of the groundwork for the
+transition, including resizing the socket buffer. The eBPF
+program is expected to fill the new headers, if any, via
+\fBbpf_skb_store_bytes\fP() and to recompute the checksums with
+\fBbpf_l3_csum_replace\fP() and \fBbpf_l4_csum_replace\fP(). The main case for this helper is to perform NAT64
+operations out of an eBPF program.
+.sp
+Internally, the GSO type is marked as dodgy so that headers are
+checked and segments are recalculated by the GSO/GRO engine.
+The size for GSO target is adapted as well.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_change_type(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fItype\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Change the packet type for the packet associated to \fIskb\fP\&. This
+comes down to setting \fIskb\fP\fB\->pkt_type\fP to \fItype\fP, except
+the eBPF program does not have a write access to \fIskb\fP\fB\->pkt_type\fP beside this helper. Using a helper here allows
+for graceful handling of errors.
+.sp
+The major use case is to change incoming \fIskb\fPs to
+\fBPACKET_HOST\fP in a programmatic way instead of having to
+recirculate via \fBbpf_redirect\fP(..., \fBBPF_F_INGRESS\fP), for
+example.
+.sp
+Note that \fItype\fP only allows certain values. At this time, they
+are:
+.INDENT 7.0
+.TP
+.B \fBPACKET_HOST\fP
+Packet is for us.
+.TP
+.B \fBPACKET_BROADCAST\fP
+Send packet to all.
+.TP
+.B \fBPACKET_MULTICAST\fP
+Send packet to group.
+.TP
+.B \fBPACKET_OTHERHOST\fP
+Send packet to someone else.
+.UNINDENT
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_under_cgroup(struct sk_buff *\fP\fIskb\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u32\fP \fIindex\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check whether \fIskb\fP is a descendant of the cgroup2 held by
+\fImap\fP of type \fBBPF_MAP_TYPE_CGROUP_ARRAY\fP, at \fIindex\fP\&.
+.TP
+.B Return
+The return value depends on the result of the test, and can be:
+.INDENT 7.0
+.IP \(bu 2
+0, if the \fIskb\fP failed the cgroup2 descendant test.
+.IP \(bu 2
+1, if the \fIskb\fP passed the cgroup2 descendant test.
+.IP \(bu 2
+A negative error code, if an error occurred.
+.UNINDENT
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_hash_recalc(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve the hash of the packet, \fIskb\fP\fB\->hash\fP\&. If it is
+not set, in particular if the hash was cleared due to mangling,
+recompute this hash. Later accesses to the hash can be done
+directly with \fIskb\fP\fB\->hash\fP\&.
+.sp
+Calling \fBbpf_set_hash_invalid\fP(), changing a packet
+prototype with \fBbpf_skb_change_proto\fP(), or calling
+\fBbpf_skb_store_bytes\fP() with the
+\fBBPF_F_INVALIDATE_HASH\fP flag are actions susceptible to clear
+the hash and to trigger a new computation for the next call to
+\fBbpf_get_hash_recalc\fP().
+.TP
+.B Return
+The 32\-bit hash.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_current_task(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the current task.
+.TP
+.B Return
+A pointer to the current task struct.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_write_user(void *\fP\fIdst\fP\fB, const void *\fP\fIsrc\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Attempt in a safe way to write \fIlen\fP bytes from the buffer
+\fIsrc\fP to \fIdst\fP in memory. It only works for threads that are in
+user context, and \fIdst\fP must be a valid user space address.
+.sp
+This helper should not be used to implement any kind of
+security mechanism because of TOC\-TOU attacks, but rather to
+debug, divert, and manipulate execution of semi\-cooperative
+processes.
+.sp
+Keep in mind that this feature is meant for experiments, and it
+has a risk of crashing the system and running programs.
+Therefore, when an eBPF program using this helper is attached,
+a warning including PID and process name is printed to kernel
+logs.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_current_task_under_cgroup(struct bpf_map *\fP\fImap\fP\fB, u32\fP \fIindex\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check whether the probe is being run in the context of a given
+subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+\fImap\fP of type \fBBPF_MAP_TYPE_CGROUP_ARRAY\fP, at \fIindex\fP\&.
+.TP
+.B Return
+The return value depends on the result of the test, and can be:
+.INDENT 7.0
+.IP \(bu 2
+1, if current task belongs to the cgroup2.
+.IP \(bu 2
+0, if current task does not belong to the cgroup2.
+.IP \(bu 2
+A negative error code, if an error occurred.
+.UNINDENT
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_change_tail(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Resize (trim or grow) the packet associated to \fIskb\fP to the
+new \fIlen\fP\&. The \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+The basic idea is that the helper performs the needed work to
+change the size of the packet, then the eBPF program rewrites
+the rest via helpers like \fBbpf_skb_store_bytes\fP(),
+\fBbpf_l3_csum_replace\fP(), \fBbpf_l4_csum_replace\fP()
+and others. This helper is a slow path utility intended for
+replies with control messages. And because it is targeted for
+slow path, the helper itself can afford to be slow: it
+implicitly linearizes, unclones and drops offloads from the
+\fIskb\fP\&.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_pull_data(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Pull in non\-linear data in case the \fIskb\fP is non\-linear and not
+all of \fIlen\fP are part of the linear section. Make \fIlen\fP bytes
+from \fIskb\fP readable and writable. If a zero value is passed for
+\fIlen\fP, then all bytes in the linear part of \fIskb\fP will be made
+readable and writable.
+.sp
+This helper is only needed for reading and writing with direct
+packet access.
+.sp
+For direct packet access, testing that offsets to access
+are within packet boundaries (test on \fIskb\fP\fB\->data_end\fP) is
+susceptible to fail if offsets are invalid, or if the requested
+data is in non\-linear parts of the \fIskb\fP\&. On failure the
+program can just bail out, or in the case of a non\-linear
+buffer, use a helper to make the data available. The
+\fBbpf_skb_load_bytes\fP() helper is a first solution to access
+the data. Another one consists in using \fBbpf_skb_pull_data\fP()
+to pull in once the non\-linear parts, then retesting and
+eventually accessing the data.
+.sp
+At the same time, this also makes sure the \fIskb\fP is uncloned,
+which is a necessary condition for direct write. As this needs
+to be an invariant for the write part only, the verifier
+detects writes and adds a prologue that is calling
+\fBbpf_skb_pull_data()\fP to effectively unclone the \fIskb\fP from
+the very beginning in case it is indeed cloned.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBs64 bpf_csum_update(struct sk_buff *\fP\fIskb\fP\fB, __wsum\fP \fIcsum\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Add the checksum \fIcsum\fP into \fIskb\fP\fB\->csum\fP in case the
+driver has supplied a checksum for the entire packet into that
+field. Return an error otherwise. This helper is intended to be
+used in combination with \fBbpf_csum_diff\fP(), in particular
+when the checksum needs to be updated after data has been
+written into the packet through direct packet access.
+.TP
+.B Return
+The checksum on success, or a negative error code in case of
+failure.
+.UNINDENT
+.TP
+.B \fBvoid bpf_set_hash_invalid(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Invalidate the current \fIskb\fP\fB\->hash\fP\&. It can be used after
+mangling on headers through direct packet access, in order to
+indicate that the hash is outdated and to trigger a
+recalculation the next time the kernel tries to access this
+hash or when the \fBbpf_get_hash_recalc\fP() helper is called.
+.TP
+.B Return
+void.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_numa_node_id(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return the id of the current NUMA node. The primary use case
+for this helper is the selection of sockets for the local NUMA
+node, when the program is attached to sockets using the
+\fBSO_ATTACH_REUSEPORT_EBPF\fP option (see also \fBsocket(7)\fP),
+but the helper is also available to other eBPF program types,
+similarly to \fBbpf_get_smp_processor_id\fP().
+.TP
+.B Return
+The id of current NUMA node.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_change_head(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Grows headroom of packet associated to \fIskb\fP and adjusts the
+offset of the MAC header accordingly, adding \fIlen\fP bytes of
+space. It automatically extends and reallocates memory as
+required.
+.sp
+This helper can be used on a layer 3 \fIskb\fP to push a MAC header
+for redirection into a layer 2 device.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_adjust_head(struct xdp_buff *\fP\fIxdp_md\fP\fB, int\fP \fIdelta\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Adjust (move) \fIxdp_md\fP\fB\->data\fP by \fIdelta\fP bytes. Note that
+it is possible to use a negative value for \fIdelta\fP\&. This helper
+can be used to prepare the packet for pushing or popping
+headers.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read_str(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Copy a NUL terminated string from an unsafe kernel address
+\fIunsafe_ptr\fP to \fIdst\fP\&. See \fBbpf_probe_read_kernel_str\fP() for
+more details.
+.sp
+Generally, use \fBbpf_probe_read_user_str\fP() or
+\fBbpf_probe_read_kernel_str\fP() instead.
+.TP
+.B Return
+On success, the strictly positive length of the string,
+including the trailing NUL character. On error, a negative
+value.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_socket_cookie(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+If the \fBstruct sk_buff\fP pointed by \fIskb\fP has a known socket,
+retrieve the cookie (generated by the kernel) of this socket.
+If no cookie has been set yet, generate a new cookie. Once
+generated, the socket cookie remains stable for the life of the
+socket. This helper can be useful for monitoring per socket
+networking traffic statistics as it provides a global socket
+identifier that can be assumed unique.
+.TP
+.B Return
+An 8\-byte long unique number on success, or 0 if the socket
+field is missing inside \fIskb\fP\&.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_socket_cookie(struct bpf_sock_addr *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Equivalent to \fBbpf_get_socket_cookie\fP() helper that accepts
+\fIskb\fP, but gets socket from \fBstruct bpf_sock_addr\fP context.
+.TP
+.B Return
+An 8\-byte long unique number.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_socket_cookie(struct bpf_sock_ops *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Equivalent to \fBbpf_get_socket_cookie\fP() helper that accepts
+\fIskb\fP, but gets socket from \fBstruct bpf_sock_ops\fP context.
+.TP
+.B Return
+An 8\-byte long unique number.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_socket_cookie(struct sock *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Equivalent to \fBbpf_get_socket_cookie\fP() helper that accepts
+\fIsk\fP, but gets socket from a BTF \fBstruct sock\fP\&. This helper
+also works for sleepable programs.
+.TP
+.B Return
+An 8\-byte long unique number or 0 if \fIsk\fP is NULL.
+.UNINDENT
+.TP
+.B \fBu32 bpf_get_socket_uid(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the owner UID of the socket associated to \fIskb\fP\&.
+.TP
+.B Return
+The owner UID of the socket associated to \fIskb\fP\&. If the socket
+is \fBNULL\fP, or if it is not a full socket (i.e. if it is a
+time\-wait or a request socket instead), \fBoverflowuid\fP value
+is returned (note that \fBoverflowuid\fP might also be the actual
+UID value for the socket).
+.UNINDENT
+.TP
+.B \fBlong bpf_set_hash(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIhash\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set the full hash for \fIskb\fP (set the field \fIskb\fP\fB\->hash\fP)
+to value \fIhash\fP\&.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_setsockopt(void *\fP\fIbpf_socket\fP\fB, int\fP \fIlevel\fP\fB, int\fP \fIoptname\fP\fB, void *\fP\fIoptval\fP\fB, int\fP \fIoptlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Emulate a call to \fBsetsockopt()\fP on the socket associated to
+\fIbpf_socket\fP, which must be a full socket. The \fIlevel\fP at
+which the option resides and the name \fIoptname\fP of the option
+must be specified, see \fBsetsockopt(2)\fP for more information.
+The option value of length \fIoptlen\fP is pointed by \fIoptval\fP\&.
+.sp
+\fIbpf_socket\fP should be one of the following:
+.INDENT 7.0
+.IP \(bu 2
+\fBstruct bpf_sock_ops\fP for \fBBPF_PROG_TYPE_SOCK_OPS\fP\&.
+.IP \(bu 2
+\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP
+and \fBBPF_CGROUP_INET6_CONNECT\fP\&.
+.UNINDENT
+.sp
+This helper actually implements a subset of \fBsetsockopt()\fP\&.
+It supports the following \fIlevel\fPs:
+.INDENT 7.0
+.IP \(bu 2
+\fBSOL_SOCKET\fP, which supports the following \fIoptname\fPs:
+\fBSO_RCVBUF\fP, \fBSO_SNDBUF\fP, \fBSO_MAX_PACING_RATE\fP,
+\fBSO_PRIORITY\fP, \fBSO_RCVLOWAT\fP, \fBSO_MARK\fP,
+\fBSO_BINDTODEVICE\fP, \fBSO_KEEPALIVE\fP, \fBSO_REUSEADDR\fP,
+\fBSO_REUSEPORT\fP, \fBSO_BINDTOIFINDEX\fP, \fBSO_TXREHASH\fP\&.
+.IP \(bu 2
+\fBIPPROTO_TCP\fP, which supports the following \fIoptname\fPs:
+\fBTCP_CONGESTION\fP, \fBTCP_BPF_IW\fP,
+\fBTCP_BPF_SNDCWND_CLAMP\fP, \fBTCP_SAVE_SYN\fP,
+\fBTCP_KEEPIDLE\fP, \fBTCP_KEEPINTVL\fP, \fBTCP_KEEPCNT\fP,
+\fBTCP_SYNCNT\fP, \fBTCP_USER_TIMEOUT\fP, \fBTCP_NOTSENT_LOWAT\fP,
+\fBTCP_NODELAY\fP, \fBTCP_MAXSEG\fP, \fBTCP_WINDOW_CLAMP\fP,
+\fBTCP_THIN_LINEAR_TIMEOUTS\fP, \fBTCP_BPF_DELACK_MAX\fP,
+\fBTCP_BPF_RTO_MIN\fP\&.
+.IP \(bu 2
+\fBIPPROTO_IP\fP, which supports \fIoptname\fP \fBIP_TOS\fP\&.
+.IP \(bu 2
+\fBIPPROTO_IPV6\fP, which supports the following \fIoptname\fPs:
+\fBIPV6_TCLASS\fP, \fBIPV6_AUTOFLOWLABEL\fP\&.
+.UNINDENT
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_adjust_room(struct sk_buff *\fP\fIskb\fP\fB, s32\fP \fIlen_diff\fP\fB, u32\fP \fImode\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Grow or shrink the room for data in the packet associated to
+\fIskb\fP by \fIlen_diff\fP, and according to the selected \fImode\fP\&.
+.sp
+By default, the helper will reset any offloaded checksum
+indicator of the skb to CHECKSUM_NONE. This can be avoided
+by the following flag:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_NO_CSUM_RESET\fP: Do not reset offloaded
+checksum data of the skb to CHECKSUM_NONE.
+.UNINDENT
+.sp
+There are two supported modes at this time:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_ADJ_ROOM_MAC\fP: Adjust room at the mac layer
+(room space is added or removed between the layer 2 and
+layer 3 headers).
+.IP \(bu 2
+\fBBPF_ADJ_ROOM_NET\fP: Adjust room at the network layer
+(room space is added or removed between the layer 3 and
+layer 4 headers).
+.UNINDENT
+.sp
+The following flags are supported at this time:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_FIXED_GSO\fP: Do not adjust gso_size.
+Adjusting mss in this way is not allowed for datagrams.
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_ENCAP_L3_IPV4\fP,
+\fBBPF_F_ADJ_ROOM_ENCAP_L3_IPV6\fP:
+Any new space is reserved to hold a tunnel header.
+Configure skb offsets and other fields accordingly.
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_ENCAP_L4_GRE\fP,
+\fBBPF_F_ADJ_ROOM_ENCAP_L4_UDP\fP:
+Use with ENCAP_L3 flags to further specify the tunnel type.
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_ENCAP_L2\fP(\fIlen\fP):
+Use with ENCAP_L3/L4 flags to further specify the tunnel
+type; \fIlen\fP is the length of the inner MAC header.
+.IP \(bu 2
+\fBBPF_F_ADJ_ROOM_ENCAP_L2_ETH\fP:
+Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+L2 type as Ethernet.
+.UNINDENT
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_redirect_map(struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Redirect the packet to the endpoint referenced by \fImap\fP at
+index \fIkey\fP\&. Depending on its type, this \fImap\fP can contain
+references to net devices (for forwarding packets through other
+ports), or to CPUs (for redirecting XDP frames to another CPU;
+but this is only implemented for native XDP (with driver
+support) as of this writing).
+.sp
+The lower two bits of \fIflags\fP are used as the return code if
+the map lookup fails. This is so that the return value can be
+one of the XDP program return codes up to \fBXDP_TX\fP, as chosen
+by the caller. The higher bits of \fIflags\fP can be set to
+BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+.sp
+With BPF_F_BROADCAST the packet will be broadcasted to all the
+interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
+interface will be excluded when broadcasting.
+.sp
+See also \fBbpf_redirect\fP(), which only supports redirecting
+to an ifindex, but doesn\(aqt require a map to do so.
+.TP
+.B Return
+\fBXDP_REDIRECT\fP on success, or the value of the two lower bits
+of the \fIflags\fP argument on error.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_redirect_map(struct sk_buff *\fP\fIskb\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u32\fP \fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Redirect the packet to the socket referenced by \fImap\fP (of type
+\fBBPF_MAP_TYPE_SOCKMAP\fP) at index \fIkey\fP\&. Both ingress and
+egress interfaces can be used for redirection. The
+\fBBPF_F_INGRESS\fP value in \fIflags\fP is used to make the
+distinction (ingress path is selected if the flag is present,
+egress path otherwise). This is the only flag supported for now.
+.TP
+.B Return
+\fBSK_PASS\fP on success, or \fBSK_DROP\fP on error.
+.UNINDENT
+.TP
+.B \fBlong bpf_sock_map_update(struct bpf_sock_ops *\fP\fIskops\fP\fB, struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Add an entry to, or update a \fImap\fP referencing sockets. The
+\fIskops\fP is used as a new value for the entry associated to
+\fIkey\fP\&. \fIflags\fP is one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_NOEXIST\fP
+The entry for \fIkey\fP must not exist in the map.
+.TP
+.B \fBBPF_EXIST\fP
+The entry for \fIkey\fP must already exist in the map.
+.TP
+.B \fBBPF_ANY\fP
+No condition on the existence of the entry for \fIkey\fP\&.
+.UNINDENT
+.sp
+If the \fImap\fP has eBPF programs (parser and verdict), those will
+be inherited by the socket being added. If the socket is
+already attached to eBPF programs, this results in an error.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_adjust_meta(struct xdp_buff *\fP\fIxdp_md\fP\fB, int\fP \fIdelta\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Adjust the address pointed by \fIxdp_md\fP\fB\->data_meta\fP by
+\fIdelta\fP (which can be positive or negative). Note that this
+operation modifies the address stored in \fIxdp_md\fP\fB\->data\fP,
+so the latter must be loaded only after the helper has been
+called.
+.sp
+The use of \fIxdp_md\fP\fB\->data_meta\fP is optional and programs
+are not required to use it. The rationale is that when the
+packet is processed with XDP (e.g. as DoS filter), it is
+possible to push further meta data along with it before passing
+to the stack, and to give the guarantee that an ingress eBPF
+program attached as a TC classifier on the same device can pick
+this up for further post\-processing. Since TC works with socket
+buffers, it remains possible to set from XDP the \fBmark\fP or
+\fBpriority\fP pointers, or other pointers for the socket buffer.
+Having this scratch space generic and programmable allows for
+more flexibility as the user is free to store whatever meta
+data they need.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_perf_event_read_value(struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB, struct bpf_perf_event_value *\fP\fIbuf\fP\fB, u32\fP \fIbuf_size\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Read the value of a perf event counter, and store it into \fIbuf\fP
+of size \fIbuf_size\fP\&. This helper relies on a \fImap\fP of type
+\fBBPF_MAP_TYPE_PERF_EVENT_ARRAY\fP\&. The nature of the perf event
+counter is selected when \fImap\fP is updated with perf event file
+descriptors. The \fImap\fP is an array whose size is the number of
+available CPUs, and each cell contains a value relative to one
+CPU. The value to retrieve is indicated by \fIflags\fP, that
+contains the index of the CPU to look up, masked with
+\fBBPF_F_INDEX_MASK\fP\&. Alternatively, \fIflags\fP can be set to
+\fBBPF_F_CURRENT_CPU\fP to indicate that the value for the
+current CPU should be retrieved.
+.sp
+This helper behaves in a way close to
+\fBbpf_perf_event_read\fP() helper, save that instead of
+just returning the value observed, it fills the \fIbuf\fP
+structure. This allows for additional data to be retrieved: in
+particular, the enabled and running times (in \fIbuf\fP\fB\->enabled\fP and \fIbuf\fP\fB\->running\fP, respectively) are
+copied. In general, \fBbpf_perf_event_read_value\fP() is
+recommended over \fBbpf_perf_event_read\fP(), which has some
+ABI issues and provides fewer functionalities.
+.sp
+These values are interesting, because hardware PMU (Performance
+Monitoring Unit) counters are limited resources. When there are
+more PMU based perf events opened than available counters,
+kernel will multiplex these events so each event gets certain
+percentage (but not all) of the PMU time. In case that
+multiplexing happens, the number of samples or counter value
+will not reflect the case compared to when no multiplexing
+occurs. This makes comparison between different runs difficult.
+Typically, the counter value should be normalized before
+comparing to other experiments. The usual normalization is done
+as follows.
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+normalized_counter = counter * t_enabled / t_running
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+Where t_enabled is the time enabled for event and t_running is
+the time running for event since last normalization. The
+enabled and running times are accumulated since the perf event
+open. To achieve a scaling factor between two invocations of an
+eBPF program, users can use CPU id as the key (which is
+typical for perf array usage model) to remember the previous
+value and do the calculation inside the eBPF program.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_perf_prog_read_value(struct bpf_perf_event_data *\fP\fIctx\fP\fB, struct bpf_perf_event_value *\fP\fIbuf\fP\fB, u32\fP \fIbuf_size\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For an eBPF program attached to a perf event, retrieve the
+value of the event counter associated to \fIctx\fP and store it in
+the structure pointed by \fIbuf\fP and of size \fIbuf_size\fP\&. Enabled
+and running times are also stored in the structure (see
+description of helper \fBbpf_perf_event_read_value\fP() for
+more details).
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_getsockopt(void *\fP\fIbpf_socket\fP\fB, int\fP \fIlevel\fP\fB, int\fP \fIoptname\fP\fB, void *\fP\fIoptval\fP\fB, int\fP \fIoptlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Emulate a call to \fBgetsockopt()\fP on the socket associated to
+\fIbpf_socket\fP, which must be a full socket. The \fIlevel\fP at
+which the option resides and the name \fIoptname\fP of the option
+must be specified, see \fBgetsockopt(2)\fP for more information.
+The retrieved value is stored in the structure pointed by
+\fIoptval\fP and of length \fIoptlen\fP\&.
+.sp
+\fIbpf_socket\fP should be one of the following:
+.INDENT 7.0
+.IP \(bu 2
+\fBstruct bpf_sock_ops\fP for \fBBPF_PROG_TYPE_SOCK_OPS\fP\&.
+.IP \(bu 2
+\fBstruct bpf_sock_addr\fP for \fBBPF_CGROUP_INET4_CONNECT\fP
+and \fBBPF_CGROUP_INET6_CONNECT\fP\&.
+.UNINDENT
+.sp
+This helper actually implements a subset of \fBgetsockopt()\fP\&.
+It supports the same set of \fIoptname\fPs that is supported by
+the \fBbpf_setsockopt\fP() helper. The exceptions are
+\fBTCP_BPF_*\fP, which is \fBbpf_setsockopt\fP() only, and
+\fBTCP_SAVED_SYN\fP, which is \fBbpf_getsockopt\fP() only.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_override_return(struct pt_regs *\fP\fIregs\fP\fB, u64\fP \fIrc\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Used for error injection, this helper uses kprobes to override
+the return value of the probed function, and to set it to \fIrc\fP\&.
+The first argument is the context \fIregs\fP on which the kprobe
+works.
+.sp
+This helper works by setting the PC (program counter)
+to an override function which is run in place of the original
+probed function. This means the probed function is not run at
+all. The replacement function just returns with the required
+value.
+.sp
+This helper has security implications, and thus is subject to
+restrictions. It is only available if the kernel was compiled
+with the \fBCONFIG_BPF_KPROBE_OVERRIDE\fP configuration
+option, and in this case it only works on functions tagged with
+\fBALLOW_ERROR_INJECTION\fP in the kernel code.
+.sp
+Also, the helper is only available for the architectures having
+the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+x86 architecture is the only one to support this feature.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *\fP\fIbpf_sock\fP\fB, int\fP \fIargval\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Attempt to set the value of the \fBbpf_sock_ops_cb_flags\fP field
+for the full TCP socket associated to \fIbpf_sock\fP to
+\fIargval\fP\&.
+.sp
+The primary use of this field is to determine if there should
+be calls to eBPF programs of type
+\fBBPF_PROG_TYPE_SOCK_OPS\fP at various points in the TCP
+code. A program of the same type can change its value, per
+connection and as necessary, when the connection is
+established. This field is directly accessible for reading, but
+this helper must be used for updates in order to return an
+error if an eBPF program tries to set a callback that is not
+supported in the current kernel.
+.sp
+\fIargval\fP is a flag array which can combine these flags:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_SOCK_OPS_RTO_CB_FLAG\fP (retransmission time out)
+.IP \(bu 2
+\fBBPF_SOCK_OPS_RETRANS_CB_FLAG\fP (retransmission)
+.IP \(bu 2
+\fBBPF_SOCK_OPS_STATE_CB_FLAG\fP (TCP state change)
+.IP \(bu 2
+\fBBPF_SOCK_OPS_RTT_CB_FLAG\fP (every RTT)
+.UNINDENT
+.sp
+Therefore, this function can be used to clear a callback flag by
+setting the appropriate bit to zero. e.g. to disable the RTO
+callback:
+.INDENT 7.0
+.TP
+.B \fBbpf_sock_ops_cb_flags_set(bpf_sock,\fP
+\fBbpf_sock\->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)\fP
+.UNINDENT
+.sp
+Here are some examples of where one could call such eBPF
+program:
+.INDENT 7.0
+.IP \(bu 2
+When RTO fires.
+.IP \(bu 2
+When a packet is retransmitted.
+.IP \(bu 2
+When the connection terminates.
+.IP \(bu 2
+When a packet is sent.
+.IP \(bu 2
+When a packet is received.
+.UNINDENT
+.TP
+.B Return
+Code \fB\-EINVAL\fP if the socket is not a full TCP socket;
+otherwise, a positive number containing the bits that could not
+be set is returned (which comes down to 0 if all bits were set
+as required).
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_redirect_map(struct sk_msg_buff *\fP\fImsg\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u32\fP \fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing policies at the
+socket level. If the message \fImsg\fP is allowed to pass (i.e. if
+the verdict eBPF program returns \fBSK_PASS\fP), redirect it to
+the socket referenced by \fImap\fP (of type
+\fBBPF_MAP_TYPE_SOCKMAP\fP) at index \fIkey\fP\&. Both ingress and
+egress interfaces can be used for redirection. The
+\fBBPF_F_INGRESS\fP value in \fIflags\fP is used to make the
+distinction (ingress path is selected if the flag is present,
+egress path otherwise). This is the only flag supported for now.
+.TP
+.B Return
+\fBSK_PASS\fP on success, or \fBSK_DROP\fP on error.
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_apply_bytes(struct sk_msg_buff *\fP\fImsg\fP\fB, u32\fP \fIbytes\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For socket policies, apply the verdict of the eBPF program to
+the next \fIbytes\fP (number of bytes) of message \fImsg\fP\&.
+.sp
+For example, this helper can be used in the following cases:
+.INDENT 7.0
+.IP \(bu 2
+A single \fBsendmsg\fP() or \fBsendfile\fP() system call
+contains multiple logical messages that the eBPF program is
+supposed to read and for which it should apply a verdict.
+.IP \(bu 2
+An eBPF program only cares to read the first \fIbytes\fP of a
+\fImsg\fP\&. If the message has a large payload, then setting up
+and calling the eBPF program repeatedly for all bytes, even
+though the verdict is already known, would create unnecessary
+overhead.
+.UNINDENT
+.sp
+When called from within an eBPF program, the helper sets a
+counter internal to the BPF infrastructure, that is used to
+apply the last verdict to the next \fIbytes\fP\&. If \fIbytes\fP is
+smaller than the current data being processed from a
+\fBsendmsg\fP() or \fBsendfile\fP() system call, the first
+\fIbytes\fP will be sent and the eBPF program will be re\-run with
+the pointer for start of data pointing to byte number \fIbytes\fP
+\fB+ 1\fP\&. If \fIbytes\fP is larger than the current data being
+processed, then the eBPF verdict will be applied to multiple
+\fBsendmsg\fP() or \fBsendfile\fP() calls until \fIbytes\fP are
+consumed.
+.sp
+Note that if a socket closes with the internal counter holding
+a non\-zero value, this is not a problem because data is not
+being buffered for \fIbytes\fP and is sent as it is received.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_cork_bytes(struct sk_msg_buff *\fP\fImsg\fP\fB, u32\fP \fIbytes\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For socket policies, prevent the execution of the verdict eBPF
+program for message \fImsg\fP until \fIbytes\fP (byte number) have been
+accumulated.
+.sp
+This can be used when one needs a specific number of bytes
+before a verdict can be assigned, even if the data spans
+multiple \fBsendmsg\fP() or \fBsendfile\fP() calls. The extreme
+case would be a user calling \fBsendmsg\fP() repeatedly with
+1\-byte long message segments. Obviously, this is bad for
+performance, but it is still valid. If the eBPF program needs
+\fIbytes\fP bytes to validate a header, this helper can be used to
+prevent the eBPF program from being called again until \fIbytes\fP have
+been accumulated.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_pull_data(struct sk_msg_buff *\fP\fImsg\fP\fB, u32\fP \fIstart\fP\fB, u32\fP \fIend\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For socket policies, pull in non\-linear data from user space
+for \fImsg\fP and set pointers \fImsg\fP\fB\->data\fP and \fImsg\fP\fB\->data_end\fP to \fIstart\fP and \fIend\fP bytes offsets into \fImsg\fP,
+respectively.
+.sp
+If a program of type \fBBPF_PROG_TYPE_SK_MSG\fP is run on a
+\fImsg\fP it can only parse data that the (\fBdata\fP, \fBdata_end\fP)
+pointers have already consumed. For \fBsendmsg\fP() hooks this
+is likely the first scatterlist element. But for calls relying
+on the \fBsendpage\fP handler (e.g. \fBsendfile\fP()) this will
+be the range (\fB0\fP, \fB0\fP) because the data is shared with
+user space and by default the objective is to avoid allowing
+user space to modify data while (or after) eBPF verdict is
+being decided. This helper can be used to pull in data and to
+set the start and end pointer to given values. Data will be
+copied if necessary (i.e. if data was not linear and if start
+and end pointers do not point to the same chunk).
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_bind(struct bpf_sock_addr *\fP\fIctx\fP\fB, struct sockaddr *\fP\fIaddr\fP\fB, int\fP \fIaddr_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Bind the socket associated to \fIctx\fP to the address pointed by
+\fIaddr\fP, of length \fIaddr_len\fP\&. This allows for making outgoing
+connection from the desired IP address, which can be useful for
+example when all processes inside a cgroup should use one
+single IP address on a host that has multiple IP addresses configured.
+.sp
+This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+domain (\fIaddr\fP\fB\->sa_family\fP) must be \fBAF_INET\fP (or
+\fBAF_INET6\fP). It\(aqs advised to pass zero port (\fBsin_port\fP
+or \fBsin6_port\fP) which triggers IP_BIND_ADDRESS_NO_PORT\-like
+behavior and lets the kernel efficiently pick up an unused
+port as long as 4\-tuple is unique. Passing non\-zero port might
+lead to degraded performance.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_adjust_tail(struct xdp_buff *\fP\fIxdp_md\fP\fB, int\fP \fIdelta\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Adjust (move) \fIxdp_md\fP\fB\->data_end\fP by \fIdelta\fP bytes. It is
+possible to both shrink and grow the packet tail.
+Shrinking is done via \fIdelta\fP being a negative integer.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_get_xfrm_state(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIindex\fP\fB, struct bpf_xfrm_state *\fP\fIxfrm_state\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve the XFRM state (IP transform framework, see also
+\fBip\-xfrm(8)\fP) at \fIindex\fP in XFRM \(dqsecurity path\(dq for \fIskb\fP\&.
+.sp
+The retrieved value is stored in the \fBstruct bpf_xfrm_state\fP
+pointed by \fIxfrm_state\fP and of length \fIsize\fP\&.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+This helper is available only if the kernel was compiled with
+\fBCONFIG_XFRM\fP configuration option.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_stack(void *\fP\fIctx\fP\fB, void *\fP\fIbuf\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return a user or a kernel stack in bpf program provided buffer.
+To achieve this, the helper needs \fIctx\fP, which is a pointer
+to the context on which the tracing program is executed.
+To store the stacktrace, the bpf program provides \fIbuf\fP with
+a nonnegative \fIsize\fP\&.
+.sp
+The last argument, \fIflags\fP, holds the number of stack frames to
+skip (from 0 to 255), masked with
+\fBBPF_F_SKIP_FIELD_MASK\fP\&. The next bits can be used to set
+the following flags:
+.INDENT 7.0
+.TP
+.B \fBBPF_F_USER_STACK\fP
+Collect a user space stack instead of a kernel stack.
+.TP
+.B \fBBPF_F_USER_BUILD_ID\fP
+Collect (build_id, file_offset) instead of ips for user
+stack, only valid if \fBBPF_F_USER_STACK\fP is also
+specified.
+.sp
+\fIfile_offset\fP is an offset relative to the beginning
+of the executable or shared object file backing the vma
+which the \fIip\fP falls in. It is \fInot\fP an offset relative
+to that object\(aqs base address. Accordingly, it must be
+adjusted by adding (sh_addr \- sh_offset), where
+sh_{addr,offset} correspond to the executable section
+containing \fIfile_offset\fP in the object, for comparisons
+to symbols\(aq st_value to be valid.
+.UNINDENT
+.sp
+\fBbpf_get_stack\fP() can collect up to
+\fBPERF_MAX_STACK_DEPTH\fP both kernel and user frames, subject
+to a sufficiently large buffer size. Note that
+this limit can be controlled with the \fBsysctl\fP program, and
+that it should be manually increased in order to profile long
+user stacks (such as stacks for Java programs). To do so, use:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+# sysctl kernel.perf_event_max_stack=<new value>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.TP
+.B Return
+The non\-negative copied \fIbuf\fP length equal to or less than
+\fIsize\fP on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_load_bytes_relative(const void *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, void *\fP\fIto\fP\fB, u32\fP \fIlen\fP\fB, u32\fP \fIstart_header\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is similar to \fBbpf_skb_load_bytes\fP() in that
+it provides an easy way to load \fIlen\fP bytes from \fIoffset\fP
+from the packet associated to \fIskb\fP, into the buffer pointed
+to by \fIto\fP\&. The difference to \fBbpf_skb_load_bytes\fP() is that
+a fifth argument \fIstart_header\fP exists in order to select a
+base offset to start from. \fIstart_header\fP can be one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_HDR_START_MAC\fP
+Base offset to load data from is \fIskb\fP\(aqs mac header.
+.TP
+.B \fBBPF_HDR_START_NET\fP
+Base offset to load data from is \fIskb\fP\(aqs network header.
+.UNINDENT
+.sp
+In general, \(dqdirect packet access\(dq is the preferred method to
+access packet data, however, this helper is in particular useful
+in socket filters where \fIskb\fP\fB\->data\fP does not always point
+to the start of the mac header and where \(dqdirect packet access\(dq
+is not available.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_fib_lookup(void *\fP\fIctx\fP\fB, struct bpf_fib_lookup *\fP\fIparams\fP\fB, int\fP \fIplen\fP\fB, u32\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Do FIB lookup in kernel tables using parameters in \fIparams\fP\&.
+If lookup is successful and result shows packet is to be
+forwarded, the neighbor tables are searched for the nexthop.
+If successful (i.e., FIB lookup shows forwarding and nexthop
+is resolved), the nexthop address is returned in ipv4_dst
+or ipv6_dst based on family, smac is set to mac address of
+egress device, dmac is set to nexthop mac address, rt_metric
+is set to metric from route (IPv4/IPv6 only), and ifindex
+is set to the device index of the nexthop from the FIB lookup.
+.sp
+\fIplen\fP argument is the size of the passed in struct.
+\fIflags\fP argument can be a combination of one or more of the
+following values:
+.INDENT 7.0
+.TP
+.B \fBBPF_FIB_LOOKUP_DIRECT\fP
+Do a direct table lookup vs full lookup using FIB
+rules.
+.TP
+.B \fBBPF_FIB_LOOKUP_OUTPUT\fP
+Perform lookup from an egress perspective (default is
+ingress).
+.UNINDENT
+.sp
+\fIctx\fP is either \fBstruct xdp_md\fP for XDP programs or
+\fBstruct sk_buff\fP for tc cls_act programs.
+.TP
+.B Return
+.INDENT 7.0
+.IP \(bu 2
+< 0 if any input argument is invalid
+.IP \(bu 2
+0 on success (packet is forwarded, nexthop neighbor exists)
+.IP \(bu 2
+> 0 one of \fBBPF_FIB_LKUP_RET_\fP codes explaining why the
+packet is not forwarded or needs assist from full stack
+.UNINDENT
+.sp
+If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
+was exceeded and output params\->mtu_result contains the MTU.
+.UNINDENT
+.TP
+.B \fBlong bpf_sock_hash_update(struct bpf_sock_ops *\fP\fIskops\fP\fB, struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Add an entry to, or update a sockhash \fImap\fP referencing sockets.
+The \fIskops\fP is used as a new value for the entry associated to
+\fIkey\fP\&. \fIflags\fP is one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_NOEXIST\fP
+The entry for \fIkey\fP must not exist in the map.
+.TP
+.B \fBBPF_EXIST\fP
+The entry for \fIkey\fP must already exist in the map.
+.TP
+.B \fBBPF_ANY\fP
+No condition on the existence of the entry for \fIkey\fP\&.
+.UNINDENT
+.sp
+If the \fImap\fP has eBPF programs (parser and verdict), those will
+be inherited by the socket being added. If the socket is
+already attached to eBPF programs, this results in an error.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_redirect_hash(struct sk_msg_buff *\fP\fImsg\fP\fB, struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing policies at the
+socket level. If the message \fImsg\fP is allowed to pass (i.e. if
+the verdict eBPF program returns \fBSK_PASS\fP), redirect it to
+the socket referenced by \fImap\fP (of type
+\fBBPF_MAP_TYPE_SOCKHASH\fP) using hash \fIkey\fP\&. Both ingress and
+egress interfaces can be used for redirection. The
+\fBBPF_F_INGRESS\fP value in \fIflags\fP is used to make the
+distinction (ingress path is selected if the flag is present,
+egress path otherwise). This is the only flag supported for now.
+.TP
+.B Return
+\fBSK_PASS\fP on success, or \fBSK_DROP\fP on error.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_redirect_hash(struct sk_buff *\fP\fIskb\fP\fB, struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing policies at the
+skb socket level. If the sk_buff \fIskb\fP is allowed to pass (i.e.
+if the verdict eBPF program returns \fBSK_PASS\fP), redirect it
+to the socket referenced by \fImap\fP (of type
+\fBBPF_MAP_TYPE_SOCKHASH\fP) using hash \fIkey\fP\&. Both ingress and
+egress interfaces can be used for redirection. The
+\fBBPF_F_INGRESS\fP value in \fIflags\fP is used to make the
+distinction (ingress path is selected if the flag is present,
+egress otherwise). This is the only flag supported for now.
+.TP
+.B Return
+\fBSK_PASS\fP on success, or \fBSK_DROP\fP on error.
+.UNINDENT
+.TP
+.B \fBlong bpf_lwt_push_encap(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fItype\fP\fB, void *\fP\fIhdr\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Encapsulate the packet associated to \fIskb\fP within a Layer 3
+protocol header. This header is provided in the buffer at
+address \fIhdr\fP, with \fIlen\fP its size in bytes. \fItype\fP indicates
+the protocol of the header and can be one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_LWT_ENCAP_SEG6\fP
+IPv6 encapsulation with Segment Routing Header
+(\fBstruct ipv6_sr_hdr\fP). \fIhdr\fP only contains the SRH,
+the IPv6 header is computed by the kernel.
+.TP
+.B \fBBPF_LWT_ENCAP_SEG6_INLINE\fP
+Only works if \fIskb\fP contains an IPv6 packet. Insert a
+Segment Routing Header (\fBstruct ipv6_sr_hdr\fP) inside
+the IPv6 header.
+.TP
+.B \fBBPF_LWT_ENCAP_IP\fP
+IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+must be IPv4 or IPv6, followed by zero or more
+additional headers, up to \fBLWT_BPF_MAX_HEADROOM\fP
+total bytes in all prepended headers. Please note that
+if \fBskb_is_gso\fP(\fIskb\fP) is true, no more than two
+headers can be prepended, and the inner header, if
+present, should be either GRE or UDP/GUE.
+.UNINDENT
+.sp
+\fBBPF_LWT_ENCAP_SEG6\fP* types can be called by BPF programs
+of type \fBBPF_PROG_TYPE_LWT_IN\fP; \fBBPF_LWT_ENCAP_IP\fP type can
+be called by bpf programs of types \fBBPF_PROG_TYPE_LWT_IN\fP and
+\fBBPF_PROG_TYPE_LWT_XMIT\fP\&.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_lwt_seg6_store_bytes(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, const void *\fP\fIfrom\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Store \fIlen\fP bytes from address \fIfrom\fP into the packet
+associated to \fIskb\fP, at \fIoffset\fP\&. Only the flags, tag and TLVs
+inside the outermost IPv6 Segment Routing Header can be
+modified through this helper.
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_lwt_seg6_adjust_srh(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIoffset\fP\fB, s32\fP \fIdelta\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Adjust the size allocated to TLVs in the outermost IPv6
+Segment Routing Header contained in the packet associated to
+\fIskb\fP, at position \fIoffset\fP by \fIdelta\fP bytes. Only offsets
+after the segments are accepted. \fIdelta\fP can be either
+positive (growing) or negative (shrinking).
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_lwt_seg6_action(struct sk_buff *\fP\fIskb\fP\fB, u32\fP \fIaction\fP\fB, void *\fP\fIparam\fP\fB, u32\fP \fIparam_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Apply an IPv6 Segment Routing action of type \fIaction\fP to the
+packet associated to \fIskb\fP\&. Each action takes a parameter
+contained at address \fIparam\fP, and of length \fIparam_len\fP bytes.
+\fIaction\fP can be one of:
+.INDENT 7.0
+.TP
+.B \fBSEG6_LOCAL_ACTION_END_X\fP
+End.X action: Endpoint with Layer\-3 cross\-connect.
+Type of \fIparam\fP: \fBstruct in6_addr\fP\&.
+.TP
+.B \fBSEG6_LOCAL_ACTION_END_T\fP
+End.T action: Endpoint with specific IPv6 table lookup.
+Type of \fIparam\fP: \fBint\fP\&.
+.TP
+.B \fBSEG6_LOCAL_ACTION_END_B6\fP
+End.B6 action: Endpoint bound to an SRv6 policy.
+Type of \fIparam\fP: \fBstruct ipv6_sr_hdr\fP\&.
+.TP
+.B \fBSEG6_LOCAL_ACTION_END_B6_ENCAP\fP
+End.B6.Encap action: Endpoint bound to an SRv6
+encapsulation policy.
+Type of \fIparam\fP: \fBstruct ipv6_sr_hdr\fP\&.
+.UNINDENT
+.sp
+A call to this helper is susceptible to change the underlying
+packet buffer. Therefore, at load time, all checks on pointers
+previously done by the verifier are invalidated and must be
+performed again, if the helper is used in combination with
+direct packet access.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_rc_repeat(void *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing IR decoding, to
+report a successfully decoded repeat key message. This delays
+the generation of a key up event for the previously generated
+key down event.
+.sp
+Some IR protocols like NEC have a special IR message for
+repeating last button, for when a button is held down.
+.sp
+The \fIctx\fP should point to the lirc sample as passed into
+the program.
+.sp
+This helper is only available if the kernel was compiled with
+the \fBCONFIG_BPF_LIRC_MODE2\fP configuration option set to
+\(dq\fBy\fP\(dq.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_rc_keydown(void *\fP\fIctx\fP\fB, u32\fP \fIprotocol\fP\fB, u64\fP \fIscancode\fP\fB, u32\fP \fItoggle\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing IR decoding, to
+report a successfully decoded key press with \fIscancode\fP,
+\fItoggle\fP value in the given \fIprotocol\fP\&. The scancode will be
+translated to a keycode using the rc keymap, and reported as
+an input key down event. After a period a key up event is
+generated. This period can be extended by calling either
+\fBbpf_rc_keydown\fP() again with the same values, or calling
+\fBbpf_rc_repeat\fP().
+.sp
+Some protocols include a toggle bit, in case the button was
+released and pressed again between consecutive scancodes.
+.sp
+The \fIctx\fP should point to the lirc sample as passed into
+the program.
+.sp
+The \fIprotocol\fP is the decoded protocol number (see
+\fBenum rc_proto\fP for some predefined values).
+.sp
+This helper is only available if the kernel was compiled with
+the \fBCONFIG_BPF_LIRC_MODE2\fP configuration option set to
+\(dq\fBy\fP\(dq.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBu64 bpf_skb_cgroup_id(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return the cgroup v2 id of the socket associated with the \fIskb\fP\&.
+This is roughly similar to the \fBbpf_get_cgroup_classid\fP()
+helper for cgroup v1 by providing a tag or identifier that
+can be matched on or used for map lookups e.g. to implement
+policy. The cgroup v2 id of a given path in the hierarchy is
+exposed in user space through the f_handle API in order to get
+to the same 64\-bit id.
+.sp
+This helper can be used on TC egress path, but not on ingress,
+and is available only if the kernel was compiled with the
+\fBCONFIG_SOCK_CGROUP_DATA\fP configuration option.
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_current_cgroup_id(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the current cgroup id based on the cgroup within which
+the current task is running.
+.TP
+.B Return
+A 64\-bit integer containing the current cgroup id based
+on the cgroup within which the current task is running.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_get_local_storage(void *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the pointer to the local storage area.
+The type and the size of the local storage is defined
+by the \fImap\fP argument.
+The \fIflags\fP meaning is specific for each map type,
+and has to be 0 for cgroup local storage.
+.sp
+Depending on the BPF program type, a local storage area
+can be shared between multiple instances of the BPF program,
+running simultaneously.
+.sp
+The user must take care of the synchronization themselves.
+For example, by using the \fBBPF_ATOMIC\fP instructions to alter
+the shared data.
+.TP
+.B Return
+A pointer to the local storage area.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_select_reuseport(struct sk_reuseport_md *\fP\fIreuse\fP\fB, struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIkey\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Select a \fBSO_REUSEPORT\fP socket from a
+\fBBPF_MAP_TYPE_REUSEPORT_SOCKARRAY\fP \fImap\fP\&.
+It checks the selected socket is matching the incoming
+request in the socket buffer.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_skb_ancestor_cgroup_id(struct sk_buff *\fP\fIskb\fP\fB, int\fP \fIancestor_level\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return id of cgroup v2 that is ancestor of cgroup associated
+with the \fIskb\fP at the \fIancestor_level\fP\&. The root cgroup is at
+\fIancestor_level\fP zero and each step down the hierarchy
+increments the level. If \fIancestor_level\fP == level of cgroup
+associated with \fIskb\fP, then return value will be same as that
+of \fBbpf_skb_cgroup_id\fP().
+.sp
+The helper is useful to implement policies based on cgroups
+that are upper in hierarchy than immediate cgroup associated
+with \fIskb\fP\&.
+.sp
+The format of returned id and helper limitations are same as in
+\fBbpf_skb_cgroup_id\fP().
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBstruct bpf_sock *bpf_sk_lookup_tcp(void *\fP\fIctx\fP\fB, struct bpf_sock_tuple *\fP\fItuple\fP\fB, u32\fP \fItuple_size\fP\fB, u64\fP \fInetns\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Look for TCP socket matching \fItuple\fP, optionally in a child
+network namespace \fInetns\fP\&. The return value must be checked,
+and if non\-\fBNULL\fP, released via \fBbpf_sk_release\fP().
+.sp
+The \fIctx\fP should point to the context of the program, such as
+the skb or socket (depending on the hook in use). This is used
+to determine the base network namespace for the lookup.
+.sp
+\fItuple_size\fP must be one of:
+.INDENT 7.0
+.TP
+.B \fBsizeof\fP(\fItuple\fP\fB\->ipv4\fP)
+Look for an IPv4 socket.
+.TP
+.B \fBsizeof\fP(\fItuple\fP\fB\->ipv6\fP)
+Look for an IPv6 socket.
+.UNINDENT
+.sp
+If the \fInetns\fP is a negative signed 32\-bit integer, then the
+socket lookup table in the netns associated with the \fIctx\fP
+will be used. For the TC hooks, this is the netns of the device
+in the skb. For socket hooks, this is the netns of the socket.
+If \fInetns\fP is any other signed 32\-bit value greater than or
+equal to zero then it specifies the ID of the netns relative to
+the netns associated with the \fIctx\fP\&. \fInetns\fP values beyond the
+range of 32\-bit integers are reserved for future use.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+This helper is available only if the kernel was compiled with
+\fBCONFIG_NET\fP configuration option.
+.TP
+.B Return
+Pointer to \fBstruct bpf_sock\fP, or \fBNULL\fP in case of failure.
+For sockets with reuseport option, the \fBstruct bpf_sock\fP
+result is from \fIreuse\fP\fB\->socks\fP[] using the hash of the
+tuple.
+.UNINDENT
+.TP
+.B \fBstruct bpf_sock *bpf_sk_lookup_udp(void *\fP\fIctx\fP\fB, struct bpf_sock_tuple *\fP\fItuple\fP\fB, u32\fP \fItuple_size\fP\fB, u64\fP \fInetns\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Look for UDP socket matching \fItuple\fP, optionally in a child
+network namespace \fInetns\fP\&. The return value must be checked,
+and if non\-\fBNULL\fP, released via \fBbpf_sk_release\fP().
+.sp
+The \fIctx\fP should point to the context of the program, such as
+the skb or socket (depending on the hook in use). This is used
+to determine the base network namespace for the lookup.
+.sp
+\fItuple_size\fP must be one of:
+.INDENT 7.0
+.TP
+.B \fBsizeof\fP(\fItuple\fP\fB\->ipv4\fP)
+Look for an IPv4 socket.
+.TP
+.B \fBsizeof\fP(\fItuple\fP\fB\->ipv6\fP)
+Look for an IPv6 socket.
+.UNINDENT
+.sp
+If the \fInetns\fP is a negative signed 32\-bit integer, then the
+socket lookup table in the netns associated with the \fIctx\fP
+will be used. For the TC hooks, this is the netns of the device
+in the skb. For socket hooks, this is the netns of the socket.
+If \fInetns\fP is any other signed 32\-bit value greater than or
+equal to zero then it specifies the ID of the netns relative to
+the netns associated with the \fIctx\fP\&. \fInetns\fP values beyond the
+range of 32\-bit integers are reserved for future use.
+.sp
+All values for \fIflags\fP are reserved for future usage, and must
+be left at zero.
+.sp
+This helper is available only if the kernel was compiled with
+\fBCONFIG_NET\fP configuration option.
+.TP
+.B Return
+Pointer to \fBstruct bpf_sock\fP, or \fBNULL\fP in case of failure.
+For sockets with reuseport option, the \fBstruct bpf_sock\fP
+result is from \fIreuse\fP\fB\->socks\fP[] using the hash of the
+tuple.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_release(void *\fP\fIsock\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Release the reference held by \fIsock\fP\&. \fIsock\fP must be a
+non\-\fBNULL\fP pointer that was returned from
+\fBbpf_sk_lookup_xxx\fP().
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_map_push_elem(struct bpf_map *\fP\fImap\fP\fB, const void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Push an element \fIvalue\fP in \fImap\fP\&. \fIflags\fP is one of:
+.INDENT 7.0
+.TP
+.B \fBBPF_EXIST\fP
+If the queue/stack is full, the oldest element is
+removed to make room for this.
+.UNINDENT
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_map_pop_elem(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIvalue\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Pop an element from \fImap\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_map_peek_elem(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIvalue\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get an element from \fImap\fP without removing it.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_push_data(struct sk_msg_buff *\fP\fImsg\fP\fB, u32\fP \fIstart\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For socket policies, insert \fIlen\fP bytes into \fImsg\fP at offset
+\fIstart\fP\&.
+.sp
+If a program of type \fBBPF_PROG_TYPE_SK_MSG\fP is run on a
+\fImsg\fP it may want to insert metadata or options into the \fImsg\fP\&.
+This can later be read and used by any of the lower layer BPF
+hooks.
+.sp
+This helper may fail if under memory pressure (a malloc
+fails); in these cases BPF programs will get an appropriate
+error and will need to handle it.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_msg_pop_data(struct sk_msg_buff *\fP\fImsg\fP\fB, u32\fP \fIstart\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Will remove \fIlen\fP bytes from a \fImsg\fP starting at byte \fIstart\fP\&.
+This may result in \fBENOMEM\fP errors under certain situations if
+an allocation and copy are required due to a full ring buffer.
+However, the helper will try to avoid doing the allocation
+if possible. Other errors can occur if input parameters are
+invalid either due to \fIstart\fP byte not being valid part of \fImsg\fP
+payload and/or \fIpop\fP value being too large.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_rc_pointer_rel(void *\fP\fIctx\fP\fB, s32\fP \fIrel_x\fP\fB, s32\fP \fIrel_y\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is used in programs implementing IR decoding, to
+report a successfully decoded pointer movement.
+.sp
+The \fIctx\fP should point to the lirc sample as passed into
+the program.
+.sp
+This helper is only available if the kernel was compiled with
+the \fBCONFIG_BPF_LIRC_MODE2\fP configuration option set to
+\(dq\fBy\fP\(dq.
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_spin_lock(struct bpf_spin_lock *\fP\fIlock\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Acquire a spinlock represented by the pointer \fIlock\fP, which is
+stored as part of a value of a map. Taking the lock allows one to
+safely update the rest of the fields in that value. The
+spinlock can (and must) later be released with a call to
+\fBbpf_spin_unlock\fP(\fIlock\fP).
+.sp
+Spinlocks in BPF programs come with a number of restrictions
+and constraints:
+.INDENT 7.0
+.IP \(bu 2
+\fBbpf_spin_lock\fP objects are only allowed inside maps of
+types \fBBPF_MAP_TYPE_HASH\fP and \fBBPF_MAP_TYPE_ARRAY\fP (this
+list could be extended in the future).
+.IP \(bu 2
+BTF description of the map is mandatory.
+.IP \(bu 2
+The BPF program can take ONE lock at a time, since taking two
+or more could cause deadlocks.
+.IP \(bu 2
+Only one \fBstruct bpf_spin_lock\fP is allowed per map element.
+.IP \(bu 2
+When the lock is taken, calls (either BPF to BPF or helpers)
+are not allowed.
+.IP \(bu 2
+The \fBBPF_LD_ABS\fP and \fBBPF_LD_IND\fP instructions are not
+allowed inside a spinlock\-ed region.
+.IP \(bu 2
+The BPF program MUST call \fBbpf_spin_unlock\fP() to release
+the lock, on all execution paths, before it returns.
+.IP \(bu 2
+The BPF program can access \fBstruct bpf_spin_lock\fP only via
+the \fBbpf_spin_lock\fP() and \fBbpf_spin_unlock\fP()
+helpers. Loading or storing data into the \fBstruct
+bpf_spin_lock\fP \fIlock\fP\fB;\fP field of a map is not allowed.
+.IP \(bu 2
+To use the \fBbpf_spin_lock\fP() helper, the BTF description
+of the map value must be a struct and have \fBstruct
+bpf_spin_lock\fP \fIanyname\fP\fB;\fP field at the top level.
+Nested lock inside another struct is not allowed.
+.IP \(bu 2
+The \fBstruct bpf_spin_lock\fP \fIlock\fP field in a map value must
+be aligned on a multiple of 4 bytes in that value.
+.IP \(bu 2
+Syscall with command \fBBPF_MAP_LOOKUP_ELEM\fP does not copy
+the \fBbpf_spin_lock\fP field to user space.
+.IP \(bu 2
+Syscall with command \fBBPF_MAP_UPDATE_ELEM\fP, or update from
+a BPF program, do not update the \fBbpf_spin_lock\fP field.
+.IP \(bu 2
+\fBbpf_spin_lock\fP cannot be on the stack or inside a
+networking packet (it can only be inside of a map value).
+.IP \(bu 2
+\fBbpf_spin_lock\fP is available to root only.
+.IP \(bu 2
+Tracing programs and socket filter programs cannot use
+\fBbpf_spin_lock\fP() due to insufficient preemption checks
+(but this may change in the future).
+.IP \(bu 2
+\fBbpf_spin_lock\fP is not allowed in inner maps of map\-in\-map.
+.UNINDENT
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBlong bpf_spin_unlock(struct bpf_spin_lock *\fP\fIlock\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Release the \fIlock\fP previously locked by a call to
+\fBbpf_spin_lock\fP(\fIlock\fP).
+.TP
+.B Return
+0
+.UNINDENT
+.TP
+.B \fBstruct bpf_sock *bpf_sk_fullsock(struct bpf_sock *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper gets a \fBstruct bpf_sock\fP pointer such
+that all the fields in this \fBbpf_sock\fP can be accessed.
+.TP
+.B Return
+A \fBstruct bpf_sock\fP pointer on success, or \fBNULL\fP in
+case of failure.
+.UNINDENT
+.TP
+.B \fBstruct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper gets a \fBstruct bpf_tcp_sock\fP pointer from a
+\fBstruct bpf_sock\fP pointer.
+.TP
+.B Return
+A \fBstruct bpf_tcp_sock\fP pointer on success, or \fBNULL\fP in
+case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_ecn_set_ce(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set ECN (Explicit Congestion Notification) field of IP header
+to \fBCE\fP (Congestion Encountered) if current value is \fBECT\fP
+(ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+and IPv4.
+.TP
+.B Return
+1 if the \fBCE\fP flag is set (either by the current helper call
+or because it was already present), 0 if it is not set.
+.UNINDENT
+.TP
+.B \fBstruct bpf_sock *bpf_get_listener_sock(struct bpf_sock *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return a \fBstruct bpf_sock\fP pointer in \fBTCP_LISTEN\fP state.
+\fBbpf_sk_release\fP() is unnecessary and not allowed.
+.TP
+.B Return
+A \fBstruct bpf_sock\fP pointer on success, or \fBNULL\fP in
+case of failure.
+.UNINDENT
+.TP
+.B \fBstruct bpf_sock *bpf_skc_lookup_tcp(void *\fP\fIctx\fP\fB, struct bpf_sock_tuple *\fP\fItuple\fP\fB, u32\fP \fItuple_size\fP\fB, u64\fP \fInetns\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Look for TCP socket matching \fItuple\fP, optionally in a child
+network namespace \fInetns\fP\&. The return value must be checked,
+and if non\-\fBNULL\fP, released via \fBbpf_sk_release\fP().
+.sp
+This function is identical to \fBbpf_sk_lookup_tcp\fP(), except
+that it also returns timewait or request sockets. Use
+\fBbpf_sk_fullsock\fP() or \fBbpf_tcp_sock\fP() to access the
+full structure.
+.sp
+This helper is available only if the kernel was compiled with
+\fBCONFIG_NET\fP configuration option.
+.TP
+.B Return
+Pointer to \fBstruct bpf_sock\fP, or \fBNULL\fP in case of failure.
+For sockets with reuseport option, the \fBstruct bpf_sock\fP
+result is from \fIreuse\fP\fB\->socks\fP[] using the hash of the
+tuple.
+.UNINDENT
+.TP
+.B \fBlong bpf_tcp_check_syncookie(void *\fP\fIsk\fP\fB, void *\fP\fIiph\fP\fB, u32\fP \fIiph_len\fP\fB, struct tcphdr *\fP\fIth\fP\fB, u32\fP \fIth_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check whether \fIiph\fP and \fIth\fP contain a valid SYN cookie ACK for
+the listening socket in \fIsk\fP\&.
+.sp
+\fIiph\fP points to the start of the IPv4 or IPv6 header, while
+\fIiph_len\fP contains \fBsizeof\fP(\fBstruct iphdr\fP) or
+\fBsizeof\fP(\fBstruct ipv6hdr\fP).
+.sp
+\fIth\fP points to the start of the TCP header, while \fIth_len\fP
+contains the length of the TCP header (at least
+\fBsizeof\fP(\fBstruct tcphdr\fP)).
+.TP
+.B Return
+0 if \fIiph\fP and \fIth\fP are a valid SYN cookie ACK, or a negative
+error otherwise.
+.UNINDENT
+.TP
+.B \fBlong bpf_sysctl_get_name(struct bpf_sysctl *\fP\fIctx\fP\fB, char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get name of sysctl in /proc/sys/ and copy it into the
+program\-provided buffer \fIbuf\fP of size \fIbuf_len\fP\&.
+.sp
+The buffer is always NUL terminated, unless it\(aqs zero\-sized.
+.sp
+If \fIflags\fP is zero, full name (e.g. \(dqnet/ipv4/tcp_mem\(dq) is
+copied. Use \fBBPF_F_SYSCTL_BASE_NAME\fP flag to copy base name
+only (e.g. \(dqtcp_mem\(dq).
+.TP
+.B Return
+Number of characters copied (not including the trailing NUL).
+.sp
+\fB\-E2BIG\fP if the buffer wasn\(aqt big enough (\fIbuf\fP will contain
+truncated name in this case).
+.UNINDENT
+.TP
+.B \fBlong bpf_sysctl_get_current_value(struct bpf_sysctl *\fP\fIctx\fP\fB, char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get current value of sysctl as it is presented in /proc/sys
+(incl. newline, etc), and copy it as a string into the
+program\-provided buffer \fIbuf\fP of size \fIbuf_len\fP\&.
+.sp
+The whole value is copied, no matter what file position user
+space issued e.g. sys_read at.
+.sp
+The buffer is always NUL terminated, unless it\(aqs zero\-sized.
+.TP
+.B Return
+Number of characters copied (not including the trailing NUL).
+.sp
+\fB\-E2BIG\fP if the buffer wasn\(aqt big enough (\fIbuf\fP will contain
+truncated value in this case).
+.sp
+\fB\-EINVAL\fP if current value was unavailable, e.g. because
+sysctl is uninitialized and read returns \-EIO for it.
+.UNINDENT
+.TP
+.B \fBlong bpf_sysctl_get_new_value(struct bpf_sysctl *\fP\fIctx\fP\fB, char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get new value being written by user space to sysctl (before
+the actual write happens) and copy it as a string into the
+program\-provided buffer \fIbuf\fP of size \fIbuf_len\fP\&.
+.sp
+User space may write new value at file position > 0.
+.sp
+The buffer is always NUL terminated, unless it\(aqs zero\-sized.
+.TP
+.B Return
+Number of characters copied (not including the trailing NUL).
+.sp
+\fB\-E2BIG\fP if the buffer wasn\(aqt big enough (\fIbuf\fP will contain
+truncated value in this case).
+.sp
+\fB\-EINVAL\fP if sysctl is being read.
+.UNINDENT
+.TP
+.B \fBlong bpf_sysctl_set_new_value(struct bpf_sysctl *\fP\fIctx\fP\fB, const char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Override new value being written by user space to sysctl with
+value provided by program in buffer \fIbuf\fP of size \fIbuf_len\fP\&.
+.sp
+\fIbuf\fP should contain a string in same form as provided by user
+space on sysctl write.
+.sp
+User space may write new value at file position > 0. To override
+the whole sysctl value file position should be set to zero.
+.TP
+.B Return
+0 on success.
+.sp
+\fB\-E2BIG\fP if the \fIbuf_len\fP is too big.
+.sp
+\fB\-EINVAL\fP if sysctl is being read.
+.UNINDENT
+.TP
+.B \fBlong bpf_strtol(const char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB, u64\fP \fIflags\fP\fB, long *\fP\fIres\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Convert the initial part of the string from buffer \fIbuf\fP of
+size \fIbuf_len\fP to a long integer according to the given base
+and save the result in \fIres\fP\&.
+.sp
+The string may begin with an arbitrary amount of white space
+(as determined by \fBisspace\fP(3)) followed by a single
+optional \(aq\fB\-\fP\(aq sign.
+.sp
+Five least significant bits of \fIflags\fP encode base, other bits
+are currently unused.
+.sp
+Base must be either 8, 10, 16 or 0 to detect it automatically
+similar to user space \fBstrtol\fP(3).
+.TP
+.B Return
+Number of characters consumed on success. Must be positive but
+no more than \fIbuf_len\fP\&.
+.sp
+\fB\-EINVAL\fP if no valid digits were found or unsupported base
+was provided.
+.sp
+\fB\-ERANGE\fP if resulting value was out of range.
+.UNINDENT
+.TP
+.B \fBlong bpf_strtoul(const char *\fP\fIbuf\fP\fB, size_t\fP \fIbuf_len\fP\fB, u64\fP \fIflags\fP\fB, unsigned long *\fP\fIres\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Convert the initial part of the string from buffer \fIbuf\fP of
+size \fIbuf_len\fP to an unsigned long integer according to the
+given base and save the result in \fIres\fP\&.
+.sp
+The string may begin with an arbitrary amount of white space
+(as determined by \fBisspace\fP(3)).
+.sp
+Five least significant bits of \fIflags\fP encode base, other bits
+are currently unused.
+.sp
+Base must be either 8, 10, 16 or 0 to detect it automatically
+similar to user space \fBstrtoul\fP(3).
+.TP
+.B Return
+Number of characters consumed on success. Must be positive but
+no more than \fIbuf_len\fP\&.
+.sp
+\fB\-EINVAL\fP if no valid digits were found or unsupported base
+was provided.
+.sp
+\fB\-ERANGE\fP if resulting value was out of range.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_sk_storage_get(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIsk\fP\fB, void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a bpf\-local\-storage from a \fIsk\fP\&.
+.sp
+Logically, it could be thought of as getting the value from
+a \fImap\fP with \fIsk\fP as the \fBkey\fP\&. From this
+perspective, the usage is not much different from
+\fBbpf_map_lookup_elem\fP(\fImap\fP, \fB&\fP\fIsk\fP) except this
+helper enforces the key must be a full socket and the map must
+be a \fBBPF_MAP_TYPE_SK_STORAGE\fP also.
+.sp
+Underneath, the value is stored locally at \fIsk\fP instead of
+the \fImap\fP\&. The \fImap\fP is used as the bpf\-local\-storage
+\(dqtype\(dq. The bpf\-local\-storage \(dqtype\(dq (i.e. the \fImap\fP) is
+searched against all bpf\-local\-storages residing at \fIsk\fP\&.
+.sp
+\fIsk\fP is a kernel \fBstruct sock\fP pointer for LSM program.
+\fIsk\fP is a \fBstruct bpf_sock\fP pointer for other program types.
+.sp
+An optional \fIflags\fP (\fBBPF_SK_STORAGE_GET_F_CREATE\fP) can be
+used such that a new bpf\-local\-storage will be
+created if one does not exist. \fIvalue\fP can be used
+together with \fBBPF_SK_STORAGE_GET_F_CREATE\fP to specify
+the initial value of a bpf\-local\-storage. If \fIvalue\fP is
+\fBNULL\fP, the new bpf\-local\-storage will be zero initialized.
+.TP
+.B Return
+A bpf\-local\-storage pointer is returned on success.
+.sp
+\fBNULL\fP if not found or there was an error in adding
+a new bpf\-local\-storage.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_storage_delete(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Delete a bpf\-local\-storage from a \fIsk\fP\&.
+.TP
+.B Return
+0 on success.
+.sp
+\fB\-ENOENT\fP if the bpf\-local\-storage cannot be found.
+.sp
+\fB\-EINVAL\fP if sk is not a fullsock (e.g. a request_sock).
+.UNINDENT
+.TP
+.B \fBlong bpf_send_signal(u32\fP \fIsig\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Send signal \fIsig\fP to the process of the current task.
+The signal may be delivered to any of this process\(aqs threads.
+.TP
+.B Return
+0 on success or successfully queued.
+.sp
+\fB\-EBUSY\fP if work queue under nmi is full.
+.sp
+\fB\-EINVAL\fP if \fIsig\fP is invalid.
+.sp
+\fB\-EPERM\fP if no permission to send the \fIsig\fP\&.
+.sp
+\fB\-EAGAIN\fP if bpf program can try again.
+.UNINDENT
+.TP
+.B \fBs64 bpf_tcp_gen_syncookie(void *\fP\fIsk\fP\fB, void *\fP\fIiph\fP\fB, u32\fP \fIiph_len\fP\fB, struct tcphdr *\fP\fIth\fP\fB, u32\fP \fIth_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Try to issue a SYN cookie for the packet with corresponding
+IP/TCP headers, \fIiph\fP and \fIth\fP, on the listening socket in \fIsk\fP\&.
+.sp
+\fIiph\fP points to the start of the IPv4 or IPv6 header, while
+\fIiph_len\fP contains \fBsizeof\fP(\fBstruct iphdr\fP) or
+\fBsizeof\fP(\fBstruct ipv6hdr\fP).
+.sp
+\fIth\fP points to the start of the TCP header, while \fIth_len\fP
+contains the length of the TCP header with options (at least
+\fBsizeof\fP(\fBstruct tcphdr\fP)).
+.TP
+.B Return
+On success, the lower 32 bits hold the generated SYN cookie,
+followed by 16 bits which hold the MSS value for that cookie,
+and the top 16 bits are unused.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EINVAL\fP SYN cookie cannot be issued due to error
+.sp
+\fB\-ENOENT\fP SYN cookie should not be issued (no SYN flood)
+.sp
+\fB\-EOPNOTSUPP\fP kernel configuration does not enable SYN cookies
+.sp
+\fB\-EPROTONOSUPPORT\fP IP packet version is not 4 or 6
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_output(void *\fP\fIctx\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB, void *\fP\fIdata\fP\fB, u64\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Write raw \fIdata\fP blob into a special BPF perf event held by
+\fImap\fP of type \fBBPF_MAP_TYPE_PERF_EVENT_ARRAY\fP\&. This perf
+event must have the following attributes: \fBPERF_SAMPLE_RAW\fP
+as \fBsample_type\fP, \fBPERF_TYPE_SOFTWARE\fP as \fBtype\fP, and
+\fBPERF_COUNT_SW_BPF_OUTPUT\fP as \fBconfig\fP\&.
+.sp
+The \fIflags\fP are used to indicate the index in \fImap\fP for which
+the value must be put, masked with \fBBPF_F_INDEX_MASK\fP\&.
+Alternatively, \fIflags\fP can be set to \fBBPF_F_CURRENT_CPU\fP
+to indicate that the index of the current CPU core should be
+used.
+.sp
+The value to write, of \fIsize\fP, is passed through eBPF stack and
+pointed by \fIdata\fP\&.
+.sp
+\fIctx\fP is a pointer to in\-kernel struct sk_buff.
+.sp
+This helper is similar to \fBbpf_perf_event_output\fP() but
+restricted to raw_tracepoint bpf programs.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read_user(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Safely attempt to read \fIsize\fP bytes from user space address
+\fIunsafe_ptr\fP and store the data in \fIdst\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read_kernel(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Safely attempt to read \fIsize\fP bytes from kernel space address
+\fIunsafe_ptr\fP and store the data in \fIdst\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read_user_str(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Copy a NUL terminated string from an unsafe user address
+\fIunsafe_ptr\fP to \fIdst\fP\&. The \fIsize\fP should include the
+terminating NUL byte. In case the string length is smaller than
+\fIsize\fP, the target is not padded with further NUL bytes. If the
+string length is larger than \fIsize\fP, just \fIsize\fP\-1 bytes are
+copied and the last byte is set to NUL.
+.sp
+On success, returns the number of bytes that were written,
+including the terminal NUL. This makes this helper useful in
+tracing programs for reading strings, and more importantly to
+get its length at runtime. See the following snippet:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+SEC(\(dqkprobe/sys_open\(dq)
+void bpf_sys_open(struct pt_regs *ctx)
+{
+ char buf[PATHLEN]; // PATHLEN is defined to 256
+ int res;
+
+ res = bpf_probe_read_user_str(buf, sizeof(buf),
+ ctx\->di);
+
+ // Consume buf, for example push it to
+ // userspace via bpf_perf_event_output(); we
+ // can use res (the string length) as event
+ // size, after checking its boundaries.
+}
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+In comparison, using \fBbpf_probe_read_user\fP() helper here
+instead to read the string would require to estimate the length
+at compile time, and would often result in copying more memory
+than necessary.
+.sp
+Another useful use case is when parsing individual process
+arguments or individual environment variables navigating
+\fIcurrent\fP\fB\->mm\->arg_start\fP and \fIcurrent\fP\fB\->mm\->env_start\fP: using this helper and the return value,
+one can quickly iterate at the right offset of the memory area.
+.TP
+.B Return
+On success, the strictly positive length of the output string,
+including the trailing NUL character. On error, a negative
+value.
+.UNINDENT
+.TP
+.B \fBlong bpf_probe_read_kernel_str(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIunsafe_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Copy a NUL terminated string from an unsafe kernel address \fIunsafe_ptr\fP
+to \fIdst\fP\&. Same semantics as with \fBbpf_probe_read_user_str\fP() apply.
+.TP
+.B Return
+On success, the strictly positive length of the string, including
+the trailing NUL character. On error, a negative value.
+.UNINDENT
+.TP
+.B \fBlong bpf_tcp_send_ack(void *\fP\fItp\fP\fB, u32\fP \fIrcv_nxt\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Send out a tcp\-ack. \fItp\fP is the in\-kernel struct \fBtcp_sock\fP\&.
+\fIrcv_nxt\fP is the ack_seq to be sent out.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_send_signal_thread(u32\fP \fIsig\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Send signal \fIsig\fP to the thread corresponding to the current task.
+.TP
+.B Return
+0 on success or successfully queued.
+.sp
+\fB\-EBUSY\fP if work queue under nmi is full.
+.sp
+\fB\-EINVAL\fP if \fIsig\fP is invalid.
+.sp
+\fB\-EPERM\fP if no permission to send the \fIsig\fP\&.
+.sp
+\fB\-EAGAIN\fP if bpf program can try again.
+.UNINDENT
+.TP
+.B \fBu64 bpf_jiffies64(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Obtain the 64\-bit jiffies.
+.TP
+.B Return
+The 64\-bit jiffies.
+.UNINDENT
+.TP
+.B \fBlong bpf_read_branch_records(struct bpf_perf_event_data *\fP\fIctx\fP\fB, void *\fP\fIbuf\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For an eBPF program attached to a perf event, retrieve the
+branch records (\fBstruct perf_branch_entry\fP) associated to \fIctx\fP
+and store it in the buffer pointed by \fIbuf\fP up to size
+\fIsize\fP bytes.
+.TP
+.B Return
+On success, number of bytes written to \fIbuf\fP\&. On error, a
+negative value.
+.sp
+The \fIflags\fP can be set to \fBBPF_F_GET_BRANCH_RECORDS_SIZE\fP to
+instead return the number of bytes required to store all the
+branch entries. If this flag is set, \fIbuf\fP may be NULL.
+.sp
+\fB\-EINVAL\fP if arguments invalid or \fBsize\fP not a multiple
+of \fBsizeof\fP(\fBstruct perf_branch_entry\fP).
+.sp
+\fB\-ENOENT\fP if architecture does not support branch records.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_ns_current_pid_tgid(u64\fP \fIdev\fP\fB, u64\fP \fIino\fP\fB, struct bpf_pidns_info *\fP\fInsdata\fP\fB, u32\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Returns 0 on success, values for \fIpid\fP and \fItgid\fP as seen from the current
+\fInamespace\fP will be returned in \fInsdata\fP\&.
+.TP
+.B Return
+0 on success, or one of the following in case of failure:
+.sp
+\fB\-EINVAL\fP if the supplied \fIdev\fP and \fIino\fP don\(aqt match the dev_t and inode number
+of the nsfs of the current task, or if \fIdev\fP conversion to dev_t lost high bits.
+.sp
+\fB\-ENOENT\fP if pidns does not exist for the current task.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_output(void *\fP\fIctx\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB, void *\fP\fIdata\fP\fB, u64\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Write raw \fIdata\fP blob into a special BPF perf event held by
+\fImap\fP of type \fBBPF_MAP_TYPE_PERF_EVENT_ARRAY\fP\&. This perf
+event must have the following attributes: \fBPERF_SAMPLE_RAW\fP
+as \fBsample_type\fP, \fBPERF_TYPE_SOFTWARE\fP as \fBtype\fP, and
+\fBPERF_COUNT_SW_BPF_OUTPUT\fP as \fBconfig\fP\&.
+.sp
+The \fIflags\fP are used to indicate the index in \fImap\fP for which
+the value must be put, masked with \fBBPF_F_INDEX_MASK\fP\&.
+Alternatively, \fIflags\fP can be set to \fBBPF_F_CURRENT_CPU\fP
+to indicate that the index of the current CPU core should be
+used.
+.sp
+The value to write, of \fIsize\fP, is passed through eBPF stack and
+pointed by \fIdata\fP\&.
+.sp
+\fIctx\fP is a pointer to in\-kernel struct xdp_buff.
+.sp
+This helper is similar to \fBbpf_perf_event_output\fP() but
+restricted to raw_tracepoint bpf programs.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_netns_cookie(void *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Retrieve the cookie (generated by the kernel) of the network
+namespace the input \fIctx\fP is associated with. The network
+namespace cookie remains stable for its lifetime and provides
+a global identifier that can be assumed unique. If \fIctx\fP is
+NULL, then the helper returns the cookie for the initial
+network namespace. The cookie itself is very similar to that
+of \fBbpf_get_socket_cookie\fP() helper, but for network
+namespaces instead of sockets.
+.TP
+.B Return
+An 8\-byte long opaque number.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_current_ancestor_cgroup_id(int\fP \fIancestor_level\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return id of cgroup v2 that is ancestor of the cgroup associated
+with the current task at the \fIancestor_level\fP\&. The root cgroup
+is at \fIancestor_level\fP zero and each step down the hierarchy
+increments the level. If \fIancestor_level\fP == level of cgroup
+associated with the current task, then return value will be the
+same as that of \fBbpf_get_current_cgroup_id\fP().
+.sp
+The helper is useful to implement policies based on cgroups
+that are upper in hierarchy than immediate cgroup associated
+with the current task.
+.sp
+The format of returned id and helper limitations are same as in
+\fBbpf_get_current_cgroup_id\fP().
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_assign(struct sk_buff *\fP\fIskb\fP\fB, void *\fP\fIsk\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Helper is overloaded depending on BPF program type. This
+description applies to \fBBPF_PROG_TYPE_SCHED_CLS\fP and
+\fBBPF_PROG_TYPE_SCHED_ACT\fP programs.
+.sp
+Assign the \fIsk\fP to the \fIskb\fP\&. When combined with appropriate
+routing configuration to receive the packet towards the socket,
+will cause \fIskb\fP to be delivered to the specified socket.
+Subsequent redirection of \fIskb\fP via \fBbpf_redirect\fP(),
+\fBbpf_clone_redirect\fP() or other methods outside of BPF may
+interfere with successful delivery to the socket.
+.sp
+This operation is only valid from TC ingress path.
+.sp
+The \fIflags\fP argument must be zero.
+.TP
+.B Return
+0 on success, or a negative error in case of failure:
+.sp
+\fB\-EINVAL\fP if specified \fIflags\fP are not supported.
+.sp
+\fB\-ENOENT\fP if the socket is unavailable for assignment.
+.sp
+\fB\-ENETUNREACH\fP if the socket is unreachable (wrong netns).
+.sp
+\fB\-EOPNOTSUPP\fP if the operation is not supported, for example
+a call from outside of TC ingress.
+.sp
+\fB\-ESOCKTNOSUPPORT\fP if the socket type is not supported
+(reuseport).
+.UNINDENT
+.TP
+.B \fBlong bpf_sk_assign(struct bpf_sk_lookup *\fP\fIctx\fP\fB, struct bpf_sock *\fP\fIsk\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Helper is overloaded depending on BPF program type. This
+description applies to \fBBPF_PROG_TYPE_SK_LOOKUP\fP programs.
+.sp
+Select the \fIsk\fP as a result of a socket lookup.
+.sp
+For the operation to succeed passed socket must be compatible
+with the packet description provided by the \fIctx\fP object.
+.sp
+L4 protocol (\fBIPPROTO_TCP\fP or \fBIPPROTO_UDP\fP) must
+be an exact match. While IP family (\fBAF_INET\fP or
+\fBAF_INET6\fP) must be compatible, that is IPv6 sockets
+that are not v6\-only can be selected for IPv4 packets.
+.sp
+Only TCP listeners and UDP unconnected sockets can be
+selected. \fIsk\fP can also be NULL to reset any previous
+selection.
+.sp
+\fIflags\fP argument can be a combination of the following values:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_SK_LOOKUP_F_REPLACE\fP to override the previous
+socket selection, potentially done by a BPF program
+that ran before us.
+.IP \(bu 2
+\fBBPF_SK_LOOKUP_F_NO_REUSEPORT\fP to skip
+load\-balancing within reuseport group for the socket
+being selected.
+.UNINDENT
+.sp
+On success \fIctx\->sk\fP will point to the selected socket.
+.TP
+.B Return
+0 on success, or a negative errno in case of failure.
+.INDENT 7.0
+.IP \(bu 2
+\fB\-EAFNOSUPPORT\fP if socket family (\fIsk\->family\fP) is
+not compatible with packet family (\fIctx\->family\fP).
+.IP \(bu 2
+\fB\-EEXIST\fP if socket has been already selected,
+potentially by another program, and
+\fBBPF_SK_LOOKUP_F_REPLACE\fP flag was not specified.
+.IP \(bu 2
+\fB\-EINVAL\fP if unsupported flags were specified.
+.IP \(bu 2
+\fB\-EPROTOTYPE\fP if socket L4 protocol
+(\fIsk\->protocol\fP) doesn\(aqt match packet protocol
+(\fIctx\->protocol\fP).
+.IP \(bu 2
+\fB\-ESOCKTNOSUPPORT\fP if socket is not in allowed
+state (TCP listening or UDP unconnected).
+.UNINDENT
+.UNINDENT
+.TP
+.B \fBu64 bpf_ktime_get_boot_ns(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return the time elapsed since system boot, in nanoseconds.
+Does include the time the system was suspended.
+See: \fBclock_gettime\fP(\fBCLOCK_BOOTTIME\fP)
+.TP
+.B Return
+Current \fIktime\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_seq_printf(struct seq_file *\fP\fIm\fP\fB, const char *\fP\fIfmt\fP\fB, u32\fP \fIfmt_size\fP\fB, const void *\fP\fIdata\fP\fB, u32\fP \fIdata_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+\fBbpf_seq_printf\fP() uses seq_file \fBseq_printf\fP() to print
+out the format string.
+The \fIm\fP represents the seq_file. The \fIfmt\fP and \fIfmt_size\fP are for
+the format string itself. The \fIdata\fP and \fIdata_len\fP are format string
+arguments. The \fIdata\fP are a \fBu64\fP array and corresponding format string
+values are stored in the array. For strings and pointers where pointees
+are accessed, only the pointer values are stored in the \fIdata\fP array.
+The \fIdata_len\fP is the size of \fIdata\fP in bytes \- must be a multiple of 8.
+.sp
+Formats \fB%s\fP and \fB%p{i,I}{4,6}\fP require reading kernel memory.
+Reading kernel memory may fail due to either invalid address or
+valid address but requiring a major memory fault. If reading kernel memory
+fails, the string for \fB%s\fP will be an empty string, and the ip
+address for \fB%p{i,I}{4,6}\fP will be 0. Not returning error to
+bpf program is consistent with what \fBbpf_trace_printk\fP() does for now.
+.TP
+.B Return
+0 on success, or a negative error in case of failure:
+.sp
+\fB\-EBUSY\fP if per\-CPU memory copy buffer is busy, can try again
+by returning 1 from bpf program.
+.sp
+\fB\-EINVAL\fP if arguments are invalid, or if \fIfmt\fP is invalid/unsupported.
+.sp
+\fB\-E2BIG\fP if \fIfmt\fP contains too many format specifiers.
+.sp
+\fB\-EOVERFLOW\fP if an overflow happened: The same object will be tried again.
+.UNINDENT
+.TP
+.B \fBlong bpf_seq_write(struct seq_file *\fP\fIm\fP\fB, const void *\fP\fIdata\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+\fBbpf_seq_write\fP() uses seq_file \fBseq_write\fP() to write the data.
+The \fIm\fP represents the seq_file. The \fIdata\fP and \fIlen\fP represent the
+data to write in bytes.
+.TP
+.B Return
+0 on success, or a negative error in case of failure:
+.sp
+\fB\-EOVERFLOW\fP if an overflow happened: The same object will be tried again.
+.UNINDENT
+.TP
+.B \fBu64 bpf_sk_cgroup_id(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return the cgroup v2 id of the socket \fIsk\fP\&.
+.sp
+\fIsk\fP must be a non\-\fBNULL\fP pointer to a socket, e.g. one
+returned from \fBbpf_sk_lookup_xxx\fP(),
+\fBbpf_sk_fullsock\fP(), etc. The format of returned id is
+same as in \fBbpf_skb_cgroup_id\fP().
+.sp
+This helper is available only if the kernel was compiled with
+the \fBCONFIG_SOCK_CGROUP_DATA\fP configuration option.
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBu64 bpf_sk_ancestor_cgroup_id(void *\fP\fIsk\fP\fB, int\fP \fIancestor_level\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return id of cgroup v2 that is ancestor of cgroup associated
+with the \fIsk\fP at the \fIancestor_level\fP\&. The root cgroup is at
+\fIancestor_level\fP zero and each step down the hierarchy
+increments the level. If \fIancestor_level\fP == level of cgroup
+associated with \fIsk\fP, then return value will be same as that
+of \fBbpf_sk_cgroup_id\fP().
+.sp
+The helper is useful to implement policies based on cgroups
+that are upper in hierarchy than immediate cgroup associated
+with \fIsk\fP\&.
+.sp
+The format of returned id and helper limitations are same as in
+\fBbpf_sk_cgroup_id\fP().
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBlong bpf_ringbuf_output(void *\fP\fIringbuf\fP\fB, void *\fP\fIdata\fP\fB, u64\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Copy \fIsize\fP bytes from \fIdata\fP into a ring buffer \fIringbuf\fP\&.
+If \fBBPF_RB_NO_WAKEUP\fP is specified in \fIflags\fP, no notification
+of new data availability is sent.
+If \fBBPF_RB_FORCE_WAKEUP\fP is specified in \fIflags\fP, notification
+of new data availability is sent unconditionally.
+If \fB0\fP is specified in \fIflags\fP, an adaptive notification
+of new data availability is sent.
+.sp
+An adaptive notification is a notification sent whenever the user\-space
+process has caught up and consumed all available payloads. In case the user\-space
+process is still processing a previous payload, then no notification is needed
+as it will process the newly added payload automatically.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_ringbuf_reserve(void *\fP\fIringbuf\fP\fB, u64\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Reserve \fIsize\fP bytes of payload in a ring buffer \fIringbuf\fP\&.
+\fIflags\fP must be 0.
+.TP
+.B Return
+Valid pointer with \fIsize\fP bytes of memory available; NULL,
+otherwise.
+.UNINDENT
+.TP
+.B \fBvoid bpf_ringbuf_submit(void *\fP\fIdata\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Submit reserved ring buffer sample, pointed to by \fIdata\fP\&.
+If \fBBPF_RB_NO_WAKEUP\fP is specified in \fIflags\fP, no notification
+of new data availability is sent.
+If \fBBPF_RB_FORCE_WAKEUP\fP is specified in \fIflags\fP, notification
+of new data availability is sent unconditionally.
+If \fB0\fP is specified in \fIflags\fP, an adaptive notification
+of new data availability is sent.
+.sp
+See \(aqbpf_ringbuf_output()\(aq for the definition of adaptive notification.
+.TP
+.B Return
+Nothing. Always succeeds.
+.UNINDENT
+.TP
+.B \fBvoid bpf_ringbuf_discard(void *\fP\fIdata\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Discard reserved ring buffer sample, pointed to by \fIdata\fP\&.
+If \fBBPF_RB_NO_WAKEUP\fP is specified in \fIflags\fP, no notification
+of new data availability is sent.
+If \fBBPF_RB_FORCE_WAKEUP\fP is specified in \fIflags\fP, notification
+of new data availability is sent unconditionally.
+If \fB0\fP is specified in \fIflags\fP, an adaptive notification
+of new data availability is sent.
+.sp
+See \(aqbpf_ringbuf_output()\(aq for the definition of adaptive notification.
+.TP
+.B Return
+Nothing. Always succeeds.
+.UNINDENT
+.TP
+.B \fBu64 bpf_ringbuf_query(void *\fP\fIringbuf\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Query various characteristics of provided ring buffer. What
+exactly is queried is determined by \fIflags\fP:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_RB_AVAIL_DATA\fP: Amount of data not yet consumed.
+.IP \(bu 2
+\fBBPF_RB_RING_SIZE\fP: The size of ring buffer.
+.IP \(bu 2
+\fBBPF_RB_CONS_POS\fP: Consumer position (can wrap around).
+.IP \(bu 2
+\fBBPF_RB_PROD_POS\fP: Producer(s) position (can wrap around).
+.UNINDENT
+.sp
+Data returned is just a momentary snapshot of actual values
+and could be inaccurate, so this facility should be used to
+power heuristics and for reporting, not to make 100% correct
+calculation.
+.TP
+.B Return
+Requested value, or 0, if \fIflags\fP are not recognized.
+.UNINDENT
+.TP
+.B \fBlong bpf_csum_level(struct sk_buff *\fP\fIskb\fP\fB, u64\fP \fIlevel\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Change the skb\(aqs checksum level by one layer up or down, or
+reset it entirely to none in order to have the stack perform
+checksum validation. The level is applicable to the following
+protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+| ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+through \fBbpf_skb_adjust_room\fP() helper with passing in
+\fBBPF_F_ADJ_ROOM_NO_CSUM_RESET\fP flag would require one call
+to \fBbpf_csum_level\fP() with \fBBPF_CSUM_LEVEL_DEC\fP since
+the UDP header is removed. Similarly, an encap of the latter
+into the former could be accompanied by a helper call to
+\fBbpf_csum_level\fP() with \fBBPF_CSUM_LEVEL_INC\fP if the
+skb is still intended to be processed in higher layers of the
+stack instead of just egressing at tc.
+.sp
+There are four supported level settings at this time:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_CSUM_LEVEL_INC\fP: Increases skb\->csum_level for skbs
+with CHECKSUM_UNNECESSARY.
+.IP \(bu 2
+\fBBPF_CSUM_LEVEL_DEC\fP: Decreases skb\->csum_level for skbs
+with CHECKSUM_UNNECESSARY.
+.IP \(bu 2
+\fBBPF_CSUM_LEVEL_RESET\fP: Resets skb\->csum_level to 0 and
+sets CHECKSUM_NONE to force checksum validation by the stack.
+.IP \(bu 2
+\fBBPF_CSUM_LEVEL_QUERY\fP: No\-op, returns the current
+skb\->csum_level.
+.UNINDENT
+.TP
+.B Return
+0 on success, or a negative error in case of failure. In the
+case of \fBBPF_CSUM_LEVEL_QUERY\fP, the current skb\->csum_level
+is returned or the error code \-EACCES in case the skb is not
+subject to CHECKSUM_UNNECESSARY.
+.UNINDENT
+.TP
+.B \fBstruct tcp6_sock *bpf_skc_to_tcp6_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fItcp6_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBstruct tcp_sock *bpf_skc_to_tcp_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fItcp_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBstruct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fItcp_timewait_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBstruct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fItcp_request_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBstruct udp6_sock *bpf_skc_to_udp6_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fIudp6_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_task_stack(struct task_struct *\fP\fItask\fP\fB, void *\fP\fIbuf\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return a user or a kernel stack in bpf program provided buffer.
+To achieve this, the helper needs \fItask\fP, which is a valid
+pointer to \fBstruct task_struct\fP\&. To store the stacktrace, the
+bpf program provides \fIbuf\fP with a nonnegative \fIsize\fP\&.
+.sp
+The last argument, \fIflags\fP, holds the number of stack frames to
+skip (from 0 to 255), masked with
+\fBBPF_F_SKIP_FIELD_MASK\fP\&. The next bits can be used to set
+the following flags:
+.INDENT 7.0
+.TP
+.B \fBBPF_F_USER_STACK\fP
+Collect a user space stack instead of a kernel stack.
+.TP
+.B \fBBPF_F_USER_BUILD_ID\fP
+Collect buildid+offset instead of ips for user stack,
+only valid if \fBBPF_F_USER_STACK\fP is also specified.
+.UNINDENT
+.sp
+\fBbpf_get_task_stack\fP() can collect up to
+\fBPERF_MAX_STACK_DEPTH\fP both kernel and user frames, subject
+to a sufficiently large buffer size. Note that
+this limit can be controlled with the \fBsysctl\fP program, and
+that it should be manually increased in order to profile long
+user stacks (such as stacks for Java programs). To do so, use:
+.INDENT 7.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+# sysctl kernel.perf_event_max_stack=<new value>
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.TP
+.B Return
+The non\-negative copied \fIbuf\fP length equal to or less than
+\fIsize\fP on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_load_hdr_opt(struct bpf_sock_ops *\fP\fIskops\fP\fB, void *\fP\fIsearchby_res\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Load header option. Support reading a particular TCP header
+option for bpf program (\fBBPF_PROG_TYPE_SOCK_OPS\fP).
+.sp
+If \fIflags\fP is 0, it will search the option from the
+\fIskops\fP\fB\->skb_data\fP\&. The comment in \fBstruct bpf_sock_ops\fP
+has details on what skb_data contains under different
+\fIskops\fP\fB\->op\fP\&.
+.sp
+The first byte of the \fIsearchby_res\fP specifies the
+kind that it wants to search.
+.sp
+If the searching kind is an experimental kind
+(i.e. 253 or 254 according to RFC6994), it also
+needs to specify the \(dqmagic\(dq which is either
+2 bytes or 4 bytes. It then also needs to
+specify the size of the magic by using
+the 2nd byte which is \(dqkind\-length\(dq of a TCP
+header option and the \(dqkind\-length\(dq also
+includes the first 2 bytes \(dqkind\(dq and \(dqkind\-length\(dq
+itself as a normal TCP header option also does.
+.sp
+For example, to search experimental kind 254 with
+2 byte magic 0xeB9F, the searchby_res should be
+[ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ].
+.sp
+To search for the standard window scale option (3),
+the \fIsearchby_res\fP should be [ 3, 0, 0, .... 0 ].
+Note, kind\-length must be 0 for regular option.
+.sp
+Searching for No\-Op (0) and End\-of\-Option\-List (1) are
+not supported.
+.sp
+\fIlen\fP must be at least 2 bytes which is the minimal size
+of a header option.
+.sp
+Supported flags:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_LOAD_HDR_OPT_TCP_SYN\fP to search from the
+saved_syn packet or the just\-received syn packet.
+.UNINDENT
+.TP
+.B Return
+> 0 when found, the header option is copied to \fIsearchby_res\fP\&.
+The return value is the total length copied. On failure, a
+negative error code is returned:
+.sp
+\fB\-EINVAL\fP if a parameter is invalid.
+.sp
+\fB\-ENOMSG\fP if the option is not found.
+.sp
+\fB\-ENOENT\fP if no syn packet is available when
+\fBBPF_LOAD_HDR_OPT_TCP_SYN\fP is used.
+.sp
+\fB\-ENOSPC\fP if there is not enough space. Only \fIlen\fP number of
+bytes are copied.
+.sp
+\fB\-EFAULT\fP on failure to parse the header options in the
+packet.
+.sp
+\fB\-EPERM\fP if the helper cannot be used under the current
+\fIskops\fP\fB\->op\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_store_hdr_opt(struct bpf_sock_ops *\fP\fIskops\fP\fB, const void *\fP\fIfrom\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Store header option. The data will be copied
+from buffer \fIfrom\fP with length \fIlen\fP to the TCP header.
+.sp
+The buffer \fIfrom\fP should have the whole option that
+includes the kind, kind\-length, and the actual
+option data. The \fIlen\fP must be at least kind\-length
+long. The kind\-length does not have to be 4 byte
+aligned. The kernel will take care of the padding
+and setting the 4 bytes aligned value to th\->doff.
+.sp
+This helper will check for duplicated option
+by searching the same option in the outgoing skb.
+.sp
+This helper can only be called during
+\fBBPF_SOCK_OPS_WRITE_HDR_OPT_CB\fP\&.
+.TP
+.B Return
+0 on success, or negative error in case of failure:
+.sp
+\fB\-EINVAL\fP if a parameter is invalid.
+.sp
+\fB\-ENOSPC\fP if there is not enough space in the header.
+Nothing has been written.
+.sp
+\fB\-EEXIST\fP if the option already exists.
+.sp
+\fB\-EFAULT\fP on failure to parse the existing header options.
+.sp
+\fB\-EPERM\fP if the helper cannot be used under the current
+\fIskops\fP\fB\->op\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_reserve_hdr_opt(struct bpf_sock_ops *\fP\fIskops\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Reserve \fIlen\fP bytes for the bpf header option. The
+space will be used by \fBbpf_store_hdr_opt\fP() later in
+\fBBPF_SOCK_OPS_WRITE_HDR_OPT_CB\fP\&.
+.sp
+If \fBbpf_reserve_hdr_opt\fP() is called multiple times,
+the total number of bytes will be reserved.
+.sp
+This helper can only be called during
+\fBBPF_SOCK_OPS_HDR_OPT_LEN_CB\fP\&.
+.TP
+.B Return
+0 on success, or negative error in case of failure:
+.sp
+\fB\-EINVAL\fP if a parameter is invalid.
+.sp
+\fB\-ENOSPC\fP if there is not enough space in the header.
+.sp
+\fB\-EPERM\fP if the helper cannot be used under the current
+\fIskops\fP\fB\->op\fP\&.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_inode_storage_get(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIinode\fP\fB, void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a bpf_local_storage from an \fIinode\fP\&.
+.sp
+Logically, it could be thought of as getting the value from
+a \fImap\fP with \fIinode\fP as the \fBkey\fP\&. From this
+perspective, the usage is not much different from
+\fBbpf_map_lookup_elem\fP(\fImap\fP, \fB&\fP\fIinode\fP) except this
+helper enforces the key must be an inode and the map must also
+be a \fBBPF_MAP_TYPE_INODE_STORAGE\fP\&.
+.sp
+Underneath, the value is stored locally at \fIinode\fP instead of
+the \fImap\fP\&. The \fImap\fP is used as the bpf\-local\-storage
+\(dqtype\(dq. The bpf\-local\-storage \(dqtype\(dq (i.e. the \fImap\fP) is
+searched against all bpf_local_storage residing at \fIinode\fP\&.
+.sp
+An optional \fIflags\fP (\fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP) can be
+used such that a new bpf_local_storage will be
+created if one does not exist. \fIvalue\fP can be used
+together with \fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP to specify
+the initial value of a bpf_local_storage. If \fIvalue\fP is
+\fBNULL\fP, the new bpf_local_storage will be zero initialized.
+.TP
+.B Return
+A bpf_local_storage pointer is returned on success.
+.sp
+\fBNULL\fP if not found or there was an error in adding
+a new bpf_local_storage.
+.UNINDENT
+.TP
+.B \fBint bpf_inode_storage_delete(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIinode\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Delete a bpf_local_storage from an \fIinode\fP\&.
+.TP
+.B Return
+0 on success.
+.sp
+\fB\-ENOENT\fP if the bpf_local_storage cannot be found.
+.UNINDENT
+.TP
+.B \fBlong bpf_d_path(struct path *\fP\fIpath\fP\fB, char *\fP\fIbuf\fP\fB, u32\fP \fIsz\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return full path for given \fBstruct path\fP object, which
+needs to be the kernel BTF \fIpath\fP object. The path is
+returned in the provided buffer \fIbuf\fP of size \fIsz\fP and
+is zero terminated.
+.TP
+.B Return
+On success, the strictly positive length of the string,
+including the trailing NUL character. On error, a negative
+value.
+.UNINDENT
+.TP
+.B \fBlong bpf_copy_from_user(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIuser_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Read \fIsize\fP bytes from user space address \fIuser_ptr\fP and store
+the data in \fIdst\fP\&. This is a wrapper of \fBcopy_from_user\fP().
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_snprintf_btf(char *\fP\fIstr\fP\fB, u32\fP \fIstr_size\fP\fB, struct btf_ptr *\fP\fIptr\fP\fB, u32\fP \fIbtf_ptr_size\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Use BTF to store a string representation of \fIptr\fP\->ptr in \fIstr\fP,
+using \fIptr\fP\->type_id. This value should specify the type
+that \fIptr\fP\->ptr points to. LLVM __builtin_btf_type_id(type, 1)
+can be used to look up vmlinux BTF type ids. Traversing the
+data structure using BTF, the type information and values are
+stored in the first \fIstr_size\fP \- 1 bytes of \fIstr\fP\&. Safe copy of
+the pointer data is carried out to avoid kernel crashes during
+operation. Smaller types can use string space on the stack;
+larger programs can use map data to store the string
+representation.
+.sp
+The string can be subsequently shared with userspace via
+bpf_perf_event_output() or ring buffer interfaces.
+bpf_trace_printk() is to be avoided as it places too small
+a limit on string size to be useful.
+.sp
+\fIflags\fP is a combination of
+.INDENT 7.0
+.TP
+.B \fBBTF_F_COMPACT\fP
+no formatting around type information
+.TP
+.B \fBBTF_F_NONAME\fP
+no struct/union member names/types
+.TP
+.B \fBBTF_F_PTR_RAW\fP
+show raw (unobfuscated) pointer values;
+equivalent to printk specifier %px.
+.TP
+.B \fBBTF_F_ZERO\fP
+show zero\-valued struct/union members; they
+are not displayed by default
+.UNINDENT
+.TP
+.B Return
+The number of bytes that were written (or would have been
+written if output had to be truncated due to string size),
+or a negative error in cases of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_seq_printf_btf(struct seq_file *\fP\fIm\fP\fB, struct btf_ptr *\fP\fIptr\fP\fB, u32\fP \fIptr_size\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Use BTF to write to seq_write a string representation of
+\fIptr\fP\->ptr, using \fIptr\fP\->type_id as per bpf_snprintf_btf().
+\fIflags\fP are identical to those used for bpf_snprintf_btf.
+.TP
+.B Return
+0 on success or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_skb_cgroup_classid(struct sk_buff *\fP\fIskb\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+See \fBbpf_get_cgroup_classid\fP() for the main description.
+This helper differs from \fBbpf_get_cgroup_classid\fP() in that
+the cgroup v1 net_cls class is retrieved only from the \fIskb\fP\(aqs
+associated socket instead of the current process.
+.TP
+.B Return
+The id is returned or 0 in case the id could not be retrieved.
+.UNINDENT
+.TP
+.B \fBlong bpf_redirect_neigh(u32\fP \fIifindex\fP\fB, struct bpf_redir_neigh *\fP\fIparams\fP\fB, int\fP \fIplen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Redirect the packet to another net device of index \fIifindex\fP
+and fill in L2 addresses from neighboring subsystem. This helper
+is somewhat similar to \fBbpf_redirect\fP(), except that it
+populates L2 addresses as well, meaning, internally, the helper
+relies on the neighbor lookup for the L2 address of the nexthop.
+.sp
+The helper will perform a FIB lookup based on the skb\(aqs
+networking header to get the address of the next hop, unless
+this is supplied by the caller in the \fIparams\fP argument. The
+\fIplen\fP argument indicates the len of \fIparams\fP and should be set
+to 0 if \fIparams\fP is NULL.
+.sp
+The \fIflags\fP argument is reserved and must be 0. The helper is
+currently only supported for tc BPF program types, and enabled
+for IPv4 and IPv6 protocols.
+.TP
+.B Return
+The helper returns \fBTC_ACT_REDIRECT\fP on success or
+\fBTC_ACT_SHOT\fP on error.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_per_cpu_ptr(const void *\fP\fIpercpu_ptr\fP\fB, u32\fP \fIcpu\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Take a pointer to a percpu ksym, \fIpercpu_ptr\fP, and return a
+pointer to the percpu kernel variable on \fIcpu\fP\&. A ksym is an
+extern variable decorated with \(aq__ksym\(aq. For ksym, there is a
+global var (either static or global) defined of the same name
+in the kernel. The ksym is percpu if the global var is percpu.
+The returned pointer points to the global percpu var on \fIcpu\fP\&.
+.sp
+bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the
+kernel, except that bpf_per_cpu_ptr() may return NULL. This
+happens if \fIcpu\fP is greater than or equal to nr_cpu_ids. The caller of
+bpf_per_cpu_ptr() must check the returned value.
+.TP
+.B Return
+A pointer pointing to the kernel percpu variable on \fIcpu\fP, or
+NULL, if \fIcpu\fP is invalid.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_this_cpu_ptr(const void *\fP\fIpercpu_ptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Take a pointer to a percpu ksym, \fIpercpu_ptr\fP, and return a
+pointer to the percpu kernel variable on this cpu. See the
+description of \(aqksym\(aq in \fBbpf_per_cpu_ptr\fP().
+.sp
+bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in
+the kernel. Different from \fBbpf_per_cpu_ptr\fP(), it would
+never return NULL.
+.TP
+.B Return
+A pointer pointing to the kernel percpu variable on this cpu.
+.UNINDENT
+.TP
+.B \fBlong bpf_redirect_peer(u32\fP \fIifindex\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Redirect the packet to another net device of index \fIifindex\fP\&.
+This helper is somewhat similar to \fBbpf_redirect\fP(), except
+that the redirection happens to the \fIifindex\fP\(aq peer device and
+the netns switch takes place from ingress to ingress without
+going through the CPU\(aqs backlog queue.
+.sp
+The \fIflags\fP argument is reserved and must be 0. The helper is
+currently only supported for tc BPF program types at the ingress
+hook and for veth device types. The peer device must reside in a
+different network namespace.
+.TP
+.B Return
+The helper returns \fBTC_ACT_REDIRECT\fP on success or
+\fBTC_ACT_SHOT\fP on error.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_task_storage_get(struct bpf_map *\fP\fImap\fP\fB, struct task_struct *\fP\fItask\fP\fB, void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a bpf_local_storage from the \fItask\fP\&.
+.sp
+Logically, it could be thought of as getting the value from
+a \fImap\fP with \fItask\fP as the \fBkey\fP\&. From this
+perspective, the usage is not much different from
+\fBbpf_map_lookup_elem\fP(\fImap\fP, \fB&\fP\fItask\fP) except this
+helper enforces the key must be a task_struct and the map must also
+be a \fBBPF_MAP_TYPE_TASK_STORAGE\fP\&.
+.sp
+Underneath, the value is stored locally at \fItask\fP instead of
+the \fImap\fP\&. The \fImap\fP is used as the bpf\-local\-storage
+\(dqtype\(dq. The bpf\-local\-storage \(dqtype\(dq (i.e. the \fImap\fP) is
+searched against all bpf_local_storage residing at \fItask\fP\&.
+.sp
+An optional \fIflags\fP (\fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP) can be
+used such that a new bpf_local_storage will be
+created if one does not exist. \fIvalue\fP can be used
+together with \fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP to specify
+the initial value of a bpf_local_storage. If \fIvalue\fP is
+\fBNULL\fP, the new bpf_local_storage will be zero initialized.
+.TP
+.B Return
+A bpf_local_storage pointer is returned on success.
+.sp
+\fBNULL\fP if not found or there was an error in adding
+a new bpf_local_storage.
+.UNINDENT
+.TP
+.B \fBlong bpf_task_storage_delete(struct bpf_map *\fP\fImap\fP\fB, struct task_struct *\fP\fItask\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Delete a bpf_local_storage from a \fItask\fP\&.
+.TP
+.B Return
+0 on success.
+.sp
+\fB\-ENOENT\fP if the bpf_local_storage cannot be found.
+.UNINDENT
+.TP
+.B \fBstruct task_struct *bpf_get_current_task_btf(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return a BTF pointer to the \(dqcurrent\(dq task.
+This pointer can also be used in helpers that accept an
+\fIARG_PTR_TO_BTF_ID\fP of type \fItask_struct\fP\&.
+.TP
+.B Return
+Pointer to the current task.
+.UNINDENT
+.TP
+.B \fBlong bpf_bprm_opts_set(struct linux_binprm *\fP\fIbprm\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set or clear certain options on \fIbprm\fP:
+.sp
+\fBBPF_F_BPRM_SECUREEXEC\fP Set the secureexec bit
+which sets the \fBAT_SECURE\fP auxv for glibc. The bit
+is cleared if the flag is not specified.
+.TP
+.B Return
+\fB\-EINVAL\fP if invalid \fIflags\fP are passed, zero otherwise.
+.UNINDENT
+.TP
+.B \fBu64 bpf_ktime_get_coarse_ns(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Return a coarse\-grained version of the time elapsed since
+system boot, in nanoseconds. Does not include time the system
+was suspended.
+.sp
+See: \fBclock_gettime\fP(\fBCLOCK_MONOTONIC_COARSE\fP)
+.TP
+.B Return
+Current \fIktime\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_ima_inode_hash(struct inode *\fP\fIinode\fP\fB, void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Returns the stored IMA hash of the \fIinode\fP (if it\(aqs available).
+If the hash is larger than \fIsize\fP, then only \fIsize\fP
+bytes will be copied to \fIdst\fP\&.
+.TP
+.B Return
+The \fBhash_algo\fP is returned on success,
+\fB\-EOPNOTSUPP\fP if IMA is disabled or \fB\-EINVAL\fP if
+invalid arguments are passed.
+.UNINDENT
+.TP
+.B \fBstruct socket *bpf_sock_from_file(struct file *\fP\fIfile\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+If the given file represents a socket, returns the associated
+socket.
+.TP
+.B Return
+A pointer to a struct socket on success or NULL if the file is
+not a socket.
+.UNINDENT
+.TP
+.B \fBlong bpf_check_mtu(void *\fP\fIctx\fP\fB, u32\fP \fIifindex\fP\fB, u32 *\fP\fImtu_len\fP\fB, s32\fP \fIlen_diff\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check packet size against exceeding MTU of net device (based
+on \fIifindex\fP). This helper will likely be used in combination
+with helpers that adjust/change the packet size.
+.sp
+The argument \fIlen_diff\fP can be used for querying with a planned
+size change. This allows checking the MTU prior to changing the packet
+ctx. Providing a \fIlen_diff\fP adjustment that is larger than the
+actual packet size (resulting in negative packet size) will in
+principle not exceed the MTU, which is why it is not considered
+a failure. Other BPF helpers are needed for performing the
+planned size change; therefore the responsibility for catching
+a negative packet size belongs in those helpers.
+.sp
+Specifying \fIifindex\fP zero means the MTU check is performed
+against the current net device. This is practical if this isn\(aqt
+used prior to redirect.
+.sp
+On input \fImtu_len\fP must be a valid pointer, else the verifier will
+reject the BPF program. If the value \fImtu_len\fP is initialized to
+zero then the ctx packet size is used. When a value \fImtu_len\fP is
+provided as input this specifies the L3 length that the MTU check
+is done against. Remember XDP and TC length operate at L2, but
+this value is L3 as this correlates to MTU and IP\-header tot_len
+values which are L3 (similar behavior as bpf_fib_lookup).
+.sp
+The Linux kernel route table can configure MTUs on a more
+specific per route level, which is not provided by this helper.
+For route level MTU checks use the \fBbpf_fib_lookup\fP()
+helper.
+.sp
+\fIctx\fP is either \fBstruct xdp_md\fP for XDP programs or
+\fBstruct sk_buff\fP for tc cls_act programs.
+.sp
+The \fIflags\fP argument can be a combination of one or more of the
+following values:
+.INDENT 7.0
+.TP
+.B \fBBPF_MTU_CHK_SEGS\fP
+This flag will only work for \fIctx\fP \fBstruct sk_buff\fP\&.
+If packet context contains extra packet segment buffers
+(often known as GSO skb), then MTU check is harder to
+check at this point, because in transmit path it is
+possible for the skb packet to get re\-segmented
+(depending on net device features). This could still be
+a MTU violation, so this flag enables performing MTU
+check against segments, with a different violation
+return code to tell it apart. Check cannot use len_diff.
+.UNINDENT
+.sp
+On return \fImtu_len\fP pointer contains the MTU value of the net
+device. Remember the net device configured MTU is the L3 size,
+which is returned here and XDP and TC length operate at L2.
+The helper takes this into account for you, but remember this when using
+the MTU value in your BPF\-code.
+.TP
+.B Return
+.INDENT 7.0
+.IP \(bu 2
+0 on success, and populate MTU value in \fImtu_len\fP pointer.
+.IP \(bu 2
+< 0 if any input argument is invalid (\fImtu_len\fP not updated)
+.UNINDENT
+.sp
+MTU violations return positive values, but also populate MTU
+value in \fImtu_len\fP pointer, as this can be needed for
+implementing PMTU handling:
+.INDENT 7.0
+.IP \(bu 2
+\fBBPF_MTU_CHK_RET_FRAG_NEEDED\fP
+.IP \(bu 2
+\fBBPF_MTU_CHK_RET_SEGS_TOOBIG\fP
+.UNINDENT
+.UNINDENT
+.TP
+.B \fBlong bpf_for_each_map_elem(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIcallback_fn\fP\fB, void *\fP\fIcallback_ctx\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For each element in \fBmap\fP, call \fBcallback_fn\fP function with
+\fBmap\fP, \fBcallback_ctx\fP and other map\-specific parameters.
+The \fBcallback_fn\fP should be a static function and
+the \fBcallback_ctx\fP should be a pointer to the stack.
+The \fBflags\fP is used to control certain aspects of the helper.
+Currently, the \fBflags\fP must be 0.
+.sp
+The following is a list of supported map types and their
+respective expected callback signatures:
+.sp
+BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+.sp
+long (*callback_fn)(struct bpf_map *map, const void *key, void *value, void *ctx);
+.sp
+For per_cpu maps, the map_value is the value on the cpu where the
+bpf_prog is running.
+.sp
+If \fBcallback_fn\fP returns 0, the helper will continue to the next
+element. If return value is 1, the helper will skip the rest of
+elements and return. Other return values are not used now.
+.TP
+.B Return
+The number of traversed map elements for success, \fB\-EINVAL\fP for
+invalid \fBflags\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_snprintf(char *\fP\fIstr\fP\fB, u32\fP \fIstr_size\fP\fB, const char *\fP\fIfmt\fP\fB, u64 *\fP\fIdata\fP\fB, u32\fP \fIdata_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Outputs a string into the \fBstr\fP buffer of size \fBstr_size\fP
+based on a format string stored in a read\-only map pointed by
+\fBfmt\fP\&.
+.sp
+Each format specifier in \fBfmt\fP corresponds to one u64 element
+in the \fBdata\fP array. For strings and pointers where pointees
+are accessed, only the pointer values are stored in the \fIdata\fP
+array. The \fIdata_len\fP is the size of \fIdata\fP in bytes \- must be
+a multiple of 8.
+.sp
+Formats \fB%s\fP and \fB%p{i,I}{4,6}\fP require reading kernel
+memory. Reading kernel memory may fail due to either invalid
+address or valid address but requiring a major memory fault. If
+reading kernel memory fails, the string for \fB%s\fP will be an
+empty string, and the ip address for \fB%p{i,I}{4,6}\fP will be 0.
+Not returning error to bpf program is consistent with what
+\fBbpf_trace_printk\fP() does for now.
+.TP
+.B Return
+The strictly positive length of the formatted string, including
+the trailing zero character. If the return value is greater than
+\fBstr_size\fP, \fBstr\fP contains a truncated string, guaranteed to
+be zero\-terminated except when \fBstr_size\fP is 0.
+.sp
+Or \fB\-EBUSY\fP if the per\-CPU memory copy buffer is busy.
+.UNINDENT
+.TP
+.B \fBlong bpf_sys_bpf(u32\fP \fIcmd\fP\fB, void *\fP\fIattr\fP\fB, u32\fP \fIattr_size\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Execute bpf syscall with given arguments.
+.TP
+.B Return
+A syscall result.
+.UNINDENT
+.TP
+.B \fBlong bpf_btf_find_by_name_kind(char *\fP\fIname\fP\fB, int\fP \fIname_sz\fP\fB, u32\fP \fIkind\fP\fB, int\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Find BTF type with given name and kind in vmlinux BTF or in module\(aqs BTFs.
+.TP
+.B Return
+Returns btf_id and btf_obj_fd in lower and upper 32 bits.
+.UNINDENT
+.TP
+.B \fBlong bpf_sys_close(u32\fP \fIfd\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Execute close syscall for given FD.
+.TP
+.B Return
+A syscall result.
+.UNINDENT
+.TP
+.B \fBlong bpf_timer_init(struct bpf_timer *\fP\fItimer\fP\fB, struct bpf_map *\fP\fImap\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Initialize the timer.
+First 4 bits of \fIflags\fP specify clockid.
+Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+All other bits of \fIflags\fP are reserved.
+The verifier will reject the program if \fItimer\fP is not from
+the same \fImap\fP\&.
+.TP
+.B Return
+0 on success.
+\fB\-EBUSY\fP if \fItimer\fP is already initialized.
+\fB\-EINVAL\fP if invalid \fIflags\fP are passed.
+\fB\-EPERM\fP if \fItimer\fP is in a map that doesn\(aqt have any user references.
+The user space should either hold a file descriptor to a map with timers
+or pin such map in bpffs. When map is unpinned or file descriptor is
+closed all timers in the map will be cancelled and freed.
+.UNINDENT
+.TP
+.B \fBlong bpf_timer_set_callback(struct bpf_timer *\fP\fItimer\fP\fB, void *\fP\fIcallback_fn\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Configure the timer to call \fIcallback_fn\fP static function.
+.TP
+.B Return
+0 on success.
+\fB\-EINVAL\fP if \fItimer\fP was not initialized with bpf_timer_init() earlier.
+\fB\-EPERM\fP if \fItimer\fP is in a map that doesn\(aqt have any user references.
+The user space should either hold a file descriptor to a map with timers
+or pin such map in bpffs. When map is unpinned or file descriptor is
+closed all timers in the map will be cancelled and freed.
+.UNINDENT
+.TP
+.B \fBlong bpf_timer_start(struct bpf_timer *\fP\fItimer\fP\fB, u64\fP \fInsecs\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set timer expiration N nanoseconds from the current time. The
+configured callback will be invoked in soft irq context on some cpu
+and will not repeat unless another bpf_timer_start() is made.
+In such case the next invocation can migrate to a different cpu.
+Since struct bpf_timer is a field inside map element the map
+owns the timer. The bpf_timer_set_callback() will increment refcnt
+of BPF program to make sure that callback_fn code stays valid.
+When user space reference to a map reaches zero all timers
+in a map are cancelled and corresponding program\(aqs refcnts are
+decremented. This is done to make sure that Ctrl\-C of a user
+process doesn\(aqt leave any timers running. If map is pinned in
+bpffs the callback_fn can re\-arm itself indefinitely.
+bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+cancel and free the timer in the given map element.
+The map can contain timers that invoke callback_fn\-s from different
+programs. The same callback_fn can serve different timers from
+different maps if key/value layout matches across maps.
+Every bpf_timer_set_callback() can have different callback_fn.
+.TP
+.B Return
+0 on success.
+\fB\-EINVAL\fP if \fItimer\fP was not initialized with bpf_timer_init() earlier
+or invalid \fIflags\fP are passed.
+.UNINDENT
+.TP
+.B \fBlong bpf_timer_cancel(struct bpf_timer *\fP\fItimer\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Cancel the timer and wait for callback_fn to finish if it was running.
+.TP
+.B Return
+0 if the timer was not active.
+1 if the timer was active.
+\fB\-EINVAL\fP if \fItimer\fP was not initialized with bpf_timer_init() earlier.
+\fB\-EDEADLK\fP if callback_fn tried to call bpf_timer_cancel() on its
+own timer which would have led to a deadlock otherwise.
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_func_ip(void *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get address of the traced function (for tracing and kprobe programs).
+.TP
+.B Return
+Address of the traced function.
+0 for kprobes placed within the function (not at the entry).
+.UNINDENT
+.TP
+.B \fBu64 bpf_get_attach_cookie(void *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get bpf_cookie value provided (optionally) during the program
+attachment. It might be different for each individual
+attachment, even if BPF program itself is the same.
+Expects BPF program context \fIctx\fP as a first argument.
+.INDENT 7.0
+.TP
+.B Supported for the following program types:
+.INDENT 7.0
+.IP \(bu 2
+kprobe/uprobe;
+.IP \(bu 2
+tracepoint;
+.IP \(bu 2
+perf_event.
+.UNINDENT
+.UNINDENT
+.TP
+.B Return
+Value specified by user at BPF link creation/attachment time
+or 0, if it was not specified.
+.UNINDENT
+.TP
+.B \fBlong bpf_task_pt_regs(struct task_struct *\fP\fItask\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the struct pt_regs associated with \fBtask\fP\&.
+.TP
+.B Return
+A pointer to struct pt_regs.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_branch_snapshot(void *\fP\fIentries\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get branch trace from hardware engines like Intel LBR. The
+hardware engine is stopped shortly after the helper is
+called. Therefore, the user needs to filter branch entries
+based on the actual use case. To capture branch trace
+before the trigger point of the BPF program, the helper
+should be called at the beginning of the BPF program.
+.sp
+The data is stored as struct perf_branch_entry into output
+buffer \fIentries\fP\&. \fIsize\fP is the size of \fIentries\fP in bytes.
+\fIflags\fP is reserved for now and must be zero.
+.TP
+.B Return
+On success, number of bytes written to \fIentries\fP\&. On error, a
+negative value.
+.sp
+\fB\-EINVAL\fP if \fIflags\fP is not zero.
+.sp
+\fB\-ENOENT\fP if architecture does not support branch records.
+.UNINDENT
+.TP
+.B \fBlong bpf_trace_vprintk(const char *\fP\fIfmt\fP\fB, u32\fP \fIfmt_size\fP\fB, const void *\fP\fIdata\fP\fB, u32\fP \fIdata_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Behaves like \fBbpf_trace_printk\fP() helper, but takes an array of u64
+to format and can handle more format args as a result.
+.sp
+Arguments are to be used as in \fBbpf_seq_printf\fP() helper.
+.TP
+.B Return
+The number of bytes written to the buffer, or a negative error
+in case of failure.
+.UNINDENT
+.TP
+.B \fBstruct unix_sock *bpf_skc_to_unix_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fIunix_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBlong bpf_kallsyms_lookup_name(const char *\fP\fIname\fP\fB, int\fP \fIname_sz\fP\fB, int\fP \fIflags\fP\fB, u64 *\fP\fIres\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the address of a kernel symbol, returned in \fIres\fP\&. \fIres\fP is
+set to 0 if the symbol is not found.
+.TP
+.B Return
+On success, zero. On error, a negative value.
+.sp
+\fB\-EINVAL\fP if \fIflags\fP is not zero.
+.sp
+\fB\-EINVAL\fP if string \fIname\fP is not the same size as \fIname_sz\fP\&.
+.sp
+\fB\-ENOENT\fP if symbol is not found.
+.sp
+\fB\-EPERM\fP if caller does not have permission to obtain kernel address.
+.UNINDENT
+.TP
+.B \fBlong bpf_find_vma(struct task_struct *\fP\fItask\fP\fB, u64\fP \fIaddr\fP\fB, void *\fP\fIcallback_fn\fP\fB, void *\fP\fIcallback_ctx\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Find vma of \fItask\fP that contains \fIaddr\fP, call \fIcallback_fn\fP
+function with \fItask\fP, \fIvma\fP, and \fIcallback_ctx\fP\&.
+The \fIcallback_fn\fP should be a static function and
+the \fIcallback_ctx\fP should be a pointer to the stack.
+The \fIflags\fP is used to control certain aspects of the helper.
+Currently, the \fIflags\fP must be 0.
+.sp
+The expected callback signature is
+.sp
+long (*callback_fn)(struct task_struct *task, struct vm_area_struct *vma, void *callback_ctx);
+.TP
+.B Return
+0 on success.
+\fB\-ENOENT\fP if \fItask\->mm\fP is NULL, or no vma contains \fIaddr\fP\&.
+\fB\-EBUSY\fP if failed to try lock mmap_lock.
+\fB\-EINVAL\fP for invalid \fBflags\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_loop(u32\fP \fInr_loops\fP\fB, void *\fP\fIcallback_fn\fP\fB, void *\fP\fIcallback_ctx\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+For \fBnr_loops\fP, call \fBcallback_fn\fP function
+with \fBcallback_ctx\fP as the context parameter.
+The \fBcallback_fn\fP should be a static function and
+the \fBcallback_ctx\fP should be a pointer to the stack.
+The \fBflags\fP is used to control certain aspects of the helper.
+Currently, the \fBflags\fP must be 0. Currently, nr_loops is
+limited to 1 << 23 (~8 million) loops.
+.sp
+long (*callback_fn)(u32 index, void *ctx);
+.sp
+where \fBindex\fP is the current index in the loop. The index
+is zero\-indexed.
+.sp
+If \fBcallback_fn\fP returns 0, the helper will continue to the next
+loop. If return value is 1, the helper will skip the rest of
+the loops and return. Other return values are not used now,
+and will be rejected by the verifier.
+.TP
+.B Return
+The number of loops performed, \fB\-EINVAL\fP for invalid \fBflags\fP,
+\fB\-E2BIG\fP if \fBnr_loops\fP exceeds the maximum number of loops.
+.UNINDENT
+.TP
+.B \fBlong bpf_strncmp(const char *\fP\fIs1\fP\fB, u32\fP \fIs1_sz\fP\fB, const char *\fP\fIs2\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Do strncmp() between \fBs1\fP and \fBs2\fP\&. \fBs1\fP doesn\(aqt need
+to be null\-terminated and \fBs1_sz\fP is the maximum storage
+size of \fBs1\fP\&. \fBs2\fP must be a read\-only string.
+.TP
+.B Return
+An integer less than, equal to, or greater than zero
+if the first \fBs1_sz\fP bytes of \fBs1\fP is found to be
+less than, to match, or be greater than \fBs2\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_func_arg(void *\fP\fIctx\fP\fB, u32\fP \fIn\fP\fB, u64 *\fP\fIvalue\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get \fBn\fP\-th argument register (zero based) of the traced function (for tracing programs)
+returned in \fBvalue\fP\&.
+.TP
+.B Return
+0 on success.
+\fB\-EINVAL\fP if n >= argument register count of traced function.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_func_ret(void *\fP\fIctx\fP\fB, u64 *\fP\fIvalue\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get return value of the traced function (for tracing programs)
+in \fBvalue\fP\&.
+.TP
+.B Return
+0 on success.
+\fB\-EOPNOTSUPP\fP for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN.
+.UNINDENT
+.TP
+.B \fBlong bpf_get_func_arg_cnt(void *\fP\fIctx\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get number of registers of the traced function (for tracing programs) where
+function arguments are stored in these registers.
+.TP
+.B Return
+The number of argument registers of the traced function.
+.UNINDENT
+.TP
+.B \fBint bpf_get_retval(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the BPF program\(aqs return value that will be returned to the upper layers.
+.sp
+This helper is currently supported by cgroup programs and only by the hooks
+where BPF program\(aqs return value is returned to the userspace via errno.
+.TP
+.B Return
+The BPF program\(aqs return value.
+.UNINDENT
+.TP
+.B \fBint bpf_set_retval(int\fP \fIretval\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Set the BPF program\(aqs return value that will be returned to the upper layers.
+.sp
+This helper is currently supported by cgroup programs and only by the hooks
+where BPF program\(aqs return value is returned to the userspace via errno.
+.sp
+Note that there is the following corner case where the program exports an error
+via bpf_set_retval but signals success via \(aqreturn 1\(aq:
+.INDENT 7.0
+.INDENT 3.5
+bpf_set_retval(\-EPERM);
+return 1;
+.UNINDENT
+.UNINDENT
+.sp
+In this case, the BPF program\(aqs return value will use helper\(aqs \-EPERM. This
+still holds true for cgroup/bind{4,6} which supports extra \(aqreturn 3\(aq success case.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBu64 bpf_xdp_get_buff_len(struct xdp_buff *\fP\fIxdp_md\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get the total size of a given xdp buff (linear and paged area).
+.TP
+.B Return
+The total size of a given xdp buffer.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_load_bytes(struct xdp_buff *\fP\fIxdp_md\fP\fB, u32\fP \fIoffset\fP\fB, void *\fP\fIbuf\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+This helper is provided as an easy way to load data from a
+xdp buffer. It can be used to load \fIlen\fP bytes from \fIoffset\fP from
+the frame associated to \fIxdp_md\fP, into the buffer pointed by
+\fIbuf\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_xdp_store_bytes(struct xdp_buff *\fP\fIxdp_md\fP\fB, u32\fP \fIoffset\fP\fB, void *\fP\fIbuf\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Store \fIlen\fP bytes from buffer \fIbuf\fP into the frame
+associated to \fIxdp_md\fP, at \fIoffset\fP\&.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBlong bpf_copy_from_user_task(void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB, const void *\fP\fIuser_ptr\fP\fB, struct task_struct *\fP\fItsk\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Read \fIsize\fP bytes from user space address \fIuser_ptr\fP in \fItsk\fP\(aqs
+address space, and store the data in \fIdst\fP\&. \fIflags\fP is not
+used yet and is provided for future extensibility. This helper
+can only be used by sleepable programs.
+.TP
+.B Return
+0 on success, or a negative error in case of failure. On error
+\fIdst\fP buffer is zeroed out.
+.UNINDENT
+.TP
+.B \fBlong bpf_skb_set_tstamp(struct sk_buff *\fP\fIskb\fP\fB, u64\fP \fItstamp\fP\fB, u32\fP \fItstamp_type\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Change the __sk_buff\->tstamp_type to \fItstamp_type\fP
+and set \fItstamp\fP to the __sk_buff\->tstamp together.
+.sp
+If there is no need to change the __sk_buff\->tstamp_type,
+the tstamp value can be directly written to __sk_buff\->tstamp
+instead.
+.sp
+BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that
+will be kept during bpf_redirect_*(). A non zero
+\fItstamp\fP must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO
+\fItstamp_type\fP\&.
+.sp
+A BPF_SKB_TSTAMP_UNSPEC \fItstamp_type\fP can only be used
+with a zero \fItstamp\fP\&.
+.sp
+Only IPv4 and IPv6 skb\->protocol are supported.
+.sp
+This function is most useful when it needs to set a
+mono delivery time to __sk_buff\->tstamp and then
+bpf_redirect_*() to the egress of an iface. For example,
+changing the (rcv) timestamp in __sk_buff\->tstamp at
+ingress to a mono delivery time and then bpf_redirect_*()
+to \fI\%sch_fq@phy\-dev\fP\&.
+.TP
+.B Return
+0 on success,
+\fB\-EINVAL\fP for invalid input, or
+\fB\-EOPNOTSUPP\fP for an unsupported protocol.
+.UNINDENT
+.TP
+.B \fBlong bpf_ima_file_hash(struct file *\fP\fIfile\fP\fB, void *\fP\fIdst\fP\fB, u32\fP \fIsize\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Returns a calculated IMA hash of the \fIfile\fP\&.
+If the hash is larger than \fIsize\fP, then only \fIsize\fP
+bytes will be copied to \fIdst\fP\&.
+.TP
+.B Return
+The \fBhash_algo\fP is returned on success,
+\fB\-EOPNOTSUPP\fP if the hash calculation failed or \fB\-EINVAL\fP if
+invalid arguments are passed.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_kptr_xchg(void *\fP\fImap_value\fP\fB, void *\fP\fIptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Exchange kptr at pointer \fImap_value\fP with \fIptr\fP, and return the
+old value. \fIptr\fP can be NULL, otherwise it must be a referenced
+pointer which will be released when this helper is called.
+.TP
+.B Return
+The old value of kptr (which can be NULL). The returned pointer,
+if not NULL, is a reference which must be released using its
+corresponding release function, or moved into a BPF map before
+program exit.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_map_lookup_percpu_elem(struct bpf_map *\fP\fImap\fP\fB, const void *\fP\fIkey\fP\fB, u32\fP \fIcpu\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Perform a lookup in \fIpercpu map\fP for an entry associated to
+\fIkey\fP on \fIcpu\fP\&.
+.TP
+.B Return
+Map value associated to \fIkey\fP on \fIcpu\fP, or \fBNULL\fP if no entry
+was found or \fIcpu\fP is invalid.
+.UNINDENT
+.TP
+.B \fBstruct mptcp_sock *bpf_skc_to_mptcp_sock(void *\fP\fIsk\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Dynamically cast a \fIsk\fP pointer to a \fImptcp_sock\fP pointer.
+.TP
+.B Return
+\fIsk\fP if casting is valid, or \fBNULL\fP otherwise.
+.UNINDENT
+.TP
+.B \fBlong bpf_dynptr_from_mem(void *\fP\fIdata\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB, struct bpf_dynptr *\fP\fIptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a dynptr to local memory \fIdata\fP\&.
+.sp
+\fIdata\fP must be a ptr to a map value.
+The maximum \fIsize\fP supported is DYNPTR_MAX_SIZE.
+\fIflags\fP is currently unused.
+.TP
+.B Return
+0 on success, \-E2BIG if the size exceeds DYNPTR_MAX_SIZE,
+\-EINVAL if flags is not 0.
+.UNINDENT
+.TP
+.B \fBlong bpf_ringbuf_reserve_dynptr(void *\fP\fIringbuf\fP\fB, u32\fP \fIsize\fP\fB, u64\fP \fIflags\fP\fB, struct bpf_dynptr *\fP\fIptr\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Reserve \fIsize\fP bytes of payload in a ring buffer \fIringbuf\fP
+through the dynptr interface. \fIflags\fP must be 0.
+.sp
+Please note that a corresponding bpf_ringbuf_submit_dynptr or
+bpf_ringbuf_discard_dynptr must be called on \fIptr\fP, even if the
+reservation fails. This is enforced by the verifier.
+.TP
+.B Return
+0 on success, or a negative error in case of failure.
+.UNINDENT
+.TP
+.B \fBvoid bpf_ringbuf_submit_dynptr(struct bpf_dynptr *\fP\fIptr\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Submit reserved ring buffer sample, pointed to by \fIptr\fP,
+through the dynptr interface. This is a no\-op if the dynptr is
+invalid/null.
+.sp
+For more information on \fIflags\fP, please see
+\(aqbpf_ringbuf_submit\(aq.
+.TP
+.B Return
+Nothing. Always succeeds.
+.UNINDENT
+.TP
+.B \fBvoid bpf_ringbuf_discard_dynptr(struct bpf_dynptr *\fP\fIptr\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Discard reserved ring buffer sample through the dynptr
+interface. This is a no\-op if the dynptr is invalid/null.
+.sp
+For more information on \fIflags\fP, please see
+\(aqbpf_ringbuf_discard\(aq.
+.TP
+.B Return
+Nothing. Always succeeds.
+.UNINDENT
+.TP
+.B \fBlong bpf_dynptr_read(void *\fP\fIdst\fP\fB, u32\fP \fIlen\fP\fB, const struct bpf_dynptr *\fP\fIsrc\fP\fB, u32\fP \fIoffset\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Read \fIlen\fP bytes from \fIsrc\fP into \fIdst\fP, starting from \fIoffset\fP
+into \fIsrc\fP\&.
+\fIflags\fP is currently unused.
+.TP
+.B Return
+0 on success, \-E2BIG if \fIoffset\fP + \fIlen\fP exceeds the length
+of \fIsrc\fP\(aqs data, \-EINVAL if \fIsrc\fP is an invalid dynptr or if
+\fIflags\fP is not 0.
+.UNINDENT
+.TP
+.B \fBlong bpf_dynptr_write(const struct bpf_dynptr *\fP\fIdst\fP\fB, u32\fP \fIoffset\fP\fB, void *\fP\fIsrc\fP\fB, u32\fP \fIlen\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Write \fIlen\fP bytes from \fIsrc\fP into \fIdst\fP, starting from \fIoffset\fP
+into \fIdst\fP\&.
+\fIflags\fP is currently unused.
+.TP
+.B Return
+0 on success, \-E2BIG if \fIoffset\fP + \fIlen\fP exceeds the length
+of \fIdst\fP\(aqs data, \-EINVAL if \fIdst\fP is an invalid dynptr or if \fIdst\fP
+is a read\-only dynptr or if \fIflags\fP is not 0.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_dynptr_data(const struct bpf_dynptr *\fP\fIptr\fP\fB, u32\fP \fIoffset\fP\fB, u32\fP \fIlen\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a pointer to the underlying dynptr data.
+.sp
+\fIlen\fP must be a statically known value. The returned data slice
+is invalidated whenever the dynptr is invalidated.
+.TP
+.B Return
+Pointer to the underlying dynptr data, NULL if the dynptr is
+read\-only, if the dynptr is invalid, or if the offset and length
+are out of bounds.
+.UNINDENT
+.TP
+.B \fBs64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *\fP\fIiph\fP\fB, struct tcphdr *\fP\fIth\fP\fB, u32\fP \fIth_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Try to issue a SYN cookie for the packet with corresponding
+IPv4/TCP headers, \fIiph\fP and \fIth\fP, without depending on a
+listening socket.
+.sp
+\fIiph\fP points to the IPv4 header.
+.sp
+\fIth\fP points to the start of the TCP header, while \fIth_len\fP
+contains the length of the TCP header (at least
+\fBsizeof\fP(\fBstruct tcphdr\fP)).
+.TP
+.B Return
+On success, the lower 32 bits hold the generated SYN cookie,
+followed by 16 bits which hold the MSS value for that cookie,
+and the top 16 bits are unused.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EINVAL\fP if \fIth_len\fP is invalid.
+.UNINDENT
+.TP
+.B \fBs64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *\fP\fIiph\fP\fB, struct tcphdr *\fP\fIth\fP\fB, u32\fP \fIth_len\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Try to issue a SYN cookie for the packet with corresponding
+IPv6/TCP headers, \fIiph\fP and \fIth\fP, without depending on a
+listening socket.
+.sp
+\fIiph\fP points to the IPv6 header.
+.sp
+\fIth\fP points to the start of the TCP header, while \fIth_len\fP
+contains the length of the TCP header (at least
+\fBsizeof\fP(\fBstruct tcphdr\fP)).
+.TP
+.B Return
+On success, the lower 32 bits hold the generated SYN cookie,
+followed by 16 bits which hold the MSS value for that cookie,
+and the top 16 bits are unused.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EINVAL\fP if \fIth_len\fP is invalid.
+.sp
+\fB\-EPROTONOSUPPORT\fP if CONFIG_IPV6 is not builtin.
+.UNINDENT
+.TP
+.B \fBlong bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *\fP\fIiph\fP\fB, struct tcphdr *\fP\fIth\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check whether \fIiph\fP and \fIth\fP contain a valid SYN cookie ACK
+without depending on a listening socket.
+.sp
+\fIiph\fP points to the IPv4 header.
+.sp
+\fIth\fP points to the TCP header.
+.TP
+.B Return
+0 if \fIiph\fP and \fIth\fP are a valid SYN cookie ACK.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EACCES\fP if the SYN cookie is not valid.
+.UNINDENT
+.TP
+.B \fBlong bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *\fP\fIiph\fP\fB, struct tcphdr *\fP\fIth\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Check whether \fIiph\fP and \fIth\fP contain a valid SYN cookie ACK
+without depending on a listening socket.
+.sp
+\fIiph\fP points to the IPv6 header.
+.sp
+\fIth\fP points to the TCP header.
+.TP
+.B Return
+0 if \fIiph\fP and \fIth\fP are a valid SYN cookie ACK.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EACCES\fP if the SYN cookie is not valid.
+.sp
+\fB\-EPROTONOSUPPORT\fP if CONFIG_IPV6 is not builtin.
+.UNINDENT
+.TP
+.B \fBu64 bpf_ktime_get_tai_ns(void)\fP
+.INDENT 7.0
+.TP
+.B Description
+A nonsettable system\-wide clock derived from wall\-clock time but
+ignoring leap seconds. This clock does not experience
+discontinuities and backwards jumps caused by NTP inserting leap
+seconds as CLOCK_REALTIME does.
+.sp
+See: \fBclock_gettime\fP(\fBCLOCK_TAI\fP)
+.TP
+.B Return
+Current \fIktime\fP\&.
+.UNINDENT
+.TP
+.B \fBlong bpf_user_ringbuf_drain(struct bpf_map *\fP\fImap\fP\fB, void *\fP\fIcallback_fn\fP\fB, void *\fP\fIctx\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Drain samples from the specified user ring buffer, and invoke
+the provided callback for each such sample:
+.sp
+long (*callback_fn)(const struct bpf_dynptr *dynptr, void *ctx);
+.sp
+If \fBcallback_fn\fP returns 0, the helper will continue to try
+and drain the next sample, up to a maximum of
+BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+the helper will skip the rest of the samples and return. Other
+return values are not used now, and will be rejected by the
+verifier.
+.TP
+.B Return
+The number of drained samples if no error was encountered while
+draining samples, or 0 if no samples were present in the ring
+buffer. If a user\-space producer was epoll\-waiting on this map,
+and at least one sample was drained, they will receive an event
+notification notifying them of available space in the ring
+buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+function, no wakeup notification will be sent. If the
+BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+be sent even if no sample was drained.
+.sp
+On failure, the returned value is one of the following:
+.sp
+\fB\-EBUSY\fP if the ring buffer is contended, and another calling
+context was concurrently draining the ring buffer.
+.sp
+\fB\-EINVAL\fP if user\-space is not properly tracking the ring
+buffer due to the producer position not being aligned to 8
+bytes, a sample not being aligned to 8 bytes, or the producer
+position not matching the advertised length of a sample.
+.sp
+\fB\-E2BIG\fP if user\-space has tried to publish a sample which is
+larger than the size of the ring buffer, or which cannot fit
+within a struct bpf_dynptr.
+.UNINDENT
+.TP
+.B \fBvoid *bpf_cgrp_storage_get(struct bpf_map *\fP\fImap\fP\fB, struct cgroup *\fP\fIcgroup\fP\fB, void *\fP\fIvalue\fP\fB, u64\fP \fIflags\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Get a bpf_local_storage from the \fIcgroup\fP\&.
+.sp
+Logically, it could be thought of as getting the value from
+a \fImap\fP with \fIcgroup\fP as the \fBkey\fP\&. From this
+perspective, the usage is not much different from
+\fBbpf_map_lookup_elem\fP(\fImap\fP, \fB&\fP\fIcgroup\fP) except this
+helper enforces the key must be a cgroup struct and the map must also
+be a \fBBPF_MAP_TYPE_CGRP_STORAGE\fP\&.
+.sp
+In reality, the local\-storage value is embedded directly inside of the
+\fIcgroup\fP object itself, rather than being located in the
+\fBBPF_MAP_TYPE_CGRP_STORAGE\fP map. When the local\-storage value is
+queried for some \fImap\fP on a \fIcgroup\fP object, the kernel will perform an
+O(n) iteration over all of the live local\-storage values for that
+\fIcgroup\fP object until the local\-storage value for the \fImap\fP is found.
+.sp
+An optional \fIflags\fP (\fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP) can be
+used such that a new bpf_local_storage will be
+created if one does not exist. \fIvalue\fP can be used
+together with \fBBPF_LOCAL_STORAGE_GET_F_CREATE\fP to specify
+the initial value of a bpf_local_storage. If \fIvalue\fP is
+\fBNULL\fP, the new bpf_local_storage will be zero initialized.
+.TP
+.B Return
+A bpf_local_storage pointer is returned on success.
+.sp
+\fBNULL\fP if not found or there was an error in adding
+a new bpf_local_storage.
+.UNINDENT
+.TP
+.B \fBlong bpf_cgrp_storage_delete(struct bpf_map *\fP\fImap\fP\fB, struct cgroup *\fP\fIcgroup\fP\fB)\fP
+.INDENT 7.0
+.TP
+.B Description
+Delete a bpf_local_storage from a \fIcgroup\fP\&.
+.TP
+.B Return
+0 on success.
+.sp
+\fB\-ENOENT\fP if the bpf_local_storage cannot be found.
+.UNINDENT
+.UNINDENT
+.SH EXAMPLES
+.sp
+Examples of usage for most of the eBPF helpers listed in this manual page are
+available within the Linux kernel sources, at the following locations:
+.INDENT 0.0
+.IP \(bu 2
+\fIsamples/bpf/\fP
+.IP \(bu 2
+\fItools/testing/selftests/bpf/\fP
+.UNINDENT
+.SH LICENSE
+.sp
+eBPF programs can have an associated license, passed along with the bytecode
+instructions to the kernel when the programs are loaded. The format for that
+string is identical to the one in use for kernel modules (Dual licenses, such
+as \(dqDual BSD/GPL\(dq, may be used). Some helper functions are only accessible to
+programs that are compatible with the GNU General Public License (GPL).
+.sp
+In order to use such helpers, the eBPF program must be loaded with the correct
+license string passed (via \fBattr\fP) to the \fBbpf\fP() system call, and this
+generally translates into the C source code of the program containing a line
+similar to the following:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+char ____license[] __attribute__((section(\(dqlicense\(dq), used)) = \(dqGPL\(dq;
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.SH IMPLEMENTATION
+.sp
+This manual page is an effort to document the existing eBPF helper functions.
+But as of this writing, the BPF sub\-system is under heavy development. New eBPF
+program or map types are added, along with new helper functions. Some helpers
+are occasionally made available for additional program types. So in spite of
+the efforts of the community, this page might not be up\-to\-date. If you want to
+check by yourself what helper functions exist in your kernel, or what types of
+programs they can support, here are some files in the kernel tree that you
+may be interested in:
+.INDENT 0.0
+.IP \(bu 2
+\fIinclude/uapi/linux/bpf.h\fP is the main BPF header. It contains the full list
+of all helper functions, as well as many other BPF definitions including most
+of the flags, structs or constants used by the helpers.
+.IP \(bu 2
+\fInet/core/filter.c\fP contains the definition of most network\-related helper
+functions, and the list of program types from which they can be used.
+.IP \(bu 2
+\fIkernel/trace/bpf_trace.c\fP is the equivalent for most tracing program\-related
+helpers.
+.IP \(bu 2
+\fIkernel/bpf/verifier.c\fP contains the functions used to check that valid types
+of eBPF maps are used with a given helper function.
+.IP \(bu 2
+\fIkernel/bpf/\fP directory contains other files in which additional helpers are
+defined (for cgroups, sockmaps, etc.).
+.IP \(bu 2
+The bpftool utility can be used to probe the availability of helper functions
+on the system (as well as supported program and map types, and a number of
+other parameters). To do so, run \fBbpftool feature probe\fP (see
+\fBbpftool\-feature\fP(8) for details). Add the \fBunprivileged\fP keyword to
+list features available to unprivileged users.
+.UNINDENT
+.sp
+Compatibility between helper functions and program types can generally be found
+in the files where helper functions are defined. Look for the \fBstruct
+bpf_func_proto\fP objects and for functions returning them: these functions
+contain a list of helpers that a given program type can call. Note that the
+\fBdefault:\fP label of the \fBswitch ... case\fP used to filter helpers can call
+other functions, themselves allowing access to additional helpers. The
+requirement for GPL license is also in those \fBstruct bpf_func_proto\fP\&.
+.sp
+Compatibility between helper functions and map types can be found in the
+\fBcheck_map_func_compatibility\fP() function in file \fIkernel/bpf/verifier.c\fP\&.
+.sp
+Helper functions that invalidate the checks on \fBdata\fP and \fBdata_end\fP
+pointers for network processing are listed in function
+\fBbpf_helper_changes_pkt_data\fP() in file \fInet/core/filter.c\fP\&.
+.SH SEE ALSO
+.sp
+\fBbpf\fP(2),
+\fBbpftool\fP(8),
+\fBcgroups\fP(7),
+\fBip\fP(8),
+\fBperf_event_open\fP(2),
+\fBsendmsg\fP(2),
+\fBsocket\fP(7),
+\fBtc\-bpf\fP(8)
+.\" Generated by docutils manpage writer.
+.
diff --git a/man7/capabilities.7 b/man7/capabilities.7
new file mode 100644
index 0000000..c8766d2
--- /dev/null
+++ b/man7/capabilities.7
@@ -0,0 +1,1872 @@
+.\" Copyright (c) 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 6 Aug 2002 - Initial Creation
+.\" Modified 2003-05-23, Michael Kerrisk, <mtk.manpages@gmail.com>
+.\" Modified 2004-05-27, Michael Kerrisk, <mtk.manpages@gmail.com>
+.\" 2004-12-08, mtk Added O_NOATIME for CAP_FOWNER
+.\" 2005-08-16, mtk, Added CAP_AUDIT_CONTROL and CAP_AUDIT_WRITE
+.\" 2008-07-15, Serge Hallyn <serue@us.bbm.com>
+.\" Document file capabilities, per-process capability
+.\" bounding set, changed semantics for CAP_SETPCAP,
+.\" and other changes in Linux 2.6.2[45].
+.\" Add CAP_MAC_ADMIN, CAP_MAC_OVERRIDE, CAP_SETFCAP.
+.\" 2008-07-15, mtk
+.\" Add text describing circumstances in which CAP_SETPCAP
+.\" (theoretically) permits a thread to change the
+.\" capability sets of another thread.
+.\" Add section describing rules for programmatically
+.\" adjusting thread capability sets.
+.\" Describe rationale for capability bounding set.
+.\" Document "securebits" flags.
+.\" Add text noting that if we set the effective flag for one file
+.\" capability, then we must also set the effective flag for all
+.\" other capabilities where the permitted or inheritable bit is set.
+.\" 2011-09-07, mtk/Serge hallyn: Add CAP_SYSLOG
+.\"
+.TH Capabilities 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+capabilities \- overview of Linux capabilities
+.SH DESCRIPTION
+For the purpose of performing permission checks,
+traditional UNIX implementations distinguish two categories of processes:
+.I privileged
+processes (whose effective user ID is 0, referred to as superuser or root),
+and
+.I unprivileged
+processes (whose effective UID is nonzero).
+Privileged processes bypass all kernel permission checks,
+while unprivileged processes are subject to full permission
+checking based on the process's credentials
+(usually: effective UID, effective GID, and supplementary group list).
+.PP
+Starting with Linux 2.2, Linux divides the privileges traditionally
+associated with superuser into distinct units, known as
+.IR capabilities ,
+which can be independently enabled and disabled.
+Capabilities are a per-thread attribute.
+.\"
+.SS Capabilities list
+The following list shows the capabilities implemented on Linux,
+and the operations or behaviors that each capability permits:
+.TP
+.BR CAP_AUDIT_CONTROL " (since Linux 2.6.11)"
+Enable and disable kernel auditing; change auditing filter rules;
+retrieve auditing status and filtering rules.
+.TP
+.BR CAP_AUDIT_READ " (since Linux 3.16)"
+.\" commit a29b694aa1739f9d76538e34ae25524f9c549d59
+.\" commit 3a101b8de0d39403b2c7e5c23fd0b005668acf48
+Allow reading the audit log via a multicast netlink socket.
+.TP
+.BR CAP_AUDIT_WRITE " (since Linux 2.6.11)"
+Write records to kernel auditing log.
+.\" FIXME Add FAN_ENABLE_AUDIT
+.TP
+.BR CAP_BLOCK_SUSPEND " (since Linux 3.5)"
+Employ features that can block system suspend
+.RB ( epoll (7)
+.BR EPOLLWAKEUP ,
+.IR /proc/sys/wake_lock ).
+.TP
+.BR CAP_BPF " (since Linux 5.8)"
+Employ privileged BPF operations; see
+.BR bpf (2)
+and
+.BR bpf\-helpers (7).
+.IP
+This capability was added in Linux 5.8 to separate out
+BPF functionality from the overloaded
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.BR CAP_CHECKPOINT_RESTORE " (since Linux 5.9)"
+.\" commit 124ea650d3072b005457faed69909221c2905a1f
+.PD 0
+.RS
+.IP \[bu] 3
+Update
+.I /proc/sys/kernel/ns_last_pid
+(see
+.BR pid_namespaces (7));
+.IP \[bu]
+employ the
+.I set_tid
+feature of
+.BR clone3 (2);
+.\" FIXME There is also some use case relating to
+.\" prctl_set_mm_exe_file(); in the 5.9 sources, see
+.\" prctl_set_mm_map().
+.IP \[bu]
+read the contents of the symbolic links in
+.IR /proc/ pid /map_files
+for other processes.
+.RE
+.PD
+.IP
+This capability was added in Linux 5.9 to separate out
+checkpoint/restore functionality from the overloaded
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.B CAP_CHOWN
+Make arbitrary changes to file UIDs and GIDs (see
+.BR chown (2)).
+.TP
+.B CAP_DAC_OVERRIDE
+Bypass file read, write, and execute permission checks.
+(DAC is an abbreviation of "discretionary access control".)
+.TP
+.B CAP_DAC_READ_SEARCH
+.PD 0
+.RS
+.IP \[bu] 3
+Bypass file read permission checks and
+directory read and execute permission checks;
+.IP \[bu]
+invoke
+.BR open_by_handle_at (2);
+.IP \[bu]
+use the
+.BR linkat (2)
+.B AT_EMPTY_PATH
+flag to create a link to a file referred to by a file descriptor.
+.RE
+.PD
+.TP
+.B CAP_FOWNER
+.PD 0
+.RS
+.IP \[bu] 3
+Bypass permission checks on operations that normally
+require the filesystem UID of the process to match the UID of
+the file (e.g.,
+.BR chmod (2),
+.BR utime (2)),
+excluding those operations covered by
+.B CAP_DAC_OVERRIDE
+and
+.BR CAP_DAC_READ_SEARCH ;
+.IP \[bu]
+set inode flags (see
+.BR ioctl_iflags (2))
+on arbitrary files;
+.IP \[bu]
+set Access Control Lists (ACLs) on arbitrary files;
+.IP \[bu]
+ignore directory sticky bit on file deletion;
+.IP \[bu]
+modify
+.I user
+extended attributes on sticky directory owned by any user;
+.IP \[bu]
+specify
+.B O_NOATIME
+for arbitrary files in
+.BR open (2)
+and
+.BR fcntl (2).
+.RE
+.PD
+.TP
+.B CAP_FSETID
+.PD 0
+.RS
+.IP \[bu] 3
+Don't clear set-user-ID and set-group-ID mode
+bits when a file is modified;
+.IP \[bu]
+set the set-group-ID bit for a file whose GID does not match
+the filesystem GID or any of the supplementary GIDs of the calling process.
+.RE
+.PD
+.TP
+.B CAP_IPC_LOCK
+.\" FIXME . As at Linux 3.2, there are some strange uses of this capability
+.\" in other places; they probably should be replaced with something else.
+.PD 0
+.RS
+.IP \[bu] 3
+Lock memory
+.RB ( mlock (2),
+.BR mlockall (2),
+.BR mmap (2),
+.BR shmctl (2));
+.IP \[bu]
+allocate memory using huge pages
+.RB ( memfd_create (2),
+.BR mmap (2),
+.BR shmctl (2)).
+.RE
+.PD
+.TP
+.B CAP_IPC_OWNER
+Bypass permission checks for operations on System V IPC objects.
+.TP
+.B CAP_KILL
+Bypass permission checks for sending signals (see
+.BR kill (2)).
+This includes use of the
+.BR ioctl (2)
+.B KDSIGACCEPT
+operation.
+.\" FIXME . CAP_KILL also has an effect for threads + setting child
+.\" termination signal to other than SIGCHLD: without this
+.\" capability, the termination signal reverts to SIGCHLD
+.\" if the child does an exec(). What is the rationale
+.\" for this?
+.TP
+.BR CAP_LEASE " (since Linux 2.4)"
+Establish leases on arbitrary files (see
+.BR fcntl (2)).
+.TP
+.B CAP_LINUX_IMMUTABLE
+Set the
+.B FS_APPEND_FL
+and
+.B FS_IMMUTABLE_FL
+inode flags (see
+.BR ioctl_iflags (2)).
+.TP
+.BR CAP_MAC_ADMIN " (since Linux 2.6.25)"
+Allow MAC configuration or state changes.
+Implemented for the Smack Linux Security Module (LSM).
+.TP
+.BR CAP_MAC_OVERRIDE " (since Linux 2.6.25)"
+Override Mandatory Access Control (MAC).
+Implemented for the Smack LSM.
+.TP
+.BR CAP_MKNOD " (since Linux 2.4)"
+Create special files using
+.BR mknod (2).
+.TP
+.B CAP_NET_ADMIN
+Perform various network-related operations:
+.PD 0
+.RS
+.IP \[bu] 3
+interface configuration;
+.IP \[bu]
+administration of IP firewall, masquerading, and accounting;
+.IP \[bu]
+modify routing tables;
+.IP \[bu]
+bind to any address for transparent proxying;
+.IP \[bu]
+set type-of-service (TOS);
+.IP \[bu]
+clear driver statistics;
+.IP \[bu]
+set promiscuous mode;
+.IP \[bu]
+enable multicasting;
+.IP \[bu]
+use
+.BR setsockopt (2)
+to set the following socket options:
+.BR SO_DEBUG ,
+.BR SO_MARK ,
+.B SO_PRIORITY
+(for a priority outside the range 0 to 6),
+.BR SO_RCVBUFFORCE ,
+and
+.BR SO_SNDBUFFORCE .
+.RE
+.PD
+.TP
+.B CAP_NET_BIND_SERVICE
+Bind a socket to Internet domain privileged ports
+(port numbers less than 1024).
+.TP
+.B CAP_NET_BROADCAST
+(Unused) Make socket broadcasts, and listen to multicasts.
+.\" FIXME Since Linux 4.2, there are use cases for netlink sockets
+.\" commit 59324cf35aba5336b611074028777838a963d03b
+.TP
+.B CAP_NET_RAW
+.PD 0
+.RS
+.IP \[bu] 3
+Use RAW and PACKET sockets;
+.IP \[bu]
+bind to any address for transparent proxying.
+.RE
+.PD
+.\" Also various IP options and setsockopt(SO_BINDTODEVICE)
+.TP
+.BR CAP_PERFMON " (since Linux 5.8)"
+Employ various performance-monitoring mechanisms, including:
+.RS
+.IP \[bu] 3
+.PD 0
+call
+.BR perf_event_open (2);
+.IP \[bu]
+employ various BPF operations that have performance implications.
+.RE
+.PD
+.IP
+This capability was added in Linux 5.8 to separate out
+performance monitoring functionality from the overloaded
+.B CAP_SYS_ADMIN
+capability.
+See also the kernel source file
+.IR Documentation/admin\-guide/perf\-security.rst .
+.TP
+.B CAP_SETGID
+.RS
+.PD 0
+.IP \[bu] 3
+Make arbitrary manipulations of process GIDs and supplementary GID list;
+.IP \[bu]
+forge GID when passing socket credentials via UNIX domain sockets;
+.IP \[bu]
+write a group ID mapping in a user namespace (see
+.BR user_namespaces (7)).
+.PD
+.RE
+.TP
+.BR CAP_SETFCAP " (since Linux 2.6.24)"
+Set arbitrary capabilities on a file.
+.IP
+.\" commit db2e718a47984b9d71ed890eb2ea36ecf150de18
+Since Linux 5.12, this capability is
+also needed to map user ID 0 in a new user namespace; see
+.BR user_namespaces (7)
+for details.
+.TP
+.B CAP_SETPCAP
+If file capabilities are supported (i.e., since Linux 2.6.24):
+add any capability from the calling thread's bounding set
+to its inheritable set;
+drop capabilities from the bounding set (via
+.BR prctl (2)
+.BR PR_CAPBSET_DROP );
+make changes to the
+.I securebits
+flags.
+.IP
+If file capabilities are not supported (i.e., before Linux 2.6.24):
+grant or remove any capability in the
+caller's permitted capability set to or from any other process.
+(This property of
+.B CAP_SETPCAP
+is not available when the kernel is configured to support
+file capabilities, since
+.B CAP_SETPCAP
+has entirely different semantics for such kernels.)
+.TP
+.B CAP_SETUID
+.RS
+.PD 0
+.IP \[bu] 3
+Make arbitrary manipulations of process UIDs
+.RB ( setuid (2),
+.BR setreuid (2),
+.BR setresuid (2),
+.BR setfsuid (2));
+.IP \[bu]
+forge UID when passing socket credentials via UNIX domain sockets;
+.IP \[bu]
+write a user ID mapping in a user namespace (see
+.BR user_namespaces (7)).
+.PD
+.RE
+.\" FIXME CAP_SETUID also has an effect in exec(); document this.
+.TP
+.B CAP_SYS_ADMIN
+.IR Note :
+this capability is overloaded; see
+.I Notes to kernel developers
+below.
+.IP
+.PD 0
+.RS
+.IP \[bu] 3
+Perform a range of system administration operations including:
+.BR quotactl (2),
+.BR mount (2),
+.BR umount (2),
+.BR pivot_root (2),
+.BR swapon (2),
+.BR swapoff (2),
+.BR sethostname (2),
+and
+.BR setdomainname (2);
+.IP \[bu]
+perform privileged
+.BR syslog (2)
+operations (since Linux 2.6.37,
+.B CAP_SYSLOG
+should be used to permit such operations);
+.IP \[bu]
+perform
+.B VM86_REQUEST_IRQ
+.BR vm86 (2)
+command;
+.IP \[bu]
+access the same checkpoint/restore functionality that is governed by
+.B CAP_CHECKPOINT_RESTORE
+(but the latter, weaker capability is preferred for accessing
+that functionality);
+.IP \[bu]
+perform the same BPF operations as are governed by
+.B CAP_BPF
+(but the latter, weaker capability is preferred for accessing
+that functionality);
+.IP \[bu]
+employ the same performance monitoring mechanisms as are governed by
+.B CAP_PERFMON
+(but the latter, weaker capability is preferred for accessing
+that functionality);
+.IP \[bu]
+perform
+.B IPC_SET
+and
+.B IPC_RMID
+operations on arbitrary System V IPC objects;
+.IP \[bu]
+override
+.B RLIMIT_NPROC
+resource limit;
+.IP \[bu]
+perform operations on
+.I trusted
+and
+.I security
+extended attributes (see
+.BR xattr (7));
+.IP \[bu]
+use
+.BR lookup_dcookie (2);
+.IP \[bu]
+use
+.BR ioprio_set (2)
+to assign
+.B IOPRIO_CLASS_RT
+and (before Linux 2.6.25)
+.B IOPRIO_CLASS_IDLE
+I/O scheduling classes;
+.IP \[bu]
+forge PID when passing socket credentials via UNIX domain sockets;
+.IP \[bu]
+exceed
+.IR /proc/sys/fs/file\-max ,
+the system-wide limit on the number of open files,
+in system calls that open files (e.g.,
+.BR accept (2),
+.BR execve (2),
+.BR open (2),
+.BR pipe (2));
+.IP \[bu]
+employ
+.B CLONE_*
+flags that create new namespaces with
+.BR clone (2)
+and
+.BR unshare (2)
+(but, since Linux 3.8,
+creating user namespaces does not require any capability);
+.IP \[bu]
+access privileged
+.I perf
+event information;
+.IP \[bu]
+call
+.BR setns (2)
+(requires
+.B CAP_SYS_ADMIN
+in the
+.I target
+namespace);
+.IP \[bu]
+call
+.BR fanotify_init (2);
+.IP \[bu]
+perform privileged
+.B KEYCTL_CHOWN
+and
+.B KEYCTL_SETPERM
+.BR keyctl (2)
+operations;
+.IP \[bu]
+perform
+.BR madvise (2)
+.B MADV_HWPOISON
+operation;
+.IP \[bu]
+employ the
+.B TIOCSTI
+.BR ioctl (2)
+to insert characters into the input queue of a terminal other than
+the caller's controlling terminal;
+.IP \[bu]
+employ the obsolete
+.BR nfsservctl (2)
+system call;
+.IP \[bu]
+employ the obsolete
+.BR bdflush (2)
+system call;
+.IP \[bu]
+perform various privileged block-device
+.BR ioctl (2)
+operations;
+.IP \[bu]
+perform various privileged filesystem
+.BR ioctl (2)
+operations;
+.IP \[bu]
+perform privileged
+.BR ioctl (2)
+operations on the
+.I /dev/random
+device (see
+.BR random (4));
+.IP \[bu]
+install a
+.BR seccomp (2)
+filter without first having to set the
+.I no_new_privs
+thread attribute;
+.IP \[bu]
+modify allow/deny rules for device control groups;
+.IP \[bu]
+employ the
+.BR ptrace (2)
+.B PTRACE_SECCOMP_GET_FILTER
+operation to dump tracee's seccomp filters;
+.IP \[bu]
+employ the
+.BR ptrace (2)
+.B PTRACE_SETOPTIONS
+operation to suspend the tracee's seccomp protections (i.e., the
+.B PTRACE_O_SUSPEND_SECCOMP
+flag);
+.IP \[bu]
+perform administrative operations on many device drivers;
+.IP \[bu]
+modify autogroup nice values by writing to
+.IR /proc/ pid /autogroup
+(see
+.BR sched (7)).
+.RE
+.PD
+.TP
+.B CAP_SYS_BOOT
+Use
+.BR reboot (2)
+and
+.BR kexec_load (2).
+.TP
+.B CAP_SYS_CHROOT
+.RS
+.PD 0
+.IP \[bu] 3
+Use
+.BR chroot (2);
+.IP \[bu]
+change mount namespaces using
+.BR setns (2).
+.PD
+.RE
+.TP
+.B CAP_SYS_MODULE
+.RS
+.PD 0
+.IP \[bu] 3
+Load and unload kernel modules
+(see
+.BR init_module (2)
+and
+.BR delete_module (2));
+.IP \[bu]
+before Linux 2.6.25:
+drop capabilities from the system-wide capability bounding set.
+.PD
+.RE
+.TP
+.B CAP_SYS_NICE
+.PD 0
+.RS
+.IP \[bu] 3
+Lower the process nice value
+.RB ( nice (2),
+.BR setpriority (2))
+and change the nice value for arbitrary processes;
+.IP \[bu]
+set real-time scheduling policies for the calling process,
+and set scheduling policies and priorities for arbitrary processes
+.RB ( sched_setscheduler (2),
+.BR sched_setparam (2),
+.BR sched_setattr (2));
+.IP \[bu]
+set CPU affinity for arbitrary processes
+.RB ( sched_setaffinity (2));
+.IP \[bu]
+set I/O scheduling class and priority for arbitrary processes
+.RB ( ioprio_set (2));
+.IP \[bu]
+apply
+.BR migrate_pages (2)
+to arbitrary processes and allow processes
+to be migrated to arbitrary nodes;
+.\" FIXME CAP_SYS_NICE also has the following effect for
+.\" migrate_pages(2):
+.\" do_migrate_pages(mm, &old, &new,
+.\" capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
+.\"
+.\" Document this.
+.IP \[bu]
+apply
+.BR move_pages (2)
+to arbitrary processes;
+.IP \[bu]
+use the
+.B MPOL_MF_MOVE_ALL
+flag with
+.BR mbind (2)
+and
+.BR move_pages (2).
+.RE
+.PD
+.TP
+.B CAP_SYS_PACCT
+Use
+.BR acct (2).
+.TP
+.B CAP_SYS_PTRACE
+.PD 0
+.RS
+.IP \[bu] 3
+Trace arbitrary processes using
+.BR ptrace (2);
+.IP \[bu]
+apply
+.BR get_robust_list (2)
+to arbitrary processes;
+.IP \[bu]
+transfer data to or from the memory of arbitrary processes using
+.BR process_vm_readv (2)
+and
+.BR process_vm_writev (2);
+.IP \[bu]
+inspect processes using
+.BR kcmp (2).
+.RE
+.PD
+.TP
+.B CAP_SYS_RAWIO
+.PD 0
+.RS
+.IP \[bu] 3
+Perform I/O port operations
+.RB ( iopl (2)
+and
+.BR ioperm (2));
+.IP \[bu]
+access
+.IR /proc/kcore ;
+.IP \[bu]
+employ the
+.B FIBMAP
+.BR ioctl (2)
+operation;
+.IP \[bu]
+open devices for accessing x86 model-specific registers (MSRs, see
+.BR msr (4));
+.IP \[bu]
+update
+.IR /proc/sys/vm/mmap_min_addr ;
+.IP \[bu]
+create memory mappings at addresses below the value specified by
+.IR /proc/sys/vm/mmap_min_addr ;
+.IP \[bu]
+map files in
+.IR /proc/bus/pci ;
+.IP \[bu]
+open
+.I /dev/mem
+and
+.IR /dev/kmem ;
+.IP \[bu]
+perform various SCSI device commands;
+.IP \[bu]
+perform certain operations on
+.BR hpsa (4)
+and
+.BR cciss (4)
+devices;
+.IP \[bu]
+perform a range of device-specific operations on other devices.
+.RE
+.PD
+.TP
+.B CAP_SYS_RESOURCE
+.PD 0
+.RS
+.IP \[bu] 3
+Use reserved space on ext2 filesystems;
+.IP \[bu]
+make
+.BR ioctl (2)
+calls controlling ext3 journaling;
+.IP \[bu]
+override disk quota limits;
+.IP \[bu]
+increase resource limits (see
+.BR setrlimit (2));
+.IP \[bu]
+override
+.B RLIMIT_NPROC
+resource limit;
+.IP \[bu]
+override maximum number of consoles on console allocation;
+.IP \[bu]
+override maximum number of keymaps;
+.IP \[bu]
+allow more than 64 Hz interrupts from the real-time clock;
+.IP \[bu]
+raise
+.I msg_qbytes
+limit for a System V message queue above the limit in
+.I /proc/sys/kernel/msgmnb
+(see
+.BR msgop (2)
+and
+.BR msgctl (2));
+.IP \[bu]
+allow the
+.B RLIMIT_NOFILE
+resource limit on the number of "in-flight" file descriptors
+to be bypassed when passing file descriptors to another process
+via a UNIX domain socket (see
+.BR unix (7));
+.IP \[bu]
+override the
+.I /proc/sys/fs/pipe\-max\-size
+limit when setting the capacity of a pipe using the
+.B F_SETPIPE_SZ
+.BR fcntl (2)
+command;
+.IP \[bu]
+use
+.B F_SETPIPE_SZ
+to increase the capacity of a pipe above the limit specified by
+.IR /proc/sys/fs/pipe\-max\-size ;
+.IP \[bu]
+override
+.IR /proc/sys/fs/mqueue/queues_max ,
+.IR /proc/sys/fs/mqueue/msg_max ,
+and
+.I /proc/sys/fs/mqueue/msgsize_max
+limits when creating POSIX message queues (see
+.BR mq_overview (7));
+.IP \[bu]
+employ the
+.BR prctl (2)
+.B PR_SET_MM
+operation;
+.IP \[bu]
+set
+.IR /proc/ pid /oom_score_adj
+to a value lower than the value last set by a process with
+.BR CAP_SYS_RESOURCE .
+.RE
+.PD
+.TP
+.B CAP_SYS_TIME
+Set system clock
+.RB ( settimeofday (2),
+.BR stime (2),
+.BR adjtimex (2));
+set real-time (hardware) clock.
+.TP
+.B CAP_SYS_TTY_CONFIG
+Use
+.BR vhangup (2);
+employ various privileged
+.BR ioctl (2)
+operations on virtual terminals.
+.TP
+.BR CAP_SYSLOG " (since Linux 2.6.37)"
+.RS
+.PD 0
+.IP \[bu] 3
+Perform privileged
+.BR syslog (2)
+operations.
+See
+.BR syslog (2)
+for information on which operations require privilege.
+.IP \[bu]
+View kernel addresses exposed via
+.I /proc
+and other interfaces when
+.I /proc/sys/kernel/kptr_restrict
+has the value 1.
+(See the discussion of
+.I kptr_restrict
+in
+.BR proc (5).)
+.PD
+.RE
+.TP
+.BR CAP_WAKE_ALARM " (since Linux 3.0)"
+Trigger something that will wake up the system (set
+.B CLOCK_REALTIME_ALARM
+and
+.B CLOCK_BOOTTIME_ALARM
+timers).
+.\"
+.SS Past and current implementation
+A full implementation of capabilities requires that:
+.IP \[bu] 3
+For all privileged operations,
+the kernel must check whether the thread has the required
+capability in its effective set.
+.IP \[bu]
+The kernel must provide system calls allowing a thread's capability sets to
+be changed and retrieved.
+.IP \[bu]
+The filesystem must support attaching capabilities to an executable file,
+so that a process gains those capabilities when the file is executed.
+.PP
+Before Linux 2.6.24, only the first two of these requirements were met;
+since Linux 2.6.24, all three requirements are met.
+.\"
+.SS Notes to kernel developers
+When adding a new kernel feature that should be governed by a capability,
+consider the following points.
+.IP \[bu] 3
+The goal of capabilities is to divide the power of superuser into pieces,
+such that if a program that has one or more capabilities is compromised,
+its power to do damage to the system would be less than the same program
+running with root privilege.
+.IP \[bu]
+You have the choice of either creating a new capability for your new feature,
+or associating the feature with one of the existing capabilities.
+In order to keep the set of capabilities to a manageable size,
+the latter option is preferable,
+unless there are compelling reasons to take the former option.
+(There is also a technical limit:
+the size of capability sets is currently limited to 64 bits.)
+.IP \[bu]
+To determine which existing capability might best be associated
+with your new feature, review the list of capabilities above in order
+to find a "silo" into which your new feature best fits.
+One approach to take is to determine if there are other features
+requiring capabilities that will always be used along with the new feature.
+If the new feature is useless without these other features,
+you should use the same capability as the other features.
+.IP \[bu]
+.I Don't
+choose
+.B CAP_SYS_ADMIN
+if you can possibly avoid it!
+A vast proportion of existing capability checks are associated
+with this capability (see the partial list above).
+It can plausibly be called "the new root",
+since on the one hand, it confers a wide range of powers,
+and on the other hand,
+its broad scope means that this is the capability
+that is required by many privileged programs.
+Don't make the problem worse.
+The only new features that should be associated with
+.B CAP_SYS_ADMIN
+are ones that
+.I closely
+match existing uses in that silo.
+.IP \[bu]
+If you have determined that it really is necessary to create
+a new capability for your feature,
+don't make or name it as a "single-use" capability.
+Thus, for example, the addition of the highly specific
+.B CAP_SYS_PACCT
+was probably a mistake.
+Instead, try to identify and name your new capability as a broader
+silo into which other related future use cases might fit.
+.\"
+.SS Thread capability sets
+Each thread has the following capability sets containing zero or more
+of the above capabilities:
+.TP
+.I Permitted
+This is a limiting superset for the effective
+capabilities that the thread may assume.
+It is also a limiting superset for the capabilities that
+may be added to the inheritable set by a thread that does not have the
+.B CAP_SETPCAP
+capability in its effective set.
+.IP
+If a thread drops a capability from its permitted set,
+it can never reacquire that capability (unless it
+.BR execve (2)s
+either a set-user-ID-root program, or
+a program whose associated file capabilities grant that capability).
+.TP
+.I Inheritable
+This is a set of capabilities preserved across an
+.BR execve (2).
+Inheritable capabilities remain inheritable when executing any program,
+and inheritable capabilities are added to the permitted set when executing
+a program that has the corresponding bits set in the file inheritable set.
+.IP
+Because inheritable capabilities are not generally preserved across
+.BR execve (2)
+when running as a non-root user, applications that wish to run helper
+programs with elevated capabilities should consider using
+ambient capabilities, described below.
+.TP
+.I Effective
+This is the set of capabilities used by the kernel to
+perform permission checks for the thread.
+.TP
+.IR Bounding " (per-thread since Linux 2.6.25)"
+The capability bounding set is a mechanism that can be used
+to limit the capabilities that are gained during
+.BR execve (2).
+.IP
+Since Linux 2.6.25, this is a per-thread capability set.
+In older kernels, the capability bounding set was a system-wide attribute
+shared by all threads on the system.
+.IP
+For more details, see
+.I Capability bounding set
+below.
+.TP
+.IR Ambient " (since Linux 4.3)"
+.\" commit 58319057b7847667f0c9585b9de0e8932b0fdb08
+This is a set of capabilities that are preserved across an
+.BR execve (2)
+of a program that is not privileged.
+The ambient capability set obeys the invariant that no capability
+can ever be ambient if it is not both permitted and inheritable.
+.IP
+The ambient capability set can be directly modified using
+.BR prctl (2).
+Ambient capabilities are automatically lowered if either of
+the corresponding permitted or inheritable capabilities is lowered.
+.IP
+Executing a program that changes UID or GID due to the
+set-user-ID or set-group-ID bits or executing a program that has
+any file capabilities set will clear the ambient set.
+Ambient capabilities are added to the permitted set and
+assigned to the effective set when
+.BR execve (2)
+is called.
+If ambient capabilities cause a process's permitted and effective
+capabilities to increase during an
+.BR execve (2),
+this does not trigger the secure-execution mode described in
+.BR ld.so (8).
+.PP
+A child created via
+.BR fork (2)
+inherits copies of its parent's capability sets.
+For details on how
+.BR execve (2)
+affects capabilities, see
+.I Transformation of capabilities during execve()
+below.
+.PP
+Using
+.BR capset (2),
+a thread may manipulate its own capability sets; see
+.I Programmatically adjusting capability sets
+below.
+.PP
+Since Linux 3.2, the file
+.I /proc/sys/kernel/cap_last_cap
+.\" commit 73efc0394e148d0e15583e13712637831f926720
+exposes the numerical value of the highest capability
+supported by the running kernel;
+this can be used to determine the highest bit
+that may be set in a capability set.
+.\"
+.SS File capabilities
+Since Linux 2.6.24, the kernel supports
+associating capability sets with an executable file using
+.BR setcap (8).
+The file capability sets are stored in an extended attribute (see
+.BR setxattr (2)
+and
+.BR xattr (7))
+named
+.IR "security.capability" .
+Writing to this extended attribute requires the
+.B CAP_SETFCAP
+capability.
+The file capability sets,
+in conjunction with the capability sets of the thread,
+determine the capabilities of a thread after an
+.BR execve (2).
+.PP
+The three file capability sets are:
+.TP
+.IR Permitted " (formerly known as " forced ):
+These capabilities are automatically permitted to the thread,
+regardless of the thread's inheritable capabilities.
+.TP
+.IR Inheritable " (formerly known as " allowed ):
+This set is ANDed with the thread's inheritable set to determine which
+inheritable capabilities are enabled in the permitted set of
+the thread after the
+.BR execve (2).
+.TP
+.IR Effective :
+This is not a set, but rather just a single bit.
+If this bit is set, then during an
+.BR execve (2)
+all of the new permitted capabilities for the thread are
+also raised in the effective set.
+If this bit is not set, then after an
+.BR execve (2),
+none of the new permitted capabilities is in the new effective set.
+.IP
+Enabling the file effective capability bit implies
+that any file permitted or inheritable capability that causes a
+thread to acquire the corresponding permitted capability during an
+.BR execve (2)
+(see
+.I Transformation of capabilities during execve()
+below) will also acquire that
+capability in its effective set.
+Therefore, when assigning capabilities to a file
+.RB ( setcap (8),
+.BR cap_set_file (3),
+.BR cap_set_fd (3)),
+if we specify the effective flag as being enabled for any capability,
+then the effective flag must also be specified as enabled
+for all other capabilities for which the corresponding permitted or
+inheritable flag is enabled.
+.\"
+.SS File capability extended attribute versioning
+To allow extensibility,
+the kernel supports a scheme to encode a version number inside the
+.I security.capability
+extended attribute that is used to implement file capabilities.
+These version numbers are internal to the implementation,
+and not directly visible to user-space applications.
+To date, the following versions are supported:
+.TP
+.B VFS_CAP_REVISION_1
+This was the original file capability implementation,
+which supported 32-bit masks for file capabilities.
+.TP
+.BR VFS_CAP_REVISION_2 " (since Linux 2.6.25)"
+.\" commit e338d263a76af78fe8f38a72131188b58fceb591
+This version allows for file capability masks that are 64 bits in size,
+and was necessary as the number of supported capabilities grew beyond 32.
+The kernel transparently continues to support the execution of files
+that have 32-bit version 1 capability masks,
+but when adding capabilities to files that did not previously
+have capabilities, or modifying the capabilities of existing files,
+it automatically uses the version 2 scheme
+(or possibly the version 3 scheme, as described below).
+.TP
+.BR VFS_CAP_REVISION_3 " (since Linux 4.14)"
+.\" commit 8db6c34f1dbc8e06aa016a9b829b06902c3e1340
+Version 3 file capabilities are provided
+to support namespaced file capabilities (described below).
+.IP
+As with version 2 file capabilities,
+version 3 capability masks are 64 bits in size.
+But in addition, the root user ID of the namespace is encoded in the
+.I security.capability
+extended attribute.
+(A namespace's root user ID is the value that user ID 0
+inside that namespace maps to in the initial user namespace.)
+.IP
+Version 3 file capabilities are designed to coexist
+with version 2 capabilities;
+that is, on a modern Linux system,
+there may be some files with version 2 capabilities
+while others have version 3 capabilities.
+.PP
+Before Linux 4.14,
+the only kind of file capability extended attribute
+that could be attached to a file was a
+.B VFS_CAP_REVISION_2
+attribute.
+Since Linux 4.14,
+the version of the
+.I security.capability
+extended attribute that is attached to a file
+depends on the circumstances in which the attribute was created.
+.PP
+Starting with Linux 4.14, a
+.I security.capability
+extended attribute is automatically created as (or converted to)
+a version 3
+.RB ( VFS_CAP_REVISION_3 )
+attribute if both of the following are true:
+.IP \[bu] 3
+The thread writing the attribute resides in a noninitial user namespace.
+(More precisely: the thread resides in a user namespace other
+than the one from which the underlying filesystem was mounted.)
+.IP \[bu]
+The thread has the
+.B CAP_SETFCAP
+capability over the file inode,
+meaning that (a) the thread has the
+.B CAP_SETFCAP
+capability in its own user namespace;
+and (b) the UID and GID of the file inode have mappings in
+the writer's user namespace.
+.PP
+When a
+.B VFS_CAP_REVISION_3
+.I security.capability
+extended attribute is created, the root user ID of the creating thread's
+user namespace is saved in the extended attribute.
+.PP
+By contrast, creating or modifying a
+.I security.capability
+extended attribute from a privileged
+.RB ( CAP_SETFCAP )
+thread that resides in the
+namespace where the underlying filesystem was mounted
+(this normally means the initial user namespace)
+automatically results in the creation of a version 2
+.RB ( VFS_CAP_REVISION_2 )
+attribute.
+.PP
+Note that the creation of a version 3
+.I security.capability
+extended attribute is automatic.
+That is to say, when a user-space application writes
+.RB ( setxattr (2))
+a
+.I security.capability
+attribute in the version 2 format,
+the kernel will automatically create a version 3 attribute
+if the attribute is created in the circumstances described above.
+Correspondingly, when a version 3
+.I security.capability
+attribute is retrieved
+.RB ( getxattr (2))
+by a process that resides inside a user namespace that was created by the
+root user ID (or a descendant of that user namespace),
+the returned attribute is (automatically)
+simplified to appear as a version 2 attribute
+(i.e., the returned value is the size of a version 2 attribute and does
+not include the root user ID).
+These automatic translations mean that no changes are required to
+user-space tools (e.g.,
+.BR setcap (8)
+and
+.BR getcap (8))
+in order for those tools to be used to create and retrieve version 3
+.I security.capability
+attributes.
+.PP
+Note that a file can have either a version 2 or a version 3
+.I security.capability
+extended attribute associated with it, but not both:
+creation or modification of the
+.I security.capability
+extended attribute will automatically modify the version
+according to the circumstances in which the extended attribute is
+created or modified.
+.\"
+.SS Transformation of capabilities during execve()
+During an
+.BR execve (2),
+the kernel calculates the new capabilities of
+the process using the following algorithm:
+.PP
+.in +4n
+.EX
+P'(ambient) = (file is privileged) ? 0 : P(ambient)
+\&
+P'(permitted) = (P(inheritable) & F(inheritable)) |
+ (F(permitted) & P(bounding)) | P'(ambient)
+\&
+P'(effective) = F(effective) ? P'(permitted) : P'(ambient)
+\&
+P'(inheritable) = P(inheritable) [i.e., unchanged]
+\&
+P'(bounding) = P(bounding) [i.e., unchanged]
+.EE
+.in
+.PP
+where:
+.RS 4
+.TP
+P()
+denotes the value of a thread capability set before the
+.BR execve (2)
+.TP
+P'()
+denotes the value of a thread capability set after the
+.BR execve (2)
+.TP
+F()
+denotes a file capability set
+.RE
+.PP
+Note the following details relating to the above capability
+transformation rules:
+.IP \[bu] 3
+The ambient capability set is present only since Linux 4.3.
+When determining the transformation of the ambient set during
+.BR execve (2),
+a privileged file is one that has capabilities or
+has the set-user-ID or set-group-ID bit set.
+.IP \[bu]
+Prior to Linux 2.6.25,
+the bounding set was a system-wide attribute shared by all threads.
+That system-wide value was employed to calculate the new permitted set during
+.BR execve (2)
+in the same manner as shown above for
+.IR P(bounding) .
+.PP
+.IR Note :
+during the capability transitions described above,
+file capabilities may be ignored (treated as empty) for the same reasons
+that the set-user-ID and set-group-ID bits are ignored; see
+.BR execve (2).
+File capabilities are similarly ignored if the kernel was booted with the
+.I no_file_caps
+option.
+.PP
+.IR Note :
+according to the rules above,
+if a process with nonzero user IDs performs an
+.BR execve (2)
+then any capabilities that are present in
+its permitted and effective sets will be cleared.
+For the treatment of capabilities when a process with a
+user ID of zero performs an
+.BR execve (2),
+see
+.I Capabilities and execution of programs by root
+below.
+.\"
+.SS Safety checking for capability-dumb binaries
+A capability-dumb binary is an application that has been
+marked to have file capabilities, but has not been converted to use the
+.BR libcap (3)
+API to manipulate its capabilities.
+(In other words, this is a traditional set-user-ID-root program
+that has been switched to use file capabilities,
+but whose code has not been modified to understand capabilities.)
+For such applications,
+the effective capability bit is set on the file,
+so that the file permitted capabilities are automatically
+enabled in the process effective set when executing the file.
+The kernel recognizes a file which has the effective capability bit set
+as capability-dumb for the purpose of the check described here.
+.PP
+When executing a capability-dumb binary,
+the kernel checks if the process obtained all permitted capabilities
+that were specified in the file permitted set,
+after the capability transformations described above have been performed.
+(The typical reason why this might
+.I not
+occur is that the capability bounding set masked out some
+of the capabilities in the file permitted set.)
+If the process did not obtain the full set of
+file permitted capabilities, then
+.BR execve (2)
+fails with the error
+.BR EPERM .
+This prevents possible security risks that could arise when
+a capability-dumb application is executed with less privilege than it needs.
+Note that, by definition,
+the application could not itself recognize this problem,
+since it does not employ the
+.BR libcap (3)
+API.
+.\"
+.SS Capabilities and execution of programs by root
+.\" See cap_bprm_set_creds(), bprm_caps_from_vfs_cap() and
+.\" handle_privileged_root() in security/commoncap.c (Linux 5.0 source)
+In order to mirror traditional UNIX semantics,
+the kernel performs special treatment of file capabilities when
+a process with UID 0 (root) executes a program and
+when a set-user-ID-root program is executed.
+.PP
+After having performed any changes to the process effective ID that
+were triggered by the set-user-ID mode bit of the binary\[em]e.g.,
+switching the effective user ID to 0 (root) because
+a set-user-ID-root program was executed\[em]the
+kernel calculates the file capability sets as follows:
+.IP (1) 5
+If the real or effective user ID of the process is 0 (root),
+then the file inheritable and permitted sets are ignored;
+instead they are notionally considered to be all ones
+(i.e., all capabilities enabled).
+(There is one exception to this behavior, described in
+.I Set-user-ID-root programs that have file capabilities
+below.)
+.IP (2)
+If the effective user ID of the process is 0 (root) or
+the file effective bit is in fact enabled,
+then the file effective bit is notionally defined to be one (enabled).
+.PP
+These notional values for the file's capability sets are then used
+as described above to calculate the transformation of the process's
+capabilities during
+.BR execve (2).
+.PP
+Thus, when a process with nonzero UIDs
+.BR execve (2)s
+a set-user-ID-root program that does not have capabilities attached,
+or when a process whose real and effective UIDs are zero
+.BR execve (2)s
+a program, the calculation of the process's new
+permitted capabilities simplifies to:
+.PP
+.in +4n
+.EX
+P'(permitted) = P(inheritable) | P(bounding)
+\&
+P'(effective) = P'(permitted)
+.EE
+.in
+.PP
+Consequently, the process gains all capabilities in its permitted and
+effective capability sets,
+except those masked out by the capability bounding set.
+(In the calculation of P'(permitted),
+the P'(ambient) term can be simplified away because it is by
+definition a subset of P(inheritable).)
+.PP
+The special treatments of user ID 0 (root) described in this subsection
+can be disabled using the securebits mechanism described below.
+.\"
+.\"
+.SS Set-user-ID-root programs that have file capabilities
+There is one exception to the behavior described in
+.I Capabilities and execution of programs by root
+above.
+If (a) the binary that is being executed has capabilities attached and
+(b) the real user ID of the process is
+.I not
+0 (root) and
+(c) the effective user ID of the process
+.I is
+0 (root), then the file capability bits are honored
+(i.e., they are not notionally considered to be all ones).
+The usual way in which this situation can arise is when executing
+a set-user-ID-root program that also has file capabilities.
+When such a program is executed,
+the process gains just the capabilities granted by the program
+(i.e., not all capabilities,
+as would occur when executing a set-user-ID-root program
+that does not have any associated file capabilities).
+.PP
+Note that one can assign empty capability sets to a program file,
+and thus it is possible to create a set-user-ID-root program that
+changes the effective and saved set-user-ID of the process
+that executes the program to 0,
+but confers no capabilities to that process.
+.\"
+.SS Capability bounding set
+The capability bounding set is a security mechanism that can be used
+to limit the capabilities that can be gained during an
+.BR execve (2).
+The bounding set is used in the following ways:
+.IP \[bu] 3
+During an
+.BR execve (2),
+the capability bounding set is ANDed with the file permitted
+capability set, and the result of this operation is assigned to the
+thread's permitted capability set.
+The capability bounding set thus places a limit on the permitted
+capabilities that may be granted by an executable file.
+.IP \[bu]
+(Since Linux 2.6.25)
+The capability bounding set acts as a limiting superset for
+the capabilities that a thread can add to its inheritable set using
+.BR capset (2).
+This means that if a capability is not in the bounding set,
+then a thread can't add this capability to its
+inheritable set, even if it was in its permitted capabilities,
+and thereby cannot have this capability preserved in its
+permitted set when it
+.BR execve (2)s
+a file that has the capability in its inheritable set.
+.PP
+Note that the bounding set masks the file permitted capabilities,
+but not the inheritable capabilities.
+If a thread maintains a capability in its inheritable set
+that is not in its bounding set,
+then it can still gain that capability in its permitted set
+by executing a file that has the capability in its inheritable set.
+.PP
+Depending on the kernel version, the capability bounding set is either
+a system-wide attribute, or a per-thread attribute.
+.PP
+.B "Capability bounding set from Linux 2.6.25 onward"
+.PP
+From Linux 2.6.25, the
+.I "capability bounding set"
+is a per-thread attribute.
+(The system-wide capability bounding set described below no longer exists.)
+.PP
+The bounding set is inherited at
+.BR fork (2)
+from the thread's parent, and is preserved across an
+.BR execve (2).
+.PP
+A thread may remove capabilities from its capability bounding set using the
+.BR prctl (2)
+.B PR_CAPBSET_DROP
+operation, provided it has the
+.B CAP_SETPCAP
+capability.
+Once a capability has been dropped from the bounding set,
+it cannot be restored to that set.
+A thread can determine if a capability is in its bounding set using the
+.BR prctl (2)
+.B PR_CAPBSET_READ
+operation.
+.PP
+Removing capabilities from the bounding set is supported only if file
+capabilities are compiled into the kernel.
+Before Linux 2.6.33,
+file capabilities were an optional feature configurable via the
+.B CONFIG_SECURITY_FILE_CAPABILITIES
+option.
+Since Linux 2.6.33,
+.\" commit b3a222e52e4d4be77cc4520a57af1a4a0d8222d1
+the configuration option has been removed
+and file capabilities are always part of the kernel.
+When file capabilities are compiled into the kernel, the
+.B init
+process (the ancestor of all processes) begins with a full bounding set.
+If file capabilities are not compiled into the kernel, then
+.B init
+begins with a full bounding set minus
+.BR CAP_SETPCAP ,
+because this capability has a different meaning when there are
+no file capabilities.
+.PP
+Removing a capability from the bounding set does not remove it
+from the thread's inheritable set.
+However it does prevent the capability from being added
+back into the thread's inheritable set in the future.
+.PP
+.B "Capability bounding set prior to Linux 2.6.25"
+.PP
+Before Linux 2.6.25, the capability bounding set was a system-wide
+attribute that affects all threads on the system.
+The bounding set is accessible via the file
+.IR /proc/sys/kernel/cap\-bound .
+(Confusingly, this bit mask parameter is expressed as a
+signed decimal number in
+.IR /proc/sys/kernel/cap\-bound .)
+.PP
+Only the
+.B init
+process may set capabilities in the capability bounding set;
+other than that, the superuser (more precisely: a process with the
+.B CAP_SYS_MODULE
+capability) may only clear capabilities from this set.
+.PP
+On a standard system the capability bounding set always masks out the
+.B CAP_SETPCAP
+capability.
+To remove this restriction (dangerous!), modify the definition of
+.B CAP_INIT_EFF_SET
+in
+.I include/linux/capability.h
+and rebuild the kernel.
+.PP
+The system-wide capability bounding set feature was added
+to Linux 2.2.11.
+.\"
+.\"
+.\"
+.SS Effect of user ID changes on capabilities
+To preserve the traditional semantics for transitions between
+0 and nonzero user IDs,
+the kernel makes the following changes to a thread's capability
+sets on changes to the thread's real, effective, saved set,
+and filesystem user IDs (using
+.BR setuid (2),
+.BR setresuid (2),
+or similar):
+.IP \[bu] 3
+If one or more of the real, effective, or saved set user IDs
+was previously 0, and as a result of the UID changes all of these IDs
+have a nonzero value,
+then all capabilities are cleared from the permitted, effective, and ambient
+capability sets.
+.IP \[bu]
+If the effective user ID is changed from 0 to nonzero,
+then all capabilities are cleared from the effective set.
+.IP \[bu]
+If the effective user ID is changed from nonzero to 0,
+then the permitted set is copied to the effective set.
+.IP \[bu]
+If the filesystem user ID is changed from 0 to nonzero (see
+.BR setfsuid (2)),
+then the following capabilities are cleared from the effective set:
+.BR CAP_CHOWN ,
+.BR CAP_DAC_OVERRIDE ,
+.BR CAP_DAC_READ_SEARCH ,
+.BR CAP_FOWNER ,
+.BR CAP_FSETID ,
+.B CAP_LINUX_IMMUTABLE
+(since Linux 2.6.30),
+.BR CAP_MAC_OVERRIDE ,
+and
+.B CAP_MKNOD
+(since Linux 2.6.30).
+If the filesystem UID is changed from nonzero to 0,
+then any of these capabilities that are enabled in the permitted set
+are enabled in the effective set.
+.PP
+If a thread that has a 0 value for one or more of its user IDs wants
+to prevent its permitted capability set being cleared when it resets
+all of its user IDs to nonzero values, it can do so using the
+.B SECBIT_KEEP_CAPS
+securebits flag described below.
+.\"
+.SS Programmatically adjusting capability sets
+A thread can retrieve and change its permitted, effective, and inheritable
+capability sets using the
+.BR capget (2)
+and
+.BR capset (2)
+system calls.
+However, the use of
+.BR cap_get_proc (3)
+and
+.BR cap_set_proc (3),
+both provided in the
+.I libcap
+package,
+is preferred for this purpose.
+The following rules govern changes to the thread capability sets:
+.IP \[bu] 3
+If the caller does not have the
+.B CAP_SETPCAP
+capability,
+the new inheritable set must be a subset of the combination
+of the existing inheritable and permitted sets.
+.IP \[bu]
+(Since Linux 2.6.25)
+The new inheritable set must be a subset of the combination of the
+existing inheritable set and the capability bounding set.
+.IP \[bu]
+The new permitted set must be a subset of the existing permitted set
+(i.e., it is not possible to acquire permitted capabilities
+that the thread does not currently have).
+.IP \[bu]
+The new effective set must be a subset of the new permitted set.
+.SS The securebits flags: establishing a capabilities-only environment
+.\" For some background:
+.\" see http://lwn.net/Articles/280279/ and
+.\" http://article.gmane.org/gmane.linux.kernel.lsm/5476/
+Starting with Linux 2.6.26,
+and with a kernel in which file capabilities are enabled,
+Linux implements a set of per-thread
+.I securebits
+flags that can be used to disable special handling of capabilities for UID 0
+.RI ( root ).
+These flags are as follows:
+.TP
+.B SECBIT_KEEP_CAPS
+Setting this flag allows a thread that has one or more 0 UIDs to retain
+capabilities in its permitted set
+when it switches all of its UIDs to nonzero values.
+If this flag is not set,
+then such a UID switch causes the thread to lose all permitted capabilities.
+This flag is always cleared on an
+.BR execve (2).
+.IP
+Note that even with the
+.B SECBIT_KEEP_CAPS
+flag set, the effective capabilities of a thread are cleared when it
+switches its effective UID to a nonzero value.
+However,
+if the thread has set this flag and its effective UID is already nonzero,
+and the thread subsequently switches all other UIDs to nonzero values,
+then the effective capabilities will not be cleared.
+.IP
+The setting of the
+.B SECBIT_KEEP_CAPS
+flag is ignored if the
+.B SECBIT_NO_SETUID_FIXUP
+flag is set.
+(The latter flag provides a superset of the effect of the former flag.)
+.IP
+This flag provides the same functionality as the older
+.BR prctl (2)
+.B PR_SET_KEEPCAPS
+operation.
+.TP
+.B SECBIT_NO_SETUID_FIXUP
+Setting this flag stops the kernel from adjusting the thread's
+permitted, effective, and ambient capability sets when
+the thread's effective and filesystem UIDs are switched between
+zero and nonzero values.
+See
+.I Effect of user ID changes on capabilities
+above.
+.TP
+.B SECBIT_NOROOT
+If this bit is set, then the kernel does not grant capabilities
+when a set-user-ID-root program is executed, or when a process with
+an effective or real UID of 0 calls
+.BR execve (2).
+(See
+.I Capabilities and execution of programs by root
+above.)
+.TP
+.B SECBIT_NO_CAP_AMBIENT_RAISE
+Setting this flag disallows raising ambient capabilities via the
+.BR prctl (2)
+.B PR_CAP_AMBIENT_RAISE
+operation.
+.PP
+Each of the above "base" flags has a companion "locked" flag.
+Setting any of the "locked" flags is irreversible,
+and has the effect of preventing further changes to the
+corresponding "base" flag.
+The locked flags are:
+.BR SECBIT_KEEP_CAPS_LOCKED ,
+.BR SECBIT_NO_SETUID_FIXUP_LOCKED ,
+.BR SECBIT_NOROOT_LOCKED ,
+and
+.BR SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED .
+.PP
+The
+.I securebits
+flags can be modified and retrieved using the
+.BR prctl (2)
+.B PR_SET_SECUREBITS
+and
+.B PR_GET_SECUREBITS
+operations.
+The
+.B CAP_SETPCAP
+capability is required to modify the flags.
+Note that the
+.B SECBIT_*
+constants are available only after including the
+.I <linux/securebits.h>
+header file.
+.PP
+The
+.I securebits
+flags are inherited by child processes.
+During an
+.BR execve (2),
+all of the flags are preserved, except
+.B SECBIT_KEEP_CAPS
+which is always cleared.
+.PP
+An application can use the following call to lock itself,
+and all of its descendants,
+into an environment where the only way of gaining capabilities
+is by executing a program with associated file capabilities:
+.PP
+.in +4n
+.EX
+prctl(PR_SET_SECUREBITS,
+ /* SECBIT_KEEP_CAPS off */
+ SECBIT_KEEP_CAPS_LOCKED |
+ SECBIT_NO_SETUID_FIXUP |
+ SECBIT_NO_SETUID_FIXUP_LOCKED |
+ SECBIT_NOROOT |
+ SECBIT_NOROOT_LOCKED);
+ /* Setting/locking SECBIT_NO_CAP_AMBIENT_RAISE
+ is not required */
+.EE
+.in
+.\"
+.\"
+.SS Per-user-namespace """set-user-ID-root""" programs
+A set-user-ID program whose UID matches the UID that
+created a user namespace will confer capabilities
+in the process's permitted and effective sets
+when executed by any process inside that namespace
+or any descendant user namespace.
+.PP
+The rules about the transformation of the process's capabilities during the
+.BR execve (2)
+are exactly as described in
+.I Transformation of capabilities during execve()
+and
+.I Capabilities and execution of programs by root
+above,
+with the difference that, in the latter subsection, "root"
+is the UID of the creator of the user namespace.
+.\"
+.\"
+.SS Namespaced file capabilities
+.\" commit 8db6c34f1dbc8e06aa016a9b829b06902c3e1340
+Traditional (i.e., version 2) file capabilities associate
+only a set of capability masks with a binary executable file.
+When a process executes a binary with such capabilities,
+it gains the associated capabilities (within its user namespace)
+as per the rules described in
+.I Transformation of capabilities during execve()
+above.
+.PP
+Because version 2 file capabilities confer capabilities to
+the executing process regardless of which user namespace it resides in,
+only privileged processes are permitted to associate capabilities with a file.
+Here, "privileged" means a process that has the
+.B CAP_SETFCAP
+capability in the user namespace where the filesystem was mounted
+(normally the initial user namespace).
+This limitation renders file capabilities useless for certain use cases.
+For example, in user-namespaced containers,
+it can be desirable to be able to create a binary that
+confers capabilities only to processes executed inside that container,
+but not to processes that are executed outside the container.
+.PP
+Linux 4.14 added so-called namespaced file capabilities
+to support such use cases.
+Namespaced file capabilities are recorded as version 3 (i.e.,
+.BR VFS_CAP_REVISION_3 )
+.I security.capability
+extended attributes.
+Such an attribute is automatically created in the circumstances described
+in
+.I File capability extended attribute versioning
+above.
+When a version 3
+.I security.capability
+extended attribute is created,
+the kernel records not just the capability masks in the extended attribute,
+but also the namespace root user ID.
+.PP
+As with a binary that has
+.B VFS_CAP_REVISION_2
+file capabilities, a binary with
+.B VFS_CAP_REVISION_3
+file capabilities confers capabilities to a process during
+.BR execve (2).
+However, capabilities are conferred only if the binary is executed by
+a process that resides in a user namespace whose
+UID 0 maps to the root user ID that is saved in the extended attribute,
+or when executed by a process that resides in a descendant of such a namespace.
+.\"
+.\"
+.SS Interaction with user namespaces
+For further information on the interaction of
+capabilities and user namespaces, see
+.BR user_namespaces (7).
+.SH STANDARDS
+No standards govern capabilities, but the Linux capability implementation
+is based on the withdrawn
+.UR https://archive.org\:/details\:/posix_1003.1e\-990310
+POSIX.1e draft standard
+.UE .
+.SH NOTES
+When attempting to
+.BR strace (1)
+binaries that have capabilities (or set-user-ID-root binaries),
+you may find the
+.I \-u <username>
+option useful.
+Something like:
+.PP
+.in +4n
+.EX
+$ \fBsudo strace \-o trace.log \-u ceci ./myprivprog\fP
+.EE
+.in
+.PP
+From Linux 2.5.27 to Linux 2.6.26,
+.\" commit 5915eb53861c5776cfec33ca4fcc1fd20d66dd27 removed
+.\" CONFIG_SECURITY_CAPABILITIES
+capabilities were an optional kernel component,
+and could be enabled/disabled via the
+.B CONFIG_SECURITY_CAPABILITIES
+kernel configuration option.
+.PP
+The
+.IR /proc/ pid /task/TID/status
+file can be used to view the capability sets of a thread.
+The
+.IR /proc/ pid /status
+file shows the capability sets of a process's main thread.
+Before Linux 3.8, nonexistent capabilities were shown as being
+enabled (1) in these sets.
+Since Linux 3.8,
+.\" 7b9a7ec565505699f503b4fcf61500dceb36e744
+all nonexistent capabilities (above
+.BR CAP_LAST_CAP )
+are shown as disabled (0).
+.PP
+The
+.I libcap
+package provides a suite of routines for setting and
+getting capabilities that is more comfortable and less likely
+to change than the interface provided by
+.BR capset (2)
+and
+.BR capget (2).
+This package also provides the
+.BR setcap (8)
+and
+.BR getcap (8)
+programs.
+It can be found at
+.br
+.UR https://git.kernel.org\:/pub\:/scm\:/libs\:/libcap\:/libcap.git\:/refs/
+.UE .
+.PP
+Before Linux 2.6.24, and from Linux 2.6.24 to Linux 2.6.32 if
+file capabilities are not enabled, a thread with the
+.B CAP_SETPCAP
+capability can manipulate the capabilities of threads other than itself.
+However, this is only theoretically possible,
+since no thread ever has
+.B CAP_SETPCAP
+in either of these cases:
+.IP \[bu] 3
+In the pre-2.6.25 implementation the system-wide capability bounding set,
+.IR /proc/sys/kernel/cap\-bound ,
+always masks out the
+.B CAP_SETPCAP
+capability, and this cannot be changed
+without modifying the kernel source and rebuilding the kernel.
+.IP \[bu]
+If file capabilities are disabled (i.e., the kernel
+.B CONFIG_SECURITY_FILE_CAPABILITIES
+option is disabled), then
+.B init
+starts out with the
+.B CAP_SETPCAP
+capability removed from its per-process bounding
+set, and that bounding set is inherited by all other processes
+created on the system.
+.SH SEE ALSO
+.BR capsh (1),
+.BR setpriv (1),
+.BR prctl (2),
+.BR setfsuid (2),
+.BR cap_clear (3),
+.BR cap_copy_ext (3),
+.BR cap_from_text (3),
+.BR cap_get_file (3),
+.BR cap_get_proc (3),
+.BR cap_init (3),
+.BR capgetp (3),
+.BR capsetp (3),
+.BR libcap (3),
+.BR proc (5),
+.BR credentials (7),
+.BR pthreads (7),
+.BR user_namespaces (7),
+.BR captest (8), \" from libcap-ng
+.BR filecap (8), \" from libcap-ng
+.BR getcap (8),
+.BR getpcaps (8),
+.BR netcap (8), \" from libcap-ng
+.BR pscap (8), \" from libcap-ng
+.BR setcap (8)
+.PP
+.I include/linux/capability.h
+in the Linux kernel source tree
diff --git a/man7/cgroup_namespaces.7 b/man7/cgroup_namespaces.7
new file mode 100644
index 0000000..c1162fe
--- /dev/null
+++ b/man7/cgroup_namespaces.7
@@ -0,0 +1,248 @@
+.\" Copyright (c) 2016 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH cgroup_namespaces 7 2023-03-30 "Linux man-pages 6.05.01"
+.SH NAME
+cgroup_namespaces \- overview of Linux cgroup namespaces
+.SH DESCRIPTION
+For an overview of namespaces, see
+.BR namespaces (7).
+.PP
+Cgroup namespaces virtualize the view of a process's cgroups (see
+.BR cgroups (7))
+as seen via
+.IR /proc/ pid /cgroup
+and
+.IR /proc/ pid /mountinfo .
+.PP
+Each cgroup namespace has its own set of cgroup root directories.
+These root directories are the base points for the relative
+locations displayed in the corresponding records in the
+.IR /proc/ pid /cgroup
+file.
+When a process creates a new cgroup namespace using
+.BR clone (2)
+or
+.BR unshare (2)
+with the
+.B CLONE_NEWCGROUP
+flag, its current
+cgroups directories become the cgroup root directories
+of the new namespace.
+(This applies both for the cgroups version 1 hierarchies
+and the cgroups version 2 unified hierarchy.)
+.PP
+When reading the cgroup memberships of a "target" process from
+.IR /proc/ pid /cgroup ,
+the pathname shown in the third field of each record will be
+relative to the reading process's root directory
+for the corresponding cgroup hierarchy.
+If the cgroup directory of the target process lies outside
+the root directory of the reading process's cgroup namespace,
+then the pathname will show
+.I ../
+entries for each ancestor level in the cgroup hierarchy.
+.PP
+The following shell session demonstrates the effect of creating
+a new cgroup namespace.
+.PP
+First, (as superuser) in a shell in the initial cgroup namespace,
+we create a child cgroup in the
+.I freezer
+hierarchy, and place a process in that cgroup that we will
+use as part of the demonstration below:
+.PP
+.in +4n
+.EX
+# \fBmkdir \-p /sys/fs/cgroup/freezer/sub2\fP
+# \fBsleep 10000 &\fP # Create a process that lives for a while
+[1] 20124
+# \fBecho 20124 > /sys/fs/cgroup/freezer/sub2/cgroup.procs\fP
+.EE
+.in
+.PP
+We then create another child cgroup in the
+.I freezer
+hierarchy and put the shell into that cgroup:
+.PP
+.in +4n
+.EX
+# \fBmkdir \-p /sys/fs/cgroup/freezer/sub\fP
+# \fBecho $$\fP # Show PID of this shell
+30655
+# \fBecho 30655 > /sys/fs/cgroup/freezer/sub/cgroup.procs\fP
+# \fBcat /proc/self/cgroup | grep freezer\fP
+7:freezer:/sub
+.EE
+.in
+.PP
+Next, we use
+.BR unshare (1)
+to create a process running a new shell in new cgroup and mount namespaces:
+.PP
+.in +4n
+.EX
+# \fBPS1="sh2# " unshare \-Cm bash\fP
+.EE
+.in
+.PP
+From the new shell started by
+.BR unshare (1),
+we then inspect the
+.IR /proc/ pid /cgroup
+files of, respectively, the new shell,
+a process that is in the initial cgroup namespace
+.RI ( init ,
+with PID 1), and the process in the sibling cgroup
+.RI ( sub2 ):
+.PP
+.in +4n
+.EX
+sh2# \fBcat /proc/self/cgroup | grep freezer\fP
+7:freezer:/
+sh2# \fBcat /proc/1/cgroup | grep freezer\fP
+7:freezer:/..
+sh2# \fBcat /proc/20124/cgroup | grep freezer\fP
+7:freezer:/../sub2
+.EE
+.in
+.PP
+From the output of the first command,
+we see that the freezer cgroup membership of the new shell
+(which is in the same cgroup as the initial shell)
+is shown relative to the freezer cgroup root directory
+that was established when the new cgroup namespace was created.
+(In absolute terms,
+the new shell is in the
+.I /sub
+freezer cgroup,
+and the root directory of the freezer cgroup hierarchy
+in the new cgroup namespace is also
+.IR /sub .
+Thus, the new shell's cgroup membership is displayed as \[aq]/\[aq].)
+.PP
+However, when we look in
+.I /proc/self/mountinfo
+we see the following anomaly:
+.PP
+.in +4n
+.EX
+sh2# \fBcat /proc/self/mountinfo | grep freezer\fP
+155 145 0:32 /.. /sys/fs/cgroup/freezer ...
+.EE
+.in
+.PP
+The fourth field of this line
+.RI ( /.. )
+should show the
+directory in the cgroup filesystem which forms the root of this mount.
+Since by the definition of cgroup namespaces, the process's current
+freezer cgroup directory became its root freezer cgroup directory,
+we should see \[aq]/\[aq] in this field.
+The problem here is that we are seeing a mount entry for the cgroup
+filesystem corresponding to the initial cgroup namespace
+(whose cgroup filesystem is indeed rooted at the parent directory of
+.IR sub ).
+To fix this problem, we must remount the freezer cgroup filesystem
+from the new shell (i.e., perform the mount from a process that is in the
+new cgroup namespace), after which we see the expected results:
+.PP
+.in +4n
+.EX
+sh2# \fBmount \-\-make\-rslave /\fP # Don\[aq]t propagate mount events
+ # to other namespaces
+sh2# \fBumount /sys/fs/cgroup/freezer\fP
+sh2# \fBmount \-t cgroup \-o freezer freezer /sys/fs/cgroup/freezer\fP
+sh2# \fBcat /proc/self/mountinfo | grep freezer\fP
+155 145 0:32 / /sys/fs/cgroup/freezer rw,relatime ...
+.EE
+.in
+.\"
+.SH STANDARDS
+Linux.
+.SH NOTES
+Use of cgroup namespaces requires a kernel that is configured with the
+.B CONFIG_CGROUPS
+option.
+.PP
+The virtualization provided by cgroup namespaces serves a number of purposes:
+.IP \[bu] 3
+It prevents information leaks whereby cgroup directory paths outside of
+a container would otherwise be visible to processes in the container.
+Such leakages could, for example,
+reveal information about the container framework
+to containerized applications.
+.IP \[bu]
+It eases tasks such as container migration.
+The virtualization provided by cgroup namespaces
+allows containers to be isolated from knowledge of
+the pathnames of ancestor cgroups.
+Without such isolation, the full cgroup pathnames (displayed in
+.IR /proc/self/cgroup )
+would need to be replicated on the target system when migrating a container;
+those pathnames would also need to be unique,
+so that they don't conflict with other pathnames on the target system.
+.IP \[bu]
+It allows better confinement of containerized processes,
+because it is possible to mount the container's cgroup filesystems such that
+the container processes can't gain access to ancestor cgroup directories.
+Consider, for example, the following scenario:
+.RS
+.IP \[bu] 3
+We have a cgroup directory,
+.IR /cg/1 ,
+that is owned by user ID 9000.
+.IP \[bu]
+We have a process,
+.IR X ,
+also owned by user ID 9000,
+that is namespaced under the cgroup
+.I /cg/1/2
+(i.e.,
+.I X
+was placed in a new cgroup namespace via
+.BR clone (2)
+or
+.BR unshare (2)
+with the
+.B CLONE_NEWCGROUP
+flag).
+.RE
+.IP
+In the absence of cgroup namespacing, because the cgroup directory
+.I /cg/1
+is owned (and writable) by UID 9000 and process
+.I X
+is also owned by user ID 9000, process
+.I X
+would be able to modify the contents of cgroups files
+(i.e., change cgroup settings) not only in
+.I /cg/1/2
+but also in the ancestor cgroup directory
+.IR /cg/1 .
+Namespacing process
+.I X
+under the cgroup directory
+.IR /cg/1/2 ,
+in combination with suitable mount operations
+for the cgroup filesystem (as shown above),
+prevents it modifying files in
+.IR /cg/1 ,
+since it cannot even see the contents of that directory
+(or of further removed cgroup ancestor directories).
+Combined with correct enforcement of hierarchical limits,
+this prevents process
+.I X
+from escaping the limits imposed by ancestor cgroups.
+.SH SEE ALSO
+.BR unshare (1),
+.BR clone (2),
+.BR setns (2),
+.BR unshare (2),
+.BR proc (5),
+.BR cgroups (7),
+.BR credentials (7),
+.BR namespaces (7),
+.BR user_namespaces (7)
diff --git a/man7/cgroups.7 b/man7/cgroups.7
new file mode 100644
index 0000000..c070ca7
--- /dev/null
+++ b/man7/cgroups.7
@@ -0,0 +1,1914 @@
+.\" Copyright (C) 2015 Serge Hallyn <serge@hallyn.com>
+.\" and Copyright (C) 2016, 2017 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH cgroups 7 2023-04-03 "Linux man-pages 6.05.01"
+.SH NAME
+cgroups \- Linux control groups
+.SH DESCRIPTION
+Control groups, usually referred to as cgroups,
+are a Linux kernel feature which allows processes to
+be organized into hierarchical groups whose usage of
+various types of resources can then be limited and monitored.
+The kernel's cgroup interface is provided through
+a pseudo-filesystem called cgroupfs.
+Grouping is implemented in the core cgroup kernel code,
+while resource tracking and limits are implemented in
+a set of per-resource-type subsystems (memory, CPU, and so on).
+.\"
+.SS Terminology
+A
+.I cgroup
+is a collection of processes that are bound to a set of
+limits or parameters defined via the cgroup filesystem.
+.PP
+A
+.I subsystem
+is a kernel component that modifies the behavior of
+the processes in a cgroup.
+Various subsystems have been implemented, making it possible to do things
+such as limiting the amount of CPU time and memory available to a cgroup,
+accounting for the CPU time used by a cgroup,
+and freezing and resuming execution of the processes in a cgroup.
+Subsystems are sometimes also known as
+.I resource controllers
+(or simply, controllers).
+.PP
+The cgroups for a controller are arranged in a
+.IR hierarchy .
+This hierarchy is defined by creating, removing, and
+renaming subdirectories within the cgroup filesystem.
+At each level of the hierarchy, attributes (e.g., limits) can be defined.
+The limits, control, and accounting provided by cgroups generally have
+effect throughout the subhierarchy underneath the cgroup where the
+attributes are defined.
+Thus, for example, the limits placed on
+a cgroup at a higher level in the hierarchy cannot be exceeded
+by descendant cgroups.
+.\"
+.SS Cgroups version 1 and version 2
+The initial release of the cgroups implementation was in Linux 2.6.24.
+Over time, various cgroup controllers have been added
+to allow the management of various types of resources.
+However, the development of these controllers was largely uncoordinated,
+with the result that many inconsistencies arose between controllers
+and management of the cgroup hierarchies became rather complex.
+A longer description of these problems can be found in the kernel
+source file
+.I Documentation/admin\-guide/cgroup\-v2.rst
+(or
+.I Documentation/cgroup\-v2.txt
+in Linux 4.17 and earlier).
+.PP
+Because of the problems with the initial cgroups implementation
+(cgroups version 1),
+starting in Linux 3.10, work began on a new,
+orthogonal implementation to remedy these problems.
+Initially marked experimental, and hidden behind the
+.I "\-o\ __DEVEL__sane_behavior"
+mount option, the new version (cgroups version 2)
+was eventually made official with the release of Linux 4.5.
+Differences between the two versions are described in the text below.
+The file
+.IR cgroup.sane_behavior ,
+present in cgroups v1, is a relic of this mount option.
+The file always reports "0" and is only retained for backward compatibility.
+.PP
+Although cgroups v2 is intended as a replacement for cgroups v1,
+the older system continues to exist
+(and for compatibility reasons is unlikely to be removed).
+Currently, cgroups v2 implements only a subset of the controllers
+available in cgroups v1.
+The two systems are implemented so that both v1 controllers and
+v2 controllers can be mounted on the same system.
+Thus, for example, it is possible to use those controllers
+that are supported under version 2,
+while also using version 1 controllers
+where version 2 does not yet support those controllers.
+The only restriction here is that a controller can't be simultaneously
+employed in both a cgroups v1 hierarchy and in the cgroups v2 hierarchy.
+.\"
+.SH CGROUPS VERSION 1
+Under cgroups v1, each controller may be mounted against a separate
+cgroup filesystem that provides its own hierarchical organization of the
+processes on the system.
+It is also possible to comount multiple (or even all) cgroups v1 controllers
+against the same cgroup filesystem, meaning that the comounted controllers
+manage the same hierarchical organization of processes.
+.PP
+For each mounted hierarchy,
+the directory tree mirrors the control group hierarchy.
+Each control group is represented by a directory, with each of its child
+control groups represented as a child directory.
+For instance,
+.I /user/joe/1.session
+represents control group
+.IR 1.session ,
+which is a child of cgroup
+.IR joe ,
+which is a child of
+.IR /user .
+Under each cgroup directory is a set of files which can be read or
+written to, reflecting resource limits and a few general cgroup
+properties.
+.\"
+.SS Tasks (threads) versus processes
+In cgroups v1, a distinction is drawn between
+.I processes
+and
+.IR tasks .
+In this view, a process can consist of multiple tasks
+(more commonly called threads, from a user-space perspective,
+and called such in the remainder of this man page).
+In cgroups v1, it is possible to independently manipulate
+the cgroup memberships of the threads in a process.
+.PP
+The cgroups v1 ability to split threads across different cgroups
+caused problems in some cases.
+For example, it made no sense for the
+.I memory
+controller,
+since all of the threads of a process share a single address space.
+Because of these problems,
+the ability to independently manipulate the cgroup memberships
+of the threads in a process was removed in the initial cgroups v2
+implementation, and subsequently restored in a more limited form
+(see the discussion of "thread mode" below).
+.\"
+.SS Mounting v1 controllers
+The use of cgroups requires a kernel built with the
+.B CONFIG_CGROUPS
+option.
+In addition, each of the v1 controllers has an associated
+configuration option that must be set in order to employ that controller.
+.PP
+In order to use a v1 controller,
+it must be mounted against a cgroup filesystem.
+The usual place for such mounts is under a
+.BR tmpfs (5)
+filesystem mounted at
+.IR /sys/fs/cgroup .
+Thus, one might mount the
+.I cpu
+controller as follows:
+.PP
+.in +4n
+.EX
+mount \-t cgroup \-o cpu none /sys/fs/cgroup/cpu
+.EE
+.in
+.PP
+It is possible to comount multiple controllers against the same hierarchy.
+For example, here the
+.I cpu
+and
+.I cpuacct
+controllers are comounted against a single hierarchy:
+.PP
+.in +4n
+.EX
+mount \-t cgroup \-o cpu,cpuacct none /sys/fs/cgroup/cpu,cpuacct
+.EE
+.in
+.PP
+Comounting controllers has the effect that a process is in the same cgroup for
+all of the comounted controllers.
+Separately mounting controllers allows a process to
+be in cgroup
+.I /foo1
+for one controller while being in
+.I /foo2/foo3
+for another.
+.PP
+It is possible to comount all v1 controllers against the same hierarchy:
+.PP
+.in +4n
+.EX
+mount \-t cgroup \-o all cgroup /sys/fs/cgroup
+.EE
+.in
+.PP
+(One can achieve the same result by omitting
+.IR "\-o all" ,
+since it is the default if no controllers are explicitly specified.)
+.PP
+It is not possible to mount the same controller
+against multiple cgroup hierarchies.
+For example, it is not possible to mount both the
+.I cpu
+and
+.I cpuacct
+controllers against one hierarchy, and to mount the
+.I cpu
+controller alone against another hierarchy.
+It is possible to create multiple mounts with exactly
+the same set of comounted controllers.
+However, in this case all that results is multiple mount points
+providing a view of the same hierarchy.
+.PP
+Note that on many systems, the v1 controllers are automatically mounted under
+.IR /sys/fs/cgroup ;
+in particular,
+.BR systemd (1)
+automatically creates such mounts.
+.\"
+.SS Unmounting v1 controllers
+A mounted cgroup filesystem can be unmounted using the
+.BR umount (8)
+command, as in the following example:
+.PP
+.in +4n
+.EX
+umount /sys/fs/cgroup/pids
+.EE
+.in
+.PP
+.IR "But note well" :
+a cgroup filesystem is unmounted only if it is not busy,
+that is, it has no child cgroups.
+If this is not the case, then the only effect of the
+.BR umount (8)
+is to make the mount invisible.
+Thus, to ensure that the mount is really removed,
+one must first remove all child cgroups,
+which in turn can be done only after all member processes
+have been moved from those cgroups to the root cgroup.
+.\"
+.SS Cgroups version 1 controllers
+Each of the cgroups version 1 controllers is governed
+by a kernel configuration option (listed below).
+Additionally, the availability of the cgroups feature is governed by the
+.B CONFIG_CGROUPS
+kernel configuration option.
+.TP
+.IR cpu " (since Linux 2.6.24; " \fBCONFIG_CGROUP_SCHED\fP )
+Cgroups can be guaranteed a minimum number of "CPU shares"
+when a system is busy.
+This does not limit a cgroup's CPU usage if the CPUs are not busy.
+For further information, see
+.I Documentation/scheduler/sched\-design\-CFS.rst
+(or
+.I Documentation/scheduler/sched\-design\-CFS.txt
+in Linux 5.2 and earlier).
+.IP
+In Linux 3.2,
+this controller was extended to provide CPU "bandwidth" control.
+If the kernel is configured with
+.BR CONFIG_CFS_BANDWIDTH ,
+then within each scheduling period
+(defined via a file in the cgroup directory), it is possible to define
+an upper limit on the CPU time allocated to the processes in a cgroup.
+This upper limit applies even if there is no other competition for the CPU.
+Further information can be found in the kernel source file
+.I Documentation/scheduler/sched\-bwc.rst
+(or
+.I Documentation/scheduler/sched\-bwc.txt
+in Linux 5.2 and earlier).
+.TP
+.IR cpuacct " (since Linux 2.6.24; " \fBCONFIG_CGROUP_CPUACCT\fP )
+This provides accounting for CPU usage by groups of processes.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/cpuacct.rst
+(or
+.I Documentation/cgroup\-v1/cpuacct.txt
+in Linux 5.2 and earlier).
+.TP
+.IR cpuset " (since Linux 2.6.24; " \fBCONFIG_CPUSETS\fP )
+This controller can be used to bind the processes in a cgroup to
+a specified set of CPUs and NUMA nodes.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/cpusets.rst
+(or
+.I Documentation/cgroup\-v1/cpusets.txt
+in Linux 5.2 and earlier).
+.
+.TP
+.IR memory " (since Linux 2.6.25; " \fBCONFIG_MEMCG\fP )
+The memory controller supports reporting and limiting of process memory, kernel
+memory, and swap used by cgroups.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/memory.rst
+(or
+.I Documentation/cgroup\-v1/memory.txt
+in Linux 5.2 and earlier).
+.TP
+.IR devices " (since Linux 2.6.26; " \fBCONFIG_CGROUP_DEVICE\fP )
+This supports controlling which processes may create (mknod) devices as
+well as open them for reading or writing.
+The policies may be specified as allow-lists and deny-lists.
+Hierarchy is enforced, so new rules must not
+violate existing rules for the target or ancestor cgroups.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/devices.rst
+(or
+.I Documentation/cgroup\-v1/devices.txt
+in Linux 5.2 and earlier).
+.TP
+.IR freezer " (since Linux 2.6.28; " \fBCONFIG_CGROUP_FREEZER\fP )
+The
+.I freezer
+cgroup can suspend and restore (resume) all processes in a cgroup.
+Freezing a cgroup
+.I /A
+also causes its children, for example, processes in
+.IR /A/B ,
+to be frozen.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/freezer\-subsystem.rst
+(or
+.I Documentation/cgroup\-v1/freezer\-subsystem.txt
+in Linux 5.2 and earlier).
+.TP
+.IR net_cls " (since Linux 2.6.29; " \fBCONFIG_CGROUP_NET_CLASSID\fP )
+This places a classid, specified for the cgroup, on network packets
+created by a cgroup.
+These classids can then be used in firewall rules,
+as well as used to shape traffic using
+.BR tc (8).
+This applies only to packets
+leaving the cgroup, not to traffic arriving at the cgroup.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/net_cls.rst
+(or
+.I Documentation/cgroup\-v1/net_cls.txt
+in Linux 5.2 and earlier).
+.TP
+.IR blkio " (since Linux 2.6.33; " \fBCONFIG_BLK_CGROUP\fP )
+The
+.I blkio
+cgroup controls and limits access to specified block devices by
+applying IO control in the form of throttling and upper limits against leaf
+nodes and intermediate nodes in the storage hierarchy.
+.IP
+Two policies are available.
+The first is a proportional-weight time-based division
+of disk implemented with CFQ.
+This is in effect for leaf nodes using CFQ.
+The second is a throttling policy which specifies
+upper I/O rate limits on a device.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/blkio\-controller.rst
+(or
+.I Documentation/cgroup\-v1/blkio\-controller.txt
+in Linux 5.2 and earlier).
+.TP
+.IR perf_event " (since Linux 2.6.39; " \fBCONFIG_CGROUP_PERF\fP )
+This controller allows
+.I perf
+monitoring of the set of processes grouped in a cgroup.
+.IP
+Further information can be found in the kernel source file \fItools/perf/Documentation/perf\-record.txt\fP.
+.TP
+.IR net_prio " (since Linux 3.3; " \fBCONFIG_CGROUP_NET_PRIO\fP )
+This allows priorities to be specified, per network interface, for cgroups.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/net_prio.rst
+(or
+.I Documentation/cgroup\-v1/net_prio.txt
+in Linux 5.2 and earlier).
+.TP
+.IR hugetlb " (since Linux 3.5; " \fBCONFIG_CGROUP_HUGETLB\fP )
+This supports limiting the use of huge pages by cgroups.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/hugetlb.rst
+(or
+.I Documentation/cgroup\-v1/hugetlb.txt
+in Linux 5.2 and earlier).
+.TP
+.IR pids " (since Linux 4.3; " \fBCONFIG_CGROUP_PIDS\fP )
+This controller permits limiting the number of processes that may be created
+in a cgroup (and its descendants).
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/pids.rst
+(or
+.I Documentation/cgroup\-v1/pids.txt
+in Linux 5.2 and earlier).
+.TP
+.IR rdma " (since Linux 4.11; " \fBCONFIG_CGROUP_RDMA\fP )
+The RDMA controller permits limiting the use of
+RDMA/IB-specific resources per cgroup.
+.IP
+Further information can be found in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v1/rdma.rst
+(or
+.I Documentation/cgroup\-v1/rdma.txt
+in Linux 5.2 and earlier).
+.\"
+.SS Creating cgroups and moving processes
+A cgroup filesystem initially contains a single root cgroup, '/',
+which all processes belong to.
+A new cgroup is created by creating a directory in the cgroup filesystem:
+.PP
+.in +4n
+.EX
+mkdir /sys/fs/cgroup/cpu/cg1
+.EE
+.in
+.PP
+This creates a new empty cgroup.
+.PP
+A process may be moved to this cgroup by writing its PID into the cgroup's
+.I cgroup.procs
+file:
+.PP
+.in +4n
+.EX
+echo $$ > /sys/fs/cgroup/cpu/cg1/cgroup.procs
+.EE
+.in
+.PP
+Only one PID at a time should be written to this file.
+.PP
+Writing the value 0 to a
+.I cgroup.procs
+file causes the writing process to be moved to the corresponding cgroup.
+.PP
+When writing a PID into the
+.I cgroup.procs
+file,
+all threads in the process are moved into the new cgroup at once.
+.PP
+Within a hierarchy, a process can be a member of exactly one cgroup.
+Writing a process's PID to a
+.I cgroup.procs
+file automatically removes it from the cgroup of
+which it was previously a member.
+.PP
+The
+.I cgroup.procs
+file can be read to obtain a list of the processes that are
+members of a cgroup.
+The returned list of PIDs is not guaranteed to be in order.
+Nor is it guaranteed to be free of duplicates.
+(For example, a PID may be recycled while reading from the list.)
+.PP
+In cgroups v1, an individual thread can be moved to
+another cgroup by writing its thread ID
+(i.e., the kernel thread ID returned by
+.BR clone (2)
+and
+.BR gettid (2))
+to the
+.I tasks
+file in a cgroup directory.
+This file can be read to discover the set of threads
+that are members of the cgroup.
+.\"
+.SS Removing cgroups
+To remove a cgroup,
+it must first have no child cgroups and contain no (nonzombie) processes.
+So long as that is the case, one can simply
+remove the corresponding directory pathname.
+Note that files in a cgroup directory cannot and need not be
+removed.
+.\"
+.SS Cgroups v1 release notification
+Two files can be used to determine whether the kernel provides
+notifications when a cgroup becomes empty.
+A cgroup is considered to be empty when it contains no child
+cgroups and no member processes.
+.PP
+A special file in the root directory of each cgroup hierarchy,
+.IR release_agent ,
+can be used to register the pathname of a program that may be invoked when
+a cgroup in the hierarchy becomes empty.
+The pathname of the newly empty cgroup (relative to the cgroup mount point)
+is provided as the sole command-line argument when the
+.I release_agent
+program is invoked.
+The
+.I release_agent
+program might remove the cgroup directory,
+or perhaps repopulate it with a process.
+.PP
+The default value of the
+.I release_agent
+file is empty, meaning that no release agent is invoked.
+.PP
+The content of the
+.I release_agent
+file can also be specified via a mount option when the
+cgroup filesystem is mounted:
+.PP
+.in +4n
+.EX
+mount \-o release_agent=pathname ...
+.EE
+.in
+.PP
+Whether or not the
+.I release_agent
+program is invoked when a particular cgroup becomes empty is determined
+by the value in the
+.I notify_on_release
+file in the corresponding cgroup directory.
+If this file contains the value 0, then the
+.I release_agent
+program is not invoked.
+If it contains the value 1, the
+.I release_agent
+program is invoked.
+The default value for this file in the root cgroup is 0.
+At the time when a new cgroup is created,
+the value in this file is inherited from the corresponding file
+in the parent cgroup.
+.\"
+.SS Cgroup v1 named hierarchies
+In cgroups v1,
+it is possible to mount a cgroup hierarchy that has no attached controllers:
+.PP
+.in +4n
+.EX
+mount \-t cgroup \-o none,name=somename none /some/mount/point
+.EE
+.in
+.PP
+Multiple instances of such hierarchies can be mounted;
+each hierarchy must have a unique name.
+The only purpose of such hierarchies is to track processes.
+(See the discussion of release notification above.)
+An example of this is the
+.I name=systemd
+cgroup hierarchy that is used by
+.BR systemd (1)
+to track services and user sessions.
+.PP
+Since Linux 5.0, the
+.I cgroup_no_v1
+kernel boot option (described below) can be used to disable cgroup v1
+named hierarchies, by specifying
+.IR cgroup_no_v1=named .
+.\"
+.SH CGROUPS VERSION 2
+In cgroups v2,
+all mounted controllers reside in a single unified hierarchy.
+While (different) controllers may be simultaneously
+mounted under the v1 and v2 hierarchies,
+it is not possible to mount the same controller simultaneously
+under both the v1 and the v2 hierarchies.
+.PP
+The new behaviors in cgroups v2 are summarized here,
+and in some cases elaborated in the following subsections.
+.IP \[bu] 3
+Cgroups v2 provides a unified hierarchy against
+which all controllers are mounted.
+.IP \[bu]
+"Internal" processes are not permitted.
+With the exception of the root cgroup, processes may reside
+only in leaf nodes (cgroups that do not themselves contain child cgroups).
+The details are somewhat more subtle than this, and are described below.
+.IP \[bu]
+Active cgroups must be specified via the files
+.I cgroup.controllers
+and
+.IR cgroup.subtree_control .
+.IP \[bu]
+The
+.I tasks
+file has been removed.
+In addition, the
+.I cgroup.clone_children
+file that is employed by the
+.I cpuset
+controller has been removed.
+.IP \[bu]
+An improved mechanism for notification of empty cgroups is provided by the
+.I cgroup.events
+file.
+.PP
+For more changes, see the
+.I Documentation/admin\-guide/cgroup\-v2.rst
+file in the kernel source
+(or
+.I Documentation/cgroup\-v2.txt
+in Linux 4.17 and earlier).
+.
+.PP
+Some of the new behaviors listed above saw subsequent modification with
+the addition in Linux 4.14 of "thread mode" (described below).
+.\"
+.SS Cgroups v2 unified hierarchy
+In cgroups v1, the ability to mount different controllers
+against different hierarchies was intended to allow great flexibility
+for application design.
+In practice, though,
+the flexibility turned out to be less useful than expected,
+and in many cases added complexity.
+Therefore, in cgroups v2,
+all available controllers are mounted against a single hierarchy.
+The available controllers are automatically mounted,
+meaning that it is not necessary (or possible) to specify the controllers
+when mounting the cgroup v2 filesystem using a command such as the following:
+.PP
+.in +4n
+.EX
+mount \-t cgroup2 none /mnt/cgroup2
+.EE
+.in
+.PP
+A cgroup v2 controller is available only if it is not currently in use
+via a mount against a cgroup v1 hierarchy.
+Or, to put things another way, it is not possible to employ
+the same controller against both a v1 hierarchy and the unified v2 hierarchy.
+This means that it may be necessary first to unmount a v1 controller
+(as described above) before that controller is available in v2.
+Since
+.BR systemd (1)
+makes heavy use of some v1 controllers by default,
+it can in some cases be simpler to boot the system with
+selected v1 controllers disabled.
+To do this, specify the
+.I cgroup_no_v1=list
+option on the kernel boot command line;
+.I list
+is a comma-separated list of the names of the controllers to disable,
+or the word
+.I all
+to disable all v1 controllers.
+(This situation is correctly handled by
+.BR systemd (1),
+which falls back to operating without the specified controllers.)
+.PP
+Note that on many modern systems,
+.BR systemd (1)
+automatically mounts the
+.I cgroup2
+filesystem at
+.I /sys/fs/cgroup/unified
+during the boot process.
+.\"
+.SS Cgroups v2 mount options
+The following options
+.RI ( mount\~\-o )
+can be specified when mounting the cgroup v2 filesystem:
+.TP
+.IR nsdelegate " (since Linux 4.15)"
+Treat cgroup namespaces as delegation boundaries.
+For details, see below.
+.TP
+.IR memory_localevents " (since Linux 5.2)"
+.\" commit 9852ae3fe5293264f01c49f2571ef7688f7823ce
+The
+.I memory.events
+file should show statistics only for the cgroup itself,
+and not for any descendant cgroups.
+This was the behavior before Linux 5.2.
+Starting in Linux 5.2,
+the default behavior is to include statistics for descendant cgroups in
+.IR memory.events ,
+and this mount option can be used to revert to the legacy behavior.
+This option is system wide and can be set on mount or
+modified through remount only from the initial mount namespace;
+it is silently ignored in noninitial namespaces.
+.\"
+.SS Cgroups v2 controllers
+The following controllers, documented in the kernel source file
+.I Documentation/admin\-guide/cgroup\-v2.rst
+(or
+.I Documentation/cgroup\-v2.txt
+in Linux 4.17 and earlier),
+are supported in cgroups version 2:
+.TP
+.IR cpu " (since Linux 4.15)"
+This is the successor to the version 1
+.I cpu
+and
+.I cpuacct
+controllers.
+.TP
+.IR cpuset " (since Linux 5.0)"
+This is the successor of the version 1
+.I cpuset
+controller.
+.TP
+.IR freezer " (since Linux 5.2)"
+.\" commit 76f969e8948d82e78e1bc4beb6b9465908e74873
+This is the successor of the version 1
+.I freezer
+controller.
+.TP
+.IR hugetlb " (since Linux 5.6)"
+This is the successor of the version 1
+.I hugetlb
+controller.
+.TP
+.IR io " (since Linux 4.5)"
+This is the successor of the version 1
+.I blkio
+controller.
+.TP
+.IR memory " (since Linux 4.5)"
+This is the successor of the version 1
+.I memory
+controller.
+.TP
+.IR perf_event " (since Linux 4.11)"
+This is the same as the version 1
+.I perf_event
+controller.
+.TP
+.IR pids " (since Linux 4.5)"
+This is the same as the version 1
+.I pids
+controller.
+.TP
+.IR rdma " (since Linux 4.11)"
+This is the same as the version 1
+.I rdma
+controller.
+.PP
+There is no direct equivalent of the
+.I net_cls
+and
+.I net_prio
+controllers from cgroups version 1.
+Instead, support has been added to
+.BR iptables (8)
+to allow eBPF filters that hook on cgroup v2 pathnames to make decisions
+about network traffic on a per-cgroup basis.
+.PP
+The v2
+.I devices
+controller provides no interface files;
+instead, device control is gated by attaching an eBPF
+.RB ( BPF_CGROUP_DEVICE )
+program to a v2 cgroup.
+.\"
+.SS Cgroups v2 subtree control
+Each cgroup in the v2 hierarchy contains the following two files:
+.TP
+.I cgroup.controllers
+This read-only file exposes a list of the controllers that are
+.I available
+in this cgroup.
+The contents of this file match the contents of the
+.I cgroup.subtree_control
+file in the parent cgroup.
+.TP
+.I cgroup.subtree_control
+This is a list of controllers that are
+.I active
+.RI ( enabled )
+in the cgroup.
+The set of controllers in this file is a subset of the set in the
+.I cgroup.controllers
+file of this cgroup.
+The set of active controllers is modified by writing strings to this file
+containing space-delimited controller names,
+each preceded by '+' (to enable a controller)
+or '\-' (to disable a controller), as in the following example:
+.IP
+.in +4n
+.EX
+echo \[aq]+pids \-memory\[aq] > x/y/cgroup.subtree_control
+.EE
+.in
+.IP
+An attempt to enable a controller
+that is not present in
+.I cgroup.controllers
+leads to an
+.B ENOENT
+error when writing to the
+.I cgroup.subtree_control
+file.
+.PP
+Because the list of controllers in
+.I cgroup.subtree_control
+is a subset of those in
+.IR cgroup.controllers ,
+a controller that has been disabled in one cgroup in the hierarchy
+can never be re-enabled in the subtree below that cgroup.
+.PP
+A cgroup's
+.I cgroup.subtree_control
+file determines the set of controllers that are exercised in the
+.I child
+cgroups.
+When a controller (e.g.,
+.IR pids )
+is present in the
+.I cgroup.subtree_control
+file of a parent cgroup,
+then the corresponding controller-interface files (e.g.,
+.IR pids.max )
+are automatically created in the children of that cgroup
+and can be used to exert resource control in the child cgroups.
+.\"
+.SS Cgroups v2 """no internal processes""" rule
+Cgroups v2 enforces a so-called "no internal processes" rule.
+Roughly speaking, this rule means that,
+with the exception of the root cgroup, processes may reside
+only in leaf nodes (cgroups that do not themselves contain child cgroups).
+This avoids the need to decide how to partition resources between
+processes which are members of cgroup A and processes in child cgroups of A.
+.PP
+For instance, if cgroup
+.I /cg1/cg2
+exists, then a process may reside in
+.IR /cg1/cg2 ,
+but not in
+.IR /cg1 .
+This is to avoid an ambiguity in cgroups v1
+with respect to the delegation of resources between processes in
+.I /cg1
+and its child cgroups.
+The recommended approach in cgroups v2 is to create a subdirectory called
+.I leaf
+for any nonleaf cgroup which should contain processes, but no child cgroups.
+Thus, processes which previously would have gone into
+.I /cg1
+would now go into
+.IR /cg1/leaf .
+This has the advantage of making explicit
+the relationship between processes in
+.I /cg1/leaf
+and
+.IR /cg1 's
+other children.
+.PP
+The "no internal processes" rule is in fact more subtle than stated above.
+More precisely, the rule is that a (nonroot) cgroup can't both
+(1) have member processes, and
+(2) distribute resources into child cgroups\[em]that is, have a nonempty
+.I cgroup.subtree_control
+file.
+Thus, it
+.I is
+possible for a cgroup to have both member processes and child cgroups,
+but before controllers can be enabled for that cgroup,
+the member processes must be moved out of the cgroup
+(e.g., perhaps into the child cgroups).
+.PP
+With the Linux 4.14 addition of "thread mode" (described below),
+the "no internal processes" rule has been relaxed in some cases.
+.\"
+.SS Cgroups v2 cgroup.events file
+Each nonroot cgroup in the v2 hierarchy contains a read-only file,
+.IR cgroup.events ,
+whose contents are key-value pairs
+(delimited by newline characters, with the key and value separated by spaces)
+providing state information about the cgroup:
+.PP
+.in +4n
+.EX
+$ \fBcat mygrp/cgroup.events\fP
+populated 1
+frozen 0
+.EE
+.in
+.PP
+The following keys may appear in this file:
+.TP
+.I populated
+The value of this key is either 1,
+if this cgroup or any of its descendants has member processes,
+or otherwise 0.
+.TP
+.IR frozen " (since Linux 5.2)"
+.\" commit 76f969e8948d82e78e1bc4beb6b9465908e74873
+The value of this key is 1 if this cgroup is currently frozen,
+or 0 if it is not.
+.PP
+The
+.I cgroup.events
+file can be monitored, in order to receive notification when the value of
+one of its keys changes.
+Such monitoring can be done using
+.BR inotify (7),
+which notifies changes as
+.B IN_MODIFY
+events, or
+.BR poll (2),
+which notifies changes by returning the
+.B POLLPRI
+and
+.B POLLERR
+bits in the
+.I revents
+field.
+.\"
+.SS Cgroup v2 release notification
+Cgroups v2 provides a new mechanism for obtaining notification
+when a cgroup becomes empty.
+The cgroups v1
+.I release_agent
+and
+.I notify_on_release
+files are removed, and replaced by the
+.I populated
+key in the
+.I cgroup.events
+file.
+This key either has the value 0,
+meaning that the cgroup (and its descendants)
+contain no (nonzombie) member processes,
+or 1, meaning that the cgroup (or one of its descendants)
+contains member processes.
+.PP
+The cgroups v2 release-notification mechanism
+offers the following advantages over the cgroups v1
+.I release_agent
+mechanism:
+.IP \[bu] 3
+It allows for cheaper notification,
+since a single process can monitor multiple
+.I cgroup.events
+files (using the techniques described earlier).
+By contrast, the cgroups v1 mechanism requires the expense of creating
+a process for each notification.
+.IP \[bu]
+Notification for different cgroup subhierarchies can be delegated
+to different processes.
+By contrast, the cgroups v1 mechanism allows only one release agent
+for an entire hierarchy.
+.\"
+.SS Cgroups v2 cgroup.stat file
+.\" commit ec39225cca42c05ac36853d11d28f877fde5c42e
+Each cgroup in the v2 hierarchy contains a read-only
+.I cgroup.stat
+file (first introduced in Linux 4.14)
+that consists of lines containing key-value pairs.
+The following keys currently appear in this file:
+.TP
+.I nr_descendants
+This is the total number of visible (i.e., living) descendant cgroups
+underneath this cgroup.
+.TP
+.I nr_dying_descendants
+This is the total number of dying descendant cgroups
+underneath this cgroup.
+A cgroup enters the dying state after being deleted.
+It remains in that state for an undefined period
+(which will depend on system load)
+while resources are freed before the cgroup is destroyed.
+Note that the presence of some cgroups in the dying state is normal,
+and is not indicative of any problem.
+.IP
+A process can't be made a member of a dying cgroup,
+and a dying cgroup can't be brought back to life.
+.\"
+.SS Limiting the number of descendant cgroups
+Each cgroup in the v2 hierarchy contains the following files,
+which can be used to view and set limits on the number
+of descendant cgroups under that cgroup:
+.TP
+.IR cgroup.max.depth " (since Linux 4.14)"
+.\" commit 1a926e0bbab83bae8207d05a533173425e0496d1
+This file defines a limit on the depth of nesting of descendant cgroups.
+A value of 0 in this file means that no descendant cgroups can be created.
+An attempt to create a descendant whose nesting level exceeds
+the limit fails
+.RB ( mkdir (2)
+fails with the error
+.BR EAGAIN ).
+.IP
+Writing the string
+.I """max"""
+to this file means that no limit is imposed.
+The default value in this file is
+.IR """max""" .
+.TP
+.IR cgroup.max.descendants " (since Linux 4.14)"
+.\" commit 1a926e0bbab83bae8207d05a533173425e0496d1
+This file defines a limit on the number of live descendant cgroups that
+this cgroup may have.
+An attempt to create more descendants than allowed by the limit fails
+.RB ( mkdir (2)
+fails with the error
+.BR EAGAIN ).
+.IP
+Writing the string
+.I """max"""
+to this file means that no limit is imposed.
+The default value in this file is
+.IR """max""" .
+.\"
+.SH CGROUPS DELEGATION: DELEGATING A HIERARCHY TO A LESS PRIVILEGED USER
+In the context of cgroups,
+delegation means passing management of some subtree
+of the cgroup hierarchy to a nonprivileged user.
+Cgroups v1 provides support for delegation based on file permissions
+in the cgroup hierarchy but with less strict containment rules than v2
+(as noted below).
+Cgroups v2 supports delegation with containment by explicit design.
+The focus of the discussion in this section is on delegation in cgroups v2,
+with some differences for cgroups v1 noted along the way.
+.PP
+Some terminology is required in order to describe delegation.
+A
+.I delegater
+is a privileged user (i.e., root) who owns a parent cgroup.
+A
+.I delegatee
+is a nonprivileged user who will be granted the permissions needed
+to manage some subhierarchy under that parent cgroup,
+known as the
+.IR "delegated subtree" .
+.PP
+To perform delegation,
+the delegater makes certain directories and files writable by the delegatee,
+typically by changing the ownership of the objects to be the user ID
+of the delegatee.
+Assuming that we want to delegate the hierarchy rooted at (say)
+.I /dlgt_grp
+and that there are not yet any child cgroups under that cgroup,
+the ownership of the following is changed to the user ID of the delegatee:
+.TP
+.I /dlgt_grp
+Changing the ownership of the root of the subtree means that any new
+cgroups created under the subtree (and the files they contain)
+will also be owned by the delegatee.
+.TP
+.I /dlgt_grp/cgroup.procs
+Changing the ownership of this file means that the delegatee
+can move processes into the root of the delegated subtree.
+.TP
+.IR /dlgt_grp/cgroup.subtree_control " (cgroups v2 only)"
+Changing the ownership of this file means that the delegatee
+can enable controllers (that are present in
+.IR /dlgt_grp/cgroup.controllers )
+in order to further redistribute resources at lower levels in the subtree.
+(As an alternative to changing the ownership of this file,
+the delegater might instead add selected controllers to this file.)
+.TP
+.IR /dlgt_grp/cgroup.threads " (cgroups v2 only)"
+Changing the ownership of this file is necessary if a threaded subtree
+is being delegated (see the description of "thread mode", below).
+This permits the delegatee to write thread IDs to the file.
+(The ownership of this file can also be changed when delegating
+a domain subtree, but currently this serves no purpose,
+since, as described below, it is not possible to move a thread between
+domain cgroups by writing its thread ID to the
+.I cgroup.threads
+file.)
+.IP
+In cgroups v1, the corresponding file that should instead be delegated is the
+.I tasks
+file.
+.PP
+The delegater should
+.I not
+change the ownership of any of the controller interface files (e.g.,
+.IR pids.max ,
+.IR memory.high )
+in
+.IR /dlgt_grp .
+Those files are used from the next level above the delegated subtree
+in order to distribute resources into the subtree,
+and the delegatee should not have permission to change
+the resources that are distributed into the delegated subtree.
+.PP
+See also the discussion of the
+.I /sys/kernel/cgroup/delegate
+file in NOTES for information about further delegatable files in cgroups v2.
+.PP
+After the aforementioned steps have been performed,
+the delegatee can create child cgroups within the delegated subtree
+(the cgroup subdirectories and the files they contain
+will be owned by the delegatee)
+and move processes between cgroups in the subtree.
+If some controllers are present in
+.IR /dlgt_grp/cgroup.subtree_control ,
+or the ownership of that file was passed to the delegatee,
+the delegatee can also control the further redistribution
+of the corresponding resources into the delegated subtree.
+.\"
+.SS Cgroups v2 delegation: nsdelegate and cgroup namespaces
+Starting with Linux 4.13,
+.\" commit 5136f6365ce3eace5a926e10f16ed2a233db5ba9
+there is a second way to perform cgroup delegation in the cgroups v2 hierarchy.
+This is done by mounting or remounting the cgroup v2 filesystem with the
+.I nsdelegate
+mount option.
+For example, if the cgroup v2 filesystem has already been mounted,
+we can remount it with the
+.I nsdelegate
+option as follows:
+.PP
+.in +4n
+.EX
+mount \-t cgroup2 \-o remount,nsdelegate \e
+ none /sys/fs/cgroup/unified
+.EE
+.in
+.\"
+.\" Alternatively, we could boot the kernel with the options:
+.\"
+.\" cgroup_no_v1=all systemd.legacy_systemd_cgroup_controller
+.\"
+.\" The effect of the latter option is to prevent systemd from employing
+.\" its "hybrid" cgroup mode, where it tries to make use of cgroups v2.
+.PP
+The effect of this mount option is to cause cgroup namespaces
+to automatically become delegation boundaries.
+More specifically,
+the following restrictions apply for processes inside the cgroup namespace:
+.IP \[bu] 3
+Writes to controller interface files in the root directory of the namespace
+will fail with the error
+.BR EPERM .
+Processes inside the cgroup namespace can still write to delegatable
+files in the root directory of the cgroup namespace such as
+.I cgroup.procs
+and
+.IR cgroup.subtree_control ,
+and can create subhierarchies underneath the root directory.
+.IP \[bu]
+Attempts to migrate processes across the namespace boundary are denied
+(with the error
+.BR ENOENT ).
+Processes inside the cgroup namespace can still
+(subject to the containment rules described below)
+move processes between cgroups
+.I within
+the subhierarchy under the namespace root.
+.PP
+The ability to define cgroup namespaces as delegation boundaries
+makes cgroup namespaces more useful.
+To understand why, suppose that we already have one cgroup hierarchy
+that has been delegated to a nonprivileged user,
+.IR cecilia ,
+using the older delegation technique described above.
+Suppose further that
+.I cecilia
+wanted to further delegate a subhierarchy
+under the existing delegated hierarchy.
+(For example, the delegated hierarchy might be associated with
+an unprivileged container run by
+.IR cecilia .)
+Even if a cgroup namespace was employed,
+because both hierarchies are owned by the unprivileged user
+.IR cecilia ,
+the following illegitimate actions could be performed:
+.IP \[bu] 3
+A process in the inferior hierarchy could change the
+resource controller settings in the root directory of that hierarchy.
+(These resource controller settings are intended to allow control to
+be exercised from the
+.I parent
+cgroup;
+a process inside the child cgroup should not be allowed to modify them.)
+.IP \[bu]
+A process inside the inferior hierarchy could move processes
+into and out of the inferior hierarchy if the cgroups in the
+superior hierarchy were somehow visible.
+.PP
+Employing the
+.I nsdelegate
+mount option prevents both of these possibilities.
+.PP
+The
+.I nsdelegate
+mount option only has an effect when performed in
+the initial mount namespace;
+in other mount namespaces, the option is silently ignored.
+.PP
+.IR Note :
+On some systems,
+.BR systemd (1)
+automatically mounts the cgroup v2 filesystem.
+In order to experiment with the
+.I nsdelegate
+operation, it may be useful to boot the kernel with
+the following command-line options:
+.PP
+.in +4n
+.EX
+cgroup_no_v1=all systemd.legacy_systemd_cgroup_controller
+.EE
+.in
+.PP
+These options cause the kernel to boot with the cgroups v1 controllers
+disabled (meaning that the controllers are available in the v2 hierarchy),
+and tell
+.BR systemd (1)
+not to mount and use the cgroup v2 hierarchy,
+so that the v2 hierarchy can be manually mounted
+with the desired options after boot-up.
+.\"
+.SS Cgroup delegation containment rules
+Some delegation
+.I containment rules
+ensure that the delegatee can move processes between cgroups within the
+delegated subtree,
+but can't move processes from outside the delegated subtree into
+the subtree or vice versa.
+A nonprivileged process (i.e., the delegatee) can write the PID of
+a "target" process into a
+.I cgroup.procs
+file only if all of the following are true:
+.IP \[bu] 3
+The writer has write permission on the
+.I cgroup.procs
+file in the destination cgroup.
+.IP \[bu]
+The writer has write permission on the
+.I cgroup.procs
+file in the nearest common ancestor of the source and destination cgroups.
+Note that in some cases,
+the nearest common ancestor may be the source or destination cgroup itself.
+This requirement is not enforced for cgroups v1 hierarchies,
+with the consequence that containment in v1 is less strict than in v2.
+(For example, in cgroups v1 the user that owns two distinct
+delegated subhierarchies can move a process between the hierarchies.)
+.IP \[bu]
+If the cgroup v2 filesystem was mounted with the
+.I nsdelegate
+option, the writer must be able to see the source and destination cgroups
+from its cgroup namespace.
+.IP \[bu]
+In cgroups v1:
+the effective UID of the writer (i.e., the delegatee) matches the
+real user ID or the saved set-user-ID of the target process.
+Before Linux 4.11,
+.\" commit 576dd464505fc53d501bb94569db76f220104d28
+this requirement also applied in cgroups v2.
+(This was a historical requirement inherited from cgroups v1
+that was later deemed unnecessary,
+since the other rules suffice for containment in cgroups v2.)
+.PP
+.IR Note :
+one consequence of these delegation containment rules is that the
+unprivileged delegatee can't place the first process into
+the delegated subtree;
+instead, the delegater must place the first process
+(a process owned by the delegatee) into the delegated subtree.
+.\"
+.SH CGROUPS VERSION 2 THREAD MODE
+Among the restrictions imposed by cgroups v2 that were not present
+in cgroups v1 are the following:
+.IP \[bu] 3
+.IR "No thread-granularity control" :
+all of the threads of a process must be in the same cgroup.
+.IP \[bu]
+.IR "No internal processes" :
+a cgroup can't both have member processes and
+exercise controllers on child cgroups.
+.PP
+Both of these restrictions were added because
+the lack of these restrictions had caused problems
+in cgroups v1.
+In particular, the cgroups v1 ability to allow thread-level granularity
+for cgroup membership made no sense for some controllers.
+(A notable example was the
+.I memory
+controller: since threads share an address space,
+it made no sense to split threads across different
+.I memory
+cgroups.)
+.PP
+Notwithstanding the initial design decision in cgroups v2,
+there were use cases for certain controllers, notably the
+.I cpu
+controller,
+for which thread-level granularity of control was meaningful and useful.
+To accommodate such use cases, Linux 4.14 added
+.I "thread mode"
+for cgroups v2.
+.PP
+Thread mode allows the following:
+.IP \[bu] 3
+The creation of
+.I threaded subtrees
+in which the threads of a process may
+be spread across cgroups inside the tree.
+(A threaded subtree may contain multiple multithreaded processes.)
+.IP \[bu]
+The concept of
+.IR "threaded controllers" ,
+which can distribute resources across the cgroups in a threaded subtree.
+.IP \[bu]
+A relaxation of the "no internal processes" rule,
+so that, within a threaded subtree,
+a cgroup can both contain member threads and
+exercise resource control over child cgroups.
+.PP
+With the addition of thread mode,
+each nonroot cgroup now contains a new file,
+.IR cgroup.type ,
+that exposes, and in some circumstances can be used to change,
+the "type" of a cgroup.
+This file contains one of the following type values:
+.TP
+.I domain
+This is a normal v2 cgroup that provides process-granularity control.
+If a process is a member of this cgroup,
+then all threads of the process are (by definition) in the same cgroup.
+This is the default cgroup type,
+and provides the same behavior that was provided for
+cgroups in the initial cgroups v2 implementation.
+.TP
+.I threaded
+This cgroup is a member of a threaded subtree.
+Threads can be added to this cgroup,
+and controllers can be enabled for the cgroup.
+.TP
+.I domain threaded
+This is a domain cgroup that serves as the root of a threaded subtree.
+This cgroup type is also known as "threaded root".
+.TP
+.I domain invalid
+This is a cgroup inside a threaded subtree
+that is in an "invalid" state.
+Processes can't be added to the cgroup,
+and controllers can't be enabled for the cgroup.
+The only thing that can be done with this cgroup (other than deleting it)
+is to convert it to a
+.I threaded
+cgroup by writing the string
+.I """threaded"""
+to the
+.I cgroup.type
+file.
+.IP
+The rationale for the existence of this "interim" type
+during the creation of a threaded subtree
+(rather than the kernel simply immediately converting all cgroups
+under the threaded root to the type
+.IR threaded )
+is to allow for
+possible future extensions to the thread mode model.
+.\"
+.SS Threaded versus domain controllers
+With the addition of thread mode,
+cgroups v2 now distinguishes two types of resource controllers:
+.IP \[bu] 3
+.I Threaded
+.\" In the kernel source, look for ".threaded[ \t]*= true" in
+.\" initializations of struct cgroup_subsys
+controllers: these controllers support thread-granularity for
+resource control and can be enabled inside threaded subtrees,
+with the result that the corresponding controller-interface files
+appear inside the cgroups in the threaded subtree.
+As at Linux 4.19, the following controllers are threaded:
+.IR cpu ,
+.IR perf_event ,
+and
+.IR pids .
+.IP \[bu]
+.I Domain
+controllers: these controllers support only process granularity
+for resource control.
+From the perspective of a domain controller,
+all threads of a process are always in the same cgroup.
+Domain controllers can't be enabled inside a threaded subtree.
+.\"
+.SS Creating a threaded subtree
+There are two pathways that lead to the creation of a threaded subtree.
+The first pathway proceeds as follows:
+.IP (1) 5
+We write the string
+.I """threaded"""
+to the
+.I cgroup.type
+file of a cgroup
+.I y/z
+that currently has the type
+.IR domain .
+This has the following effects:
+.RS
+.IP \[bu] 3
+The type of the cgroup
+.I y/z
+becomes
+.IR threaded .
+.IP \[bu]
+The type of the parent cgroup,
+.IR y ,
+becomes
+.IR "domain threaded" .
+The parent cgroup is the root of a threaded subtree
+(also known as the "threaded root").
+.IP \[bu]
+All other cgroups under
+.I y
+that were not already of type
+.I threaded
+(because they were inside already existing threaded subtrees
+under the new threaded root)
+are converted to type
+.IR "domain invalid" .
+Any subsequently created cgroups under
+.I y
+will also have the type
+.IR "domain invalid" .
+.RE
+.IP (2)
+We write the string
+.I """threaded"""
+to each of the
+.I domain invalid
+cgroups under
+.IR y ,
+in order to convert them to the type
+.IR threaded .
+As a consequence of this step, all cgroups under the threaded root
+now have the type
+.I threaded
+and the threaded subtree is now fully usable.
+The requirement to write
+.I """threaded"""
+to each of these cgroups is somewhat cumbersome,
+but allows for possible future extensions to the thread-mode model.
+.PP
+The second way of creating a threaded subtree is as follows:
+.IP (1) 5
+In an existing cgroup,
+.IR z ,
+that currently has the type
+.IR domain ,
+we (1.1) enable one or more threaded controllers and
+(1.2) make a process a member of
+.IR z .
+(These two steps can be done in either order.)
+This has the following consequences:
+.RS
+.IP \[bu] 3
+The type of
+.I z
+becomes
+.IR "domain threaded" .
+.IP \[bu]
+All of the descendant cgroups of
+.I z
+that were not already of type
+.I threaded
+are converted to type
+.IR "domain invalid" .
+.RE
+.IP (2)
+As before, we make the threaded subtree usable by writing the string
+.I """threaded"""
+to each of the
+.I domain invalid
+cgroups under
+.IR z ,
+in order to convert them to the type
+.IR threaded .
+.PP
+One of the consequences of the above pathways to creating a threaded subtree
+is that the threaded root cgroup can be a parent only to
+.I threaded
+(and
+.IR "domain invalid" )
+cgroups.
+The threaded root cgroup can't be a parent of a
+.I domain
+cgroup, and a
+.I threaded
+cgroup
+can't have a sibling that is a
+.I domain
+cgroup.
+.\"
+.SS Using a threaded subtree
+Within a threaded subtree, threaded controllers can be enabled
+in each subgroup whose type has been changed to
+.IR threaded ;
+upon doing so, the corresponding controller interface files
+appear in the children of that cgroup.
+.PP
+A process can be moved into a threaded subtree by writing its PID to the
+.I cgroup.procs
+file in one of the cgroups inside the tree.
+This has the effect of making all of the threads
+in the process members of the corresponding cgroup
+and makes the process a member of the threaded subtree.
+The threads of the process can then be spread across
+the threaded subtree by writing their thread IDs (see
+.BR gettid (2))
+to the
+.I cgroup.threads
+files in different cgroups inside the subtree.
+The threads of a process must all reside in the same threaded subtree.
+.PP
+As with writing to
+.IR cgroup.procs ,
+some containment rules apply when writing to the
+.I cgroup.threads
+file:
+.IP \[bu] 3
+The writer must have write permission on the
+.I cgroup.threads
+file in the destination cgroup.
+.IP \[bu]
+The writer must have write permission on the
+.I cgroup.procs
+file in the common ancestor of the source and destination cgroups.
+(In some cases,
+the common ancestor may be the source or destination cgroup itself.)
+.IP \[bu]
+The source and destination cgroups must be in the same threaded subtree.
+(Outside a threaded subtree, an attempt to move a thread by writing
+its thread ID to the
+.I cgroup.threads
+file in a different
+.I domain
+cgroup fails with the error
+.BR EOPNOTSUPP .)
+.PP
+The
+.I cgroup.threads
+file is present in each cgroup (including
+.I domain
+cgroups) and can be read in order to discover the set of threads
+that is present in the cgroup.
+The set of thread IDs obtained when reading this file
+is not guaranteed to be ordered or free of duplicates.
+.PP
+The
+.I cgroup.procs
+file in the threaded root shows the PIDs of all processes
+that are members of the threaded subtree.
+The
+.I cgroup.procs
+files in the other cgroups in the subtree are not readable.
+.PP
+Domain controllers can't be enabled in a threaded subtree;
+no controller-interface files appear inside the cgroups underneath the
+threaded root.
+From the point of view of a domain controller,
+threaded subtrees are invisible:
+a multithreaded process inside a threaded subtree appears to a domain
+controller as a process that resides in the threaded root cgroup.
+.PP
+Within a threaded subtree, the "no internal processes" rule does not apply:
+a cgroup can both contain member processes (or threads)
+and exercise controllers on child cgroups.
+.\"
+.SS Rules for writing to cgroup.type and creating threaded subtrees
+A number of rules apply when writing to the
+.I cgroup.type
+file:
+.IP \[bu] 3
+Only the string
+.I """threaded"""
+may be written.
+In other words, the only explicit transition that is possible is to convert a
+.I domain
+cgroup to type
+.IR threaded .
+.IP \[bu]
+The effect of writing
+.I """threaded"""
+depends on the current value in
+.IR cgroup.type ,
+as follows:
+.RS
+.IP \[bu] 3
+.I domain
+or
+.IR "domain threaded" :
+start the creation of a threaded subtree
+(whose root is the parent of this cgroup) via
+the first of the pathways described above;
+.IP \[bu]
+.IR "domain\ invalid" :
+convert this cgroup (which is inside a threaded subtree) to a usable (i.e.,
+.IR threaded )
+state;
+.IP \[bu]
+.IR threaded :
+no effect (a "no-op").
+.RE
+.IP \[bu]
+We can't write to a
+.I cgroup.type
+file if the parent's type is
+.IR "domain invalid" .
+In other words, the cgroups of a threaded subtree must be converted to the
+.I threaded
+state in a top-down manner.
+.PP
+There are also some constraints that must be satisfied
+in order to create a threaded subtree rooted at the cgroup
+.IR x :
+.IP \[bu] 3
+There can be no member processes in the descendant cgroups of
+.IR x .
+(The cgroup
+.I x
+can itself have member processes.)
+.IP \[bu]
+No domain controllers may be enabled in
+.IR x 's
+.I cgroup.subtree_control
+file.
+.PP
+If any of the above constraints is violated, then an attempt to write
+.I """threaded"""
+to a
+.I cgroup.type
+file fails with the error
+.BR ENOTSUP .
+.\"
+.SS The """domain threaded""" cgroup type
+According to the pathways described above,
+the type of a cgroup can change to
+.I domain threaded
+in either of the following cases:
+.IP \[bu] 3
+The string
+.I """threaded"""
+is written to a child cgroup.
+.IP \[bu]
+A threaded controller is enabled inside the cgroup and
+a process is made a member of the cgroup.
+.PP
+A
+.I domain threaded
+cgroup,
+.IR x ,
+can revert to the type
+.I domain
+if the above conditions no longer hold true\[em]that is, if all
+.I threaded
+child cgroups of
+.I x
+are removed and either
+.I x
+no longer has threaded controllers enabled or
+no longer has member processes.
+.PP
+When a
+.I domain threaded
+cgroup
+.I x
+reverts to the type
+.IR domain :
+.IP \[bu] 3
+All
+.I domain invalid
+descendants of
+.I x
+that are not in lower-level threaded subtrees revert to the type
+.IR domain .
+.IP \[bu]
+The root cgroups in any lower-level threaded subtrees revert to the type
+.IR "domain threaded" .
+.\"
+.SS Exceptions for the root cgroup
+The root cgroup of the v2 hierarchy is treated exceptionally:
+it can be the parent of both
+.I domain
+and
+.I threaded
+cgroups.
+If the string
+.I """threaded"""
+is written to the
+.I cgroup.type
+file of one of the children of the root cgroup, then
+.IP \[bu] 3
+The type of that cgroup becomes
+.IR threaded .
+.IP \[bu]
+The type of any descendants of that cgroup that
+are not part of lower-level threaded subtrees changes to
+.IR "domain invalid" .
+.PP
+Note that in this case, there is no cgroup whose type becomes
+.IR "domain threaded" .
+(Notionally, the root cgroup can be considered as the threaded root
+for the cgroup whose type was changed to
+.IR threaded .)
+.PP
+The aim of this exceptional treatment for the root cgroup is to
+allow a threaded cgroup that employs the
+.I cpu
+controller to be placed as high as possible in the hierarchy,
+so as to minimize the (small) cost of traversing the cgroup hierarchy.
+.\"
+.SS The cgroups v2 """cpu""" controller and realtime threads
+As at Linux 4.19, the cgroups v2
+.I cpu
+controller does not support control of realtime threads
+(specifically threads scheduled under any of the policies
+.BR SCHED_FIFO ,
+.BR SCHED_RR ,
+and
+.BR SCHED_DEADLINE ;
+see
+.BR sched (7)).
+Therefore, the
+.I cpu
+controller can be enabled in the root cgroup only
+if all realtime threads are in the root cgroup.
+(If there are realtime threads in nonroot cgroups, then a
+.BR write (2)
+of the string
+.I """+cpu"""
+to the
+.I cgroup.subtree_control
+file fails with the error
+.BR EINVAL .)
+.PP
+On some systems,
+.BR systemd (1)
+places certain realtime threads in nonroot cgroups in the v2 hierarchy.
+On such systems,
+these threads must first be moved to the root cgroup before the
+.I cpu
+controller can be enabled.
+.\"
+.SH ERRORS
+The following errors can occur for
+.BR mount (2):
+.TP
+.B EBUSY
+An attempt to mount a cgroup version 1 filesystem specified neither the
+.I name=
+option (to mount a named hierarchy) nor a controller name (or
+.IR all ).
+.SH NOTES
+A child process created via
+.BR fork (2)
+inherits its parent's cgroup memberships.
+A process's cgroup memberships are preserved across
+.BR execve (2).
+.PP
+The
+.BR clone3 (2)
+.B CLONE_INTO_CGROUP
+flag can be used to create a child process that begins its life in
+a different version 2 cgroup from the parent process.
+.\"
+.SS /proc files
+.TP
+.IR /proc/cgroups " (since Linux 2.6.24)"
+This file contains information about the controllers
+that are compiled into the kernel.
+An example of the contents of this file (reformatted for readability)
+is the following:
+.IP
+.in +4n
+.EX
+#subsys_name hierarchy num_cgroups enabled
+cpuset 4 1 1
+cpu 8 1 1
+cpuacct 8 1 1
+blkio 6 1 1
+memory 3 1 1
+devices 10 84 1
+freezer 7 1 1
+net_cls 9 1 1
+perf_event 5 1 1
+net_prio 9 1 1
+hugetlb 0 1 0
+pids 2 1 1
+.EE
+.in
+.IP
+The fields in this file are, from left to right:
+.RS
+.IP [1] 5
+The name of the controller.
+.IP [2]
+The unique ID of the cgroup hierarchy on which this controller is mounted.
+If multiple cgroups v1 controllers are bound to the same hierarchy,
+then each will show the same hierarchy ID in this field.
+The value in this field will be 0 if:
+.RS
+.IP \[bu] 3
+the controller is not mounted on a cgroups v1 hierarchy;
+.IP \[bu]
+the controller is bound to the cgroups v2 single unified hierarchy; or
+.IP \[bu]
+the controller is disabled (see below).
+.RE
+.IP [3]
+The number of control groups in this hierarchy using this controller.
+.IP [4]
+This field contains the value 1 if this controller is enabled,
+or 0 if it has been disabled (via the
+.I cgroup_disable
+kernel command-line boot parameter).
+.RE
+.TP
+.IR /proc/ pid /cgroup " (since Linux 2.6.24)"
+This file describes control groups to which the process
+with the corresponding PID belongs.
+The displayed information differs for
+cgroups version 1 and version 2 hierarchies.
+.IP
+For each cgroup hierarchy of which the process is a member,
+there is one entry containing three colon-separated fields:
+.IP
+.in +4n
+.EX
+hierarchy\-ID:controller\-list:cgroup\-path
+.EE
+.in
+.IP
+For example:
+.IP
+.in +4n
+.EX
+5:cpuacct,cpu,cpuset:/daemons
+.EE
+.in
+.IP
+The colon-separated fields are, from left to right:
+.RS
+.IP [1] 5
+For cgroups version 1 hierarchies,
+this field contains a unique hierarchy ID number
+that can be matched to a hierarchy ID in
+.IR /proc/cgroups .
+For the cgroups version 2 hierarchy, this field contains the value 0.
+.IP [2]
+For cgroups version 1 hierarchies,
+this field contains a comma-separated list of the controllers
+bound to the hierarchy.
+For the cgroups version 2 hierarchy, this field is empty.
+.IP [3]
+This field contains the pathname of the control group in the hierarchy
+to which the process belongs.
+This pathname is relative to the mount point of the hierarchy.
+.RE
+.\"
+.SS /sys/kernel/cgroup files
+.TP
+.IR /sys/kernel/cgroup/delegate " (since Linux 4.15)"
+.\" commit 01ee6cfb1483fe57c9cbd8e73817dfbf9bacffd3
+This file exports a list of the cgroups v2 files
+(one per line) that are delegatable
+(i.e., whose ownership should be changed to the user ID of the delegatee).
+In the future, the set of delegatable files may change or grow,
+and this file provides a way for the kernel to inform
+user-space applications of which files must be delegated.
+As at Linux 4.15, one sees the following when inspecting this file:
+.IP
+.in +4n
+.EX
+$ \fBcat /sys/kernel/cgroup/delegate\fP
+cgroup.procs
+cgroup.subtree_control
+cgroup.threads
+.EE
+.in
+.TP
+.IR /sys/kernel/cgroup/features " (since Linux 4.15)"
+.\" commit 5f2e673405b742be64e7c3604ed4ed3ac14f35ce
+Over time, the set of cgroups v2 features that are provided by the
+kernel may change or grow,
+or some features may not be enabled by default.
+This file provides a way for user-space applications to discover what
+features the running kernel supports and has enabled.
+Features are listed one per line:
+.IP
+.in +4n
+.EX
+$ \fBcat /sys/kernel/cgroup/features\fP
+nsdelegate
+memory_localevents
+.EE
+.in
+.IP
+The entries that can appear in this file are:
+.RS
+.TP
+.IR memory_localevents " (since Linux 5.2)"
+The kernel supports the
+.I memory_localevents
+mount option.
+.TP
+.IR nsdelegate " (since Linux 4.15)"
+The kernel supports the
+.I nsdelegate
+mount option.
+.TP
+.IR memory_recursiveprot " (since Linux 5.7)"
+.\" commit 8a931f801340c2be10552c7b5622d5f4852f3a36
+The kernel supports the
+.I memory_recursiveprot
+mount option.
+.RE
+.SH SEE ALSO
+.BR prlimit (1),
+.BR systemd (1),
+.BR systemd\-cgls (1),
+.BR systemd\-cgtop (1),
+.BR clone (2),
+.BR ioprio_set (2),
+.BR perf_event_open (2),
+.BR setrlimit (2),
+.BR cgroup_namespaces (7),
+.BR cpuset (7),
+.BR namespaces (7),
+.BR sched (7),
+.BR user_namespaces (7)
+.PP
+The kernel source file
+.IR Documentation/admin\-guide/cgroup\-v2.rst .
diff --git a/man7/charsets.7 b/man7/charsets.7
new file mode 100644
index 0000000..0692d8d
--- /dev/null
+++ b/man7/charsets.7
@@ -0,0 +1,335 @@
+.\" Copyright (c) 1996 Eric S. Raymond <esr@thyrsus.com>
+.\" and Copyright (c) Andries Brouwer <aeb@cwi.nl>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" This is combined from many sources, including notes by aeb and
+.\" research by esr. Portions derive from a writeup by Roman Czyborra.
+.\"
+.\" Changes also by David Starner <dstarner98@aasaa.ofe.org>.
+.\"
+.TH charsets 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+charsets \- character set standards and internationalization
+.SH DESCRIPTION
+This manual page gives an overview on different character set standards
+and how they were used on Linux before Unicode became ubiquitous.
+Some of this information is still helpful for people working with legacy
+systems and documents.
+.PP
+Standards discussed include
+ASCII, GB 2312, ISO 8859, JIS, KOI8-R, KS, and Unicode.
+.PP
+The primary emphasis is on character sets that were actually used by
+locale character sets, not the myriad others that could be found in data
+from other systems.
+.SS ASCII
+ASCII (American Standard Code For Information Interchange) is the original
+7-bit character set, originally designed for American English.
+Also known as US-ASCII.
+It is currently described by the ISO 646:1991 IRV
+(International Reference Version) standard.
+.PP
+Various ASCII variants replacing the dollar sign with other currency
+symbols and replacing punctuation with non-English alphabetic
+characters to cover German, French, Spanish, and others in 7 bits
+emerged.
+All are deprecated;
+glibc does not support locales whose character sets are not true
+supersets of ASCII.
+.PP
+As Unicode, when using UTF-8, is ASCII-compatible, plain ASCII text
+still renders properly on modern systems that use UTF-8.
+.SS ISO 8859
+ISO 8859 is a series of 15 8-bit character sets, all of which have ASCII
+in their low (7-bit) half, invisible control characters in positions
+128 to 159, and 96 fixed-width graphics in positions 160\[en]255.
+.PP
+Of these, the most important is ISO 8859-1
+("Latin Alphabet No. 1" / Latin-1).
+It was widely adopted and supported by different systems,
+and is gradually being replaced with Unicode.
+The ISO 8859-1 characters are also the first 256 characters of Unicode.
+.PP
+Console support for the other 8859 character sets is available under
+Linux through user-mode utilities (such as
+.BR setfont (8))
+that modify keyboard bindings and the EGA graphics
+table and employ the "user mapping" font table in the console
+driver.
+.PP
+Here are brief descriptions of each character set:
+.TP
+8859-1 (Latin-1)
+Latin-1 covers many European languages such as Albanian, Basque,
+Danish, English, Faroese, Galician, Icelandic, Irish, Italian,
+Norwegian, Portuguese, Spanish, and Swedish.
+The lack of the ligatures
+Dutch IJ/ij,
+French œ,
+and „German“ quotation marks
+was considered tolerable.
+.TP
+8859-2 (Latin-2)
+Latin-2 supports many Latin-written Central and East European
+languages such as Bosnian, Croatian, Czech, German, Hungarian, Polish,
+Slovak, and Slovene.
+Replacing Romanian ș/ț with ş/ţ
+was considered tolerable.
+.TP
+8859-3 (Latin-3)
+Latin-3 was designed to cover Esperanto, Maltese, and Turkish, but
+8859-9 later superseded it for Turkish.
+.TP
+8859-4 (Latin-4)
+Latin-4 introduced letters for North European languages such as
+Estonian, Latvian, and Lithuanian, but was superseded by 8859-10 and
+8859-13.
+.TP
+8859-5
+Cyrillic letters supporting Bulgarian, Byelorussian, Macedonian,
+Russian, Serbian, and (almost completely) Ukrainian.
+It was never widely used; see the discussion of KOI8-R/KOI8-U below.
+.TP
+8859-6
+Was created for Arabic.
+The 8859-6 glyph table is a fixed font of separate
+letter forms, but a proper display engine should combine these
+using the proper initial, medial, and final forms.
+.TP
+8859-7
+Was created for Modern Greek in 1987, updated in 2003.
+.TP
+8859-8
+Supports Modern Hebrew without niqud (punctuation signs).
+Niqud and full-fledged Biblical Hebrew were outside the scope of this
+character set.
+.TP
+8859-9 (Latin-5)
+This is a variant of Latin-1 that replaces Icelandic letters with
+Turkish ones.
+.TP
+8859-10 (Latin-6)
+Latin-6 added the Inuit (Greenlandic) and Sami (Lappish) letters that were
+missing in Latin-4 to cover the entire Nordic area.
+.TP
+8859-11
+Supports the Thai alphabet and is nearly identical to the TIS-620
+standard.
+.TP
+8859-12
+This character set does not exist.
+.TP
+8859-13 (Latin-7)
+Supports the Baltic Rim languages; in particular, it includes Latvian
+characters not found in Latin-4.
+.TP
+8859-14 (Latin-8)
+This is the Celtic character set, covering Old Irish, Manx, Gaelic,
+Welsh, Cornish, and Breton.
+.TP
+8859-15 (Latin-9)
+Latin-9 is similar to the widely used Latin-1 but replaces some less
+common symbols with the Euro sign and French and Finnish letters that
+were missing in Latin-1.
+.TP
+8859-16 (Latin-10)
+This character set covers many Southeast European languages,
+and most importantly supports Romanian more completely than Latin-2.
+.SS KOI8-R / KOI8-U
+KOI8-R is a non-ISO character set popular in Russia before Unicode.
+The lower half is ASCII;
+the upper is a Cyrillic character set somewhat better designed than
+ISO 8859-5.
+KOI8-U, based on KOI8-R, has better support for Ukrainian.
+Neither of these sets is ISO 2022 compatible,
+unlike the ISO 8859 series.
+.PP
+Console support for KOI8-R is available under Linux through user-mode
+utilities that modify keyboard bindings and the EGA graphics table,
+and employ the "user mapping" font table in the console driver.
+.SS GB 2312
+GB 2312 is a mainland Chinese national standard character set used
+to express simplified Chinese.
+Just like JIS X 0208, characters are
+mapped into a 94x94 two-byte matrix used to construct EUC-CN.
+EUC-CN
+is the most important encoding for Linux and includes ASCII and
+GB 2312.
+Note that EUC-CN is often called GB, GB 2312, or CN-GB.
+.SS Big5
+Big5 was a popular character set in Taiwan to express traditional
+Chinese.
+(Big5 is both a character set and an encoding.)
+It is a superset of ASCII.
+Non-ASCII characters are expressed in two bytes.
+Bytes 0xa1\[en]0xfe are used as leading bytes for two-byte characters.
+Big5 and its extension were widely used in Taiwan and Hong Kong.
+It is not ISO 2022 compliant.
+.\" Thanks to Tomohiro KUBOTA for the following sections about
+.\" national standards.
+.SS JIS X 0208
+JIS X 0208 is a Japanese national standard character set.
+Though there are some more Japanese national standard character sets (like
+JIS X 0201, JIS X 0212, and JIS X 0213), this is the most important one.
+Characters are mapped into a 94x94 two-byte matrix,
+each byte of which is in the range 0x21\[en]0x7e.
+Note that JIS X 0208 is a character set, not an encoding.
+This means that JIS X 0208
+itself is not used for expressing text data.
+JIS X 0208 is used
+as a component to construct encodings such as EUC-JP, Shift_JIS,
+and ISO-2022-JP.
+EUC-JP is the most important encoding for Linux
+and includes ASCII and JIS X 0208.
+In EUC-JP, JIS X 0208
+characters are expressed in two bytes, each of which is the
+JIS X 0208 code plus 0x80.
+.SS KS X 1001
+KS X 1001 is a Korean national standard character set.
+Just as with
+JIS X 0208, characters are mapped into a 94x94 two-byte matrix.
+KS X 1001 is used like JIS X 0208, as a component
+to construct encodings such as EUC-KR, Johab, and ISO-2022-KR.
+EUC-KR is the most important encoding for Linux and includes
+ASCII and KS X 1001.
+KS C 5601 is an older name for KS X 1001.
+.SS ISO 2022 and ISO 4873
+The ISO 2022 and 4873 standards describe a font-control model
+based on VT100 practice.
+This model is (partially) supported
+by the Linux kernel and by
+.BR xterm (1).
+Several ISO 2022-based character encodings have been defined,
+especially for Japanese.
+.PP
+There are 4 graphic character sets, called G0, G1, G2, and G3,
+and one of them is the current character set for codes with
+high bit zero (initially G0), and one of them is the current
+character set for codes with high bit one (initially G1).
+Each graphic character set has 94 or 96 characters, and is
+essentially a 7-bit character set.
+It uses codes either
+040\[en]0177 (041\[en]0176) or 0240\[en]0377 (0241\[en]0376).
+G0 always has size 94 and uses codes 041\[en]0176.
+.PP
+Switching between character sets is done using the shift functions
+\fB\[ha]N\fP (SO or LS1), \fB\[ha]O\fP (SI or LS0), ESC n (LS2), ESC o (LS3),
+ESC N (SS2), ESC O (SS3), ESC \[ti] (LS1R), ESC } (LS2R), ESC | (LS3R).
+The function LS\fIn\fP makes character set G\fIn\fP the current one
+for codes with high bit zero.
+The function LS\fIn\fPR makes character set G\fIn\fP the current one
+for codes with high bit one.
+The function SS\fIn\fP makes character set G\fIn\fP (\fIn\fP=2 or 3)
+the current one for the next character only (regardless of the value
+of its high order bit).
+.PP
+A 94-character set is designated as G\fIn\fP character set
+by an escape sequence ESC ( xx (for G0), ESC ) xx (for G1),
+ESC * xx (for G2), ESC + xx (for G3), where xx is a symbol
+or a pair of symbols found in the ISO 2375 International
+Register of Coded Character Sets.
+For example, ESC ( @ selects the ISO 646 character set as G0,
+ESC ( A selects the UK standard character set (with pound
+instead of number sign), ESC ( B selects ASCII (with dollar
+instead of currency sign), ESC ( M selects a character set
+for African languages, ESC ( ! A selects the Cuban character
+set, and so on.
+.PP
+A 96-character set is designated as G\fIn\fP character set
+by an escape sequence ESC \- xx (for G1), ESC . xx (for G2)
+or ESC / xx (for G3).
+For example, ESC \- G selects the Hebrew alphabet as G1.
+.PP
+A multibyte character set is designated as G\fIn\fP character set
+by an escape sequence ESC $ xx or ESC $ ( xx (for G0),
+ESC $ ) xx (for G1), ESC $ * xx (for G2), ESC $ + xx (for G3).
+For example, ESC $ ( C selects the Korean character set for G0.
+The Japanese character set selected by ESC $ B has a more
+recent version selected by ESC & @ ESC $ B.
+.PP
+ISO 4873 stipulates a narrower use of character sets, where G0
+is fixed (always ASCII), so that G1, G2, and G3
+can be invoked only for codes with the high order bit set.
+In particular, \fB\[ha]N\fP and \fB\[ha]O\fP are not used anymore, ESC ( xx
+can be used only with xx=B, and ESC ) xx, ESC * xx, ESC + xx
+are equivalent to ESC \- xx, ESC . xx, ESC / xx, respectively.
+.SS TIS-620
+TIS-620 is a Thai national standard character set and a superset
+of ASCII.
+In the same fashion as the ISO 8859 series, Thai characters are mapped into
+0xa1\[en]0xfe.
+.SS Unicode
+Unicode (ISO/IEC 10646) is a standard which aims to unambiguously represent
+every character in every human language.
+Unicode's structure permits 20.1 bits to encode every character.
+Since most computers don't include 20.1-bit integers, Unicode is
+usually encoded as 32-bit integers internally and either a series of
+16-bit integers (UTF-16) (needing two 16-bit integers only when
+encoding certain rare characters) or a series of 8-bit bytes (UTF-8).
+.PP
+Linux represents Unicode using the 8-bit Unicode Transformation Format
+(UTF-8).
+UTF-8 is a variable length encoding of Unicode.
+It uses 1
+byte to code 7 bits, 2 bytes for 11 bits, 3 bytes for 16 bits, 4 bytes
+for 21 bits, 5 bytes for 26 bits, 6 bytes for 31 bits.
+.PP
+Let 0,1,x stand for a zero, one, or arbitrary bit.
+A byte 0xxxxxxx
+stands for the Unicode 00000000 0xxxxxxx which codes the same symbol
+as the ASCII 0xxxxxxx.
+Thus, ASCII goes unchanged into UTF-8, and
+people using only ASCII do not notice any change: not in code, and not
+in file size.
+.PP
+A byte 110xxxxx is the start of a 2-byte code, and 110xxxxx 10yyyyyy
+is assembled into 00000xxx xxyyyyyy.
+A byte 1110xxxx is the start
+of a 3-byte code, and 1110xxxx 10yyyyyy 10zzzzzz is assembled
+into xxxxyyyy yyzzzzzz.
+(When UTF-8 is used to code the 31-bit ISO/IEC 10646
+then this progression continues up to 6-byte codes.)
+.PP
+For most texts in ISO 8859 character sets, this means that the
+characters outside of ASCII are now coded with two bytes.
+This tends
+to expand ordinary text files by only one or two percent.
+For Russian
+or Greek texts, this expands ordinary text files by 100%, since text in
+those languages is mostly outside of ASCII.
+For Japanese users this means
+that the 16-bit codes now in common use will take three bytes.
+While there are algorithmic conversions from some character sets
+(especially ISO 8859-1) to Unicode, general conversion requires
+carrying around conversion tables, which can be quite large for 16-bit
+codes.
+.PP
+Note that UTF-8 is self-synchronizing:
+10xxxxxx is a tail,
+any other byte is the head of a code.
+Note that the only way ASCII bytes occur in a UTF-8 stream,
+is as themselves.
+In particular,
+there are no embedded NULs (\[aq]\e0\[aq]) or \[aq]/\[aq]s
+that form part of some larger code.
+.PP
+Since ASCII, and, in particular, NUL and \[aq]/\[aq], are unchanged, the
+kernel does not notice that UTF-8 is being used.
+It does not care at
+all what the bytes it is handling stand for.
+.PP
+Rendering of Unicode data streams is typically handled through
+"subfont" tables which map a subset of Unicode to glyphs.
+Internally
+the kernel uses Unicode to describe the subfont loaded in video RAM.
+This means that in the Linux console in UTF-8 mode, one can use a character
+set with 512 different symbols.
+This is not enough for Japanese, Chinese, and
+Korean, but it is enough for most other purposes.
+.SH SEE ALSO
+.BR iconv (1),
+.BR ascii (7),
+.BR iso_8859\-1 (7),
+.BR unicode (7),
+.BR utf\-8 (7)
diff --git a/man7/complex.7 b/man7/complex.7
new file mode 100644
index 0000000..3685a8d
--- /dev/null
+++ b/man7/complex.7
@@ -0,0 +1,83 @@
+.\" Copyright 2002 Walter Harms (walter.harms@informatik.uni-oldenburg.de)
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.TH complex 7 2023-07-18 "Linux man-pages 6.05.01"
+.SH NAME
+complex \- basics of complex mathematics
+.SH LIBRARY
+Math library
+.RI ( libm ", " \-lm )
+.SH SYNOPSIS
+.nf
+.B #include <complex.h>
+.fi
+.SH DESCRIPTION
+Complex numbers are numbers of the form z = a+b*i, where a and b are
+real numbers and i = sqrt(\-1), so that i*i = \-1.
+.PP
+There are other ways to represent that number.
+The pair (a,b) of real
+numbers may be viewed as a point in the plane, given by X- and
+Y-coordinates.
+This same point may also be described by giving
+the pair of real numbers (r,phi), where r is the distance to the origin O,
+and phi the angle between the X-axis and the line Oz.
+Now
+z = r*exp(i*phi) = r*(cos(phi)+i*sin(phi)).
+.PP
+The basic operations are defined on z = a+b*i and w = c+d*i as:
+.TP
+.B addition: z+w = (a+c) + (b+d)*i
+.TP
+.B multiplication: z*w = (a*c \- b*d) + (a*d + b*c)*i
+.TP
+.B division: z/w = ((a*c + b*d)/(c*c + d*d)) + ((b*c \- a*d)/(c*c + d*d))*i
+.PP
+Nearly all math functions have a complex counterpart but there are
+some complex-only functions.
+.SH EXAMPLES
+Your C-compiler can work with complex numbers if it supports the C99 standard.
+The imaginary unit is represented by I.
+.PP
+.EX
+/* check that exp(i * pi) == \-1 */
+#include <math.h> /* for atan */
+#include <stdio.h>
+#include <complex.h>
+\&
+int
+main(void)
+{
+ double pi = 4 * atan(1.0);
+ double complex z = cexp(I * pi);
+ printf("%f + %f * i\en", creal(z), cimag(z));
+}
+.EE
+.SH SEE ALSO
+.BR cabs (3),
+.BR cacos (3),
+.BR cacosh (3),
+.BR carg (3),
+.BR casin (3),
+.BR casinh (3),
+.BR catan (3),
+.BR catanh (3),
+.BR ccos (3),
+.BR ccosh (3),
+.BR cerf (3),
+.BR cexp (3),
+.BR cexp2 (3),
+.BR cimag (3),
+.BR clog (3),
+.BR clog10 (3),
+.BR clog2 (3),
+.BR conj (3),
+.BR cpow (3),
+.BR cproj (3),
+.BR creal (3),
+.BR csin (3),
+.BR csinh (3),
+.BR csqrt (3),
+.BR ctan (3),
+.BR ctanh (3)
diff --git a/man7/cp1251.7 b/man7/cp1251.7
new file mode 100644
index 0000000..6dd88c2
--- /dev/null
+++ b/man7/cp1251.7
@@ -0,0 +1,166 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH cp1251 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+cp1251 \- CP\ 1251 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The Windows Code Pages include several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+CP\ 1251 encodes the
+characters used in Cyrillic scripts.
+.SS CP\ 1251 characters
+The following table displays the characters in CP\ 1251 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+200 128 80 Ђ CYRILLIC CAPITAL LETTER DJE
+201 129 81 Ѓ CYRILLIC CAPITAL LETTER GJE
+202 130 82 ‚ SINGLE LOW-9 QUOTATION MARK
+203 131 83 ѓ CYRILLIC SMALL LETTER GJE
+204 132 84 „ DOUBLE LOW-9 QUOTATION MARK
+205 133 85 … HORIZONTAL ELLIPSIS
+206 134 86 † DAGGER
+207 135 87 ‡ DOUBLE DAGGER
+210 136 88 € EURO SIGN
+211 137 89 ‰ PER MILLE SIGN
+212 138 8A Љ CYRILLIC CAPITAL LETTER LJE
+213 139 8B ‹ SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+214 140 8C Њ CYRILLIC CAPITAL LETTER NJE
+215 141 8D Ќ CYRILLIC CAPITAL LETTER KJE
+216 142 8E Ћ CYRILLIC CAPITAL LETTER TSHE
+217 143 8F Џ CYRILLIC CAPITAL LETTER DZHE
+220 144 90 ђ CYRILLIC SMALL LETTER DJE
+221 145 91 ‘ LEFT SINGLE QUOTATION MARK
+222 146 92 ’ RIGHT SINGLE QUOTATION MARK
+223 147 93 “ LEFT DOUBLE QUOTATION MARK
+224 148 94 ” RIGHT DOUBLE QUOTATION MARK
+225 149 95 • BULLET
+226 150 96 – EN DASH
+227 151 97 — EM DASH
+230 152 98 UNDEFINED
+231 153 99 ™ TRADE MARK SIGN
+232 154 9A љ CYRILLIC SMALL LETTER LJE
+233 155 9B › SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+234 156 9C њ CYRILLIC SMALL LETTER NJE
+235 157 9D ќ CYRILLIC SMALL LETTER KJE
+236 158 9E ћ CYRILLIC SMALL LETTER TSHE
+237 159 9F џ CYRILLIC SMALL LETTER DZHE
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ў CYRILLIC CAPITAL LETTER SHORT U
+242 162 A2 ў CYRILLIC SMALL LETTER SHORT U
+243 163 A3 Ј CYRILLIC CAPITAL LETTER JE
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 Ґ CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 Ё CYRILLIC CAPITAL LETTER IO
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA Є CYRILLIC CAPITAL LETTER UKRAINIAN IE
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF Ї CYRILLIC CAPITAL LETTER YI
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 І T{
+CYRILLIC CAPITAL LETTER
+.br
+BYELORUSSIAN-UKRAINIAN I
+T}
+263 179 B3 і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+264 180 B4 ґ CYRILLIC SMALL LETTER GHE WITH UPTURN
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ё CYRILLIC SMALL LETTER IO
+271 185 B9 № NUMERO SIGN
+272 186 BA є CYRILLIC SMALL LETTER UKRAINIAN IE
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ј CYRILLIC SMALL LETTER JE
+275 189 BD Ѕ CYRILLIC CAPITAL LETTER DZE
+276 190 BE ѕ CYRILLIC SMALL LETTER DZE
+277 191 BF ї CYRILLIC SMALL LETTER YI
+300 192 C0 А CYRILLIC CAPITAL LETTER A
+301 193 C1 Б CYRILLIC CAPITAL LETTER BE
+302 194 C2 В CYRILLIC CAPITAL LETTER VE
+303 195 C3 Г CYRILLIC CAPITAL LETTER GHE
+304 196 C4 Д CYRILLIC CAPITAL LETTER DE
+305 197 C5 Е CYRILLIC CAPITAL LETTER IE
+306 198 C6 Ж CYRILLIC CAPITAL LETTER ZHE
+307 199 C7 З CYRILLIC CAPITAL LETTER ZE
+310 200 C8 И CYRILLIC CAPITAL LETTER I
+311 201 C9 Й CYRILLIC CAPITAL LETTER SHORT I
+312 202 CA К CYRILLIC CAPITAL LETTER KA
+313 203 CB Л CYRILLIC CAPITAL LETTER EL
+314 204 CC М CYRILLIC CAPITAL LETTER EM
+315 205 CD Н CYRILLIC CAPITAL LETTER EN
+316 206 CE О CYRILLIC CAPITAL LETTER O
+317 207 CF П CYRILLIC CAPITAL LETTER PE
+320 208 D0 Р CYRILLIC CAPITAL LETTER ER
+321 209 D1 С CYRILLIC CAPITAL LETTER ES
+322 210 D2 Т CYRILLIC CAPITAL LETTER TE
+323 211 D3 У CYRILLIC CAPITAL LETTER U
+324 212 D4 Ф CYRILLIC CAPITAL LETTER EF
+325 213 D5 Х CYRILLIC CAPITAL LETTER HA
+326 214 D6 Ц CYRILLIC CAPITAL LETTER TSE
+327 215 D7 Ч CYRILLIC CAPITAL LETTER CHE
+330 216 D8 Ш CYRILLIC CAPITAL LETTER SHA
+331 217 D9 Щ CYRILLIC CAPITAL LETTER SHCHA
+332 218 DA Ъ CYRILLIC CAPITAL LETTER HARD SIGN
+333 219 DB Ы CYRILLIC CAPITAL LETTER YERU
+334 220 DC Ь CYRILLIC CAPITAL LETTER SOFT SIGN
+335 221 DD Э CYRILLIC CAPITAL LETTER E
+336 222 DE Ю CYRILLIC CAPITAL LETTER YU
+337 223 DF Я CYRILLIC CAPITAL LETTER YA
+340 224 E0 а CYRILLIC SMALL LETTER A
+341 225 E1 б CYRILLIC SMALL LETTER BE
+342 226 E2 в CYRILLIC SMALL LETTER VE
+343 227 E3 г CYRILLIC SMALL LETTER GHE
+344 228 E4 д CYRILLIC SMALL LETTER DE
+345 229 E5 е CYRILLIC SMALL LETTER IE
+346 230 E6 ж CYRILLIC SMALL LETTER ZHE
+347 231 E7 з CYRILLIC SMALL LETTER ZE
+350 232 E8 и CYRILLIC SMALL LETTER I
+351 233 E9 й CYRILLIC SMALL LETTER SHORT I
+352 234 EA к CYRILLIC SMALL LETTER KA
+353 235 EB л CYRILLIC SMALL LETTER EL
+354 236 EC м CYRILLIC SMALL LETTER EM
+355 237 ED н CYRILLIC SMALL LETTER EN
+356 238 EE о CYRILLIC SMALL LETTER O
+357 239 EF п CYRILLIC SMALL LETTER PE
+360 240 F0 р CYRILLIC SMALL LETTER ER
+361 241 F1 с CYRILLIC SMALL LETTER ES
+362 242 F2 т CYRILLIC SMALL LETTER TE
+363 243 F3 у CYRILLIC SMALL LETTER U
+364 244 F4 ф CYRILLIC SMALL LETTER EF
+365 245 F5 х CYRILLIC SMALL LETTER HA
+366 246 F6 ц CYRILLIC SMALL LETTER TSE
+367 247 F7 ч CYRILLIC SMALL LETTER CHE
+370 248 F8 ш CYRILLIC SMALL LETTER SHA
+371 249 F9 щ CYRILLIC SMALL LETTER SHCHA
+372 250 FA ъ CYRILLIC SMALL LETTER HARD SIGN
+373 251 FB ы CYRILLIC SMALL LETTER YERU
+374 252 FC ь CYRILLIC SMALL LETTER SOFT SIGN
+375 253 FD э CYRILLIC SMALL LETTER E
+376 254 FE ю CYRILLIC SMALL LETTER YU
+377 255 FF я CYRILLIC SMALL LETTER YA
+.TE
+.SH NOTES
+CP\ 1251 is also known as Windows Cyrillic.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1252 (7),
+.BR iso_8859\-5 (7),
+.BR koi8\-r (7),
+.BR koi8\-u (7),
+.BR utf\-8 (7)
diff --git a/man7/cp1252.7 b/man7/cp1252.7
new file mode 100644
index 0000000..2522b1d
--- /dev/null
+++ b/man7/cp1252.7
@@ -0,0 +1,156 @@
+'\" t
+.\" Copyright 2014 (C) Marko Myllynen <myllynen@redhat.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH cp1252 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+cp1252 \- CP\ 1252 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The Windows Code Pages include several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+CP\ 1252 encodes the
+characters used in many West European languages.
+.SS CP\ 1252 characters
+The following table displays the characters in CP\ 1252 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+200 128 80 € EURO SIGN
+202 130 82 ‚ SINGLE LOW-9 QUOTATION MARK
+203 131 83 ƒ LATIN SMALL LETTER F WITH HOOK
+204 132 84 „ DOUBLE LOW-9 QUOTATION MARK
+205 133 85 … HORIZONTAL ELLIPSIS
+206 134 86 † DAGGER
+207 135 87 ‡ DOUBLE DAGGER
+210 136 88 ˆ MODIFIER LETTER CIRCUMFLEX ACCENT
+211 137 89 ‰ PER MILLE SIGN
+212 138 8A Š LATIN CAPITAL LETTER S WITH CARON
+213 139 8B ‹ SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+214 140 8C Œ LATIN CAPITAL LIGATURE OE
+216 142 8E Ž LATIN CAPITAL LETTER Z WITH CARON
+221 145 91 ‘ LEFT SINGLE QUOTATION MARK
+222 146 92 ’ RIGHT SINGLE QUOTATION MARK
+223 147 93 “ LEFT DOUBLE QUOTATION MARK
+224 148 94 ” RIGHT DOUBLE QUOTATION MARK
+225 149 95 • BULLET
+226 150 96 – EN DASH
+227 151 97 — EM DASH
+230 152 98 ˜ SMALL TILDE
+231 153 99 ™ TRADE MARK SIGN
+232 154 9A š LATIN SMALL LETTER S WITH CARON
+233 155 9B › SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+234 156 9C œ LATIN SMALL LIGATURE OE
+236 158 9E ž LATIN SMALL LETTER Z WITH CARON
+237 159 9F Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ¡ INVERTED EXCLAMATION MARK
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 ¥ YEN SIGN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA ª FEMININE ORDINAL INDICATOR
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ¸ CEDILLA
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA º MASCULINE ORDINAL INDICATOR
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ¼ VULGAR FRACTION ONE QUARTER
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE ¾ VULGAR FRACTION THREE QUARTERS
+277 191 BF ¿ INVERTED QUESTION MARK
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ð LATIN CAPITAL LETTER ETH
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Þ LATIN CAPITAL LETTER THORN
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ð LATIN SMALL LETTER ETH
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE þ LATIN SMALL LETTER THORN
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+CP\ 1252 is also known as Windows-1252.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1251 (7),
+.BR iso_8859\-1 (7),
+.BR iso_8859\-15 (7),
+.BR utf\-8 (7)
diff --git a/man7/cpuset.7 b/man7/cpuset.7
new file mode 100644
index 0000000..800e4da
--- /dev/null
+++ b/man7/cpuset.7
@@ -0,0 +1,1504 @@
+.\" Copyright (c) 2008 Silicon Graphics, Inc.
+.\"
+.\" Author: Paul Jackson (http://oss.sgi.com/projects/cpusets)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-only
+.\"
+.TH cpuset 7 2023-07-18 "Linux man-pages 6.05.01"
+.SH NAME
+cpuset \- confine processes to processor and memory node subsets
+.SH DESCRIPTION
+The cpuset filesystem is a pseudo-filesystem interface
+to the kernel cpuset mechanism,
+which is used to control the processor placement
+and memory placement of processes.
+It is commonly mounted at
+.IR /dev/cpuset .
+.PP
+On systems with kernels compiled with built-in support for cpusets,
+all processes are attached to a cpuset, and cpusets are always present.
+If a system supports cpusets, then it will have the entry
+.B nodev cpuset
+in the file
+.IR /proc/filesystems .
+By mounting the cpuset filesystem (see the
+.B EXAMPLES
+section below),
+the administrator can configure the cpusets on a system
+to control the processor and memory placement of processes
+on that system.
+By default, if the cpuset configuration
+on a system is not modified or if the cpuset filesystem
+is not even mounted, then the cpuset mechanism,
+though present, has no effect on the system's behavior.
+.PP
+A cpuset defines a list of CPUs and memory nodes.
+.PP
+The CPUs of a system include all the logical processing
+units on which a process can execute, including, if present,
+multiple processor cores within a package and Hyper-Threads
+within a processor core.
+Memory nodes include all distinct
+banks of main memory; small and SMP systems typically have
+just one memory node that contains all the system's main memory,
+while NUMA (non-uniform memory access) systems have multiple memory nodes.
+.PP
+Cpusets are represented as directories in a hierarchical
+pseudo-filesystem, where the top directory in the hierarchy
+.RI ( /dev/cpuset )
+represents the entire system (all online CPUs and memory nodes)
+and any cpuset that is the child (descendant) of
+another parent cpuset contains a subset of that parent's
+CPUs and memory nodes.
+The directories and files representing cpusets have normal
+filesystem permissions.
+.PP
+Every process in the system belongs to exactly one cpuset.
+A process is confined to run only on the CPUs in
+the cpuset it belongs to, and to allocate memory only
+on the memory nodes in that cpuset.
+When a process
+.BR fork (2)s,
+the child process is placed in the same cpuset as its parent.
+With sufficient privilege, a process may be moved from one
+cpuset to another and the allowed CPUs and memory nodes
+of an existing cpuset may be changed.
+.PP
+When the system begins booting, a single cpuset is
+defined that includes all CPUs and memory nodes on the
+system, and all processes are in that cpuset.
+During the boot process, or later during normal system operation,
+other cpusets may be created, as subdirectories of this top cpuset,
+under the control of the system administrator,
+and processes may be placed in these other cpusets.
+.PP
+Cpusets are integrated with the
+.BR sched_setaffinity (2)
+scheduling affinity mechanism and the
+.BR mbind (2)
+and
+.BR set_mempolicy (2)
+memory-placement mechanisms in the kernel.
+Neither of these mechanisms lets a process make use
+of a CPU or memory node that is not allowed by that process's cpuset.
+If changes to a process's cpuset placement conflict with these
+other mechanisms, then cpuset placement is enforced
+even if it means overriding these other mechanisms.
+The kernel accomplishes this overriding by silently
+restricting the CPUs and memory nodes requested by
+these other mechanisms to those allowed by the
+invoking process's cpuset.
+This can result in these
+other calls returning an error, if for example, such
+a call ends up requesting an empty set of CPUs or
+memory nodes, after that request is restricted to
+the invoking process's cpuset.
+.PP
+Typically, a cpuset is used to manage
+the CPU and memory-node confinement for a set of
+cooperating processes such as a batch scheduler job, and these
+other mechanisms are used to manage the placement of
+individual processes or memory regions within that set or job.
+.SH FILES
+Each directory below
+.I /dev/cpuset
+represents a cpuset and contains a fixed set of pseudo-files
+describing the state of that cpuset.
+.PP
+New cpusets are created using the
+.BR mkdir (2)
+system call or the
+.BR mkdir (1)
+command.
+The properties of a cpuset, such as its flags, allowed
+CPUs and memory nodes, and attached processes, are queried and modified
+by reading or writing to the appropriate file in that cpuset's directory,
+as listed below.
+.PP
+The pseudo-files in each cpuset directory are automatically created when
+the cpuset is created, as a result of the
+.BR mkdir (2)
+invocation.
+It is not possible to directly add or remove these pseudo-files.
+.PP
+A cpuset directory that contains no child cpuset directories,
+and has no attached processes, can be removed using
+.BR rmdir (2)
+or
+.BR rmdir (1).
+It is not necessary, or possible,
+to remove the pseudo-files inside the directory before removing it.
+.PP
+The pseudo-files in each cpuset directory are
+small text files that may be read and
+written using traditional shell utilities such as
+.BR cat (1),
+and
+.BR echo (1),
+or from a program by using file I/O library functions or system calls,
+such as
+.BR open (2),
+.BR read (2),
+.BR write (2),
+and
+.BR close (2).
+.PP
+The pseudo-files in a cpuset directory represent internal kernel
+state and do not have any persistent image on disk.
+Each of these per-cpuset files is listed and described below.
+.\" ====================== tasks ======================
+.TP
+.I tasks
+List of the process IDs (PIDs) of the processes in that cpuset.
+The list is formatted as a series of ASCII
+decimal numbers, each followed by a newline.
+A process may be added to a cpuset (automatically removing
+it from the cpuset that previously contained it) by writing its
+PID to that cpuset's
+.I tasks
+file (with or without a trailing newline).
+.IP
+.B Warning:
+only one PID may be written to the
+.I tasks
+file at a time.
+If a string is written that contains more
+than one PID, only the first one will be used.
+.\" =================== notify_on_release ===================
+.TP
+.I notify_on_release
+Flag (0 or 1).
+If set (1), that cpuset will receive special handling
+after it is released, that is, after all processes cease using
+it (i.e., terminate or are moved to a different cpuset)
+and all child cpuset directories have been removed.
+See the \fBNotify On Release\fR section, below.
+.\" ====================== cpus ======================
+.TP
+.I cpuset.cpus
+List of the physical numbers of the CPUs on which processes
+in that cpuset are allowed to execute.
+See \fBList Format\fR below for a description of the
+format of
+.IR cpus .
+.IP
+The CPUs allowed to a cpuset may be changed by
+writing a new list to its
+.I cpus
+file.
+.\" ==================== cpu_exclusive ====================
+.TP
+.I cpuset.cpu_exclusive
+Flag (0 or 1).
+If set (1), the cpuset has exclusive use of
+its CPUs (no sibling or cousin cpuset may overlap CPUs).
+By default, this is off (0).
+Newly created cpusets also initially default this to off (0).
+.IP
+Two cpusets are
+.I sibling
+cpusets if they share the same parent cpuset in the
+.I /dev/cpuset
+hierarchy.
+Two cpusets are
+.I cousin
+cpusets if neither is the ancestor of the other.
+Regardless of the
+.I cpu_exclusive
+setting, if one cpuset is the ancestor of another,
+and if both of these cpusets have nonempty
+.IR cpus ,
+then their
+.I cpus
+must overlap, because the
+.I cpus
+of any cpuset are always a subset of the
+.I cpus
+of its parent cpuset.
+.\" ====================== mems ======================
+.TP
+.I cpuset.mems
+List of memory nodes on which processes in this cpuset are
+allowed to allocate memory.
+See \fBList Format\fR below for a description of the
+format of
+.IR mems .
+.\" ==================== mem_exclusive ====================
+.TP
+.I cpuset.mem_exclusive
+Flag (0 or 1).
+If set (1), the cpuset has exclusive use of
+its memory nodes (no sibling or cousin may overlap).
+Also if set (1), the cpuset is a \fBHardwall\fR cpuset (see below).
+By default, this is off (0).
+Newly created cpusets also initially default this to off (0).
+.IP
+Regardless of the
+.I mem_exclusive
+setting, if one cpuset is the ancestor of another,
+then their memory nodes must overlap, because the memory
+nodes of any cpuset are always a subset of the memory nodes
+of that cpuset's parent cpuset.
+.\" ==================== mem_hardwall ====================
+.TP
+.IR cpuset.mem_hardwall " (since Linux 2.6.26)"
+Flag (0 or 1).
+If set (1), the cpuset is a \fBHardwall\fR cpuset (see below).
+Unlike \fBmem_exclusive\fR,
+there is no constraint on whether cpusets
+marked \fBmem_hardwall\fR may have overlapping
+memory nodes with sibling or cousin cpusets.
+By default, this is off (0).
+Newly created cpusets also initially default this to off (0).
+.\" ==================== memory_migrate ====================
+.TP
+.IR cpuset.memory_migrate " (since Linux 2.6.16)"
+Flag (0 or 1).
+If set (1), then memory migration is enabled.
+By default, this is off (0).
+See the \fBMemory Migration\fR section, below.
+.\" ==================== memory_pressure ====================
+.TP
+.IR cpuset.memory_pressure " (since Linux 2.6.16)"
+A measure of how much memory pressure the processes in this
+cpuset are causing.
+See the \fBMemory Pressure\fR section, below.
+Unless
+.I memory_pressure_enabled
+is enabled, always has value zero (0).
+This file is read-only.
+See the
+.B WARNINGS
+section, below.
+.\" ================= memory_pressure_enabled =================
+.TP
+.IR cpuset.memory_pressure_enabled " (since Linux 2.6.16)"
+Flag (0 or 1).
+This file is present only in the root cpuset, normally
+.IR /dev/cpuset .
+If set (1), the
+.I memory_pressure
+calculations are enabled for all cpusets in the system.
+By default, this is off (0).
+See the
+\fBMemory Pressure\fR section, below.
+.\" ================== memory_spread_page ==================
+.TP
+.IR cpuset.memory_spread_page " (since Linux 2.6.17)"
+Flag (0 or 1).
+If set (1), pages in the kernel page cache
+(filesystem buffers) are uniformly spread across the cpuset.
+By default, this is off (0) in the top cpuset,
+and inherited from the parent cpuset in
+newly created cpusets.
+See the \fBMemory Spread\fR section, below.
+.\" ================== memory_spread_slab ==================
+.TP
+.IR cpuset.memory_spread_slab " (since Linux 2.6.17)"
+Flag (0 or 1).
+If set (1), the kernel slab caches
+for file I/O (directory and inode structures) are
+uniformly spread across the cpuset.
+By default, this is off (0) in the top cpuset,
+and inherited from the parent cpuset in
+newly created cpusets.
+See the \fBMemory Spread\fR section, below.
+.\" ================== sched_load_balance ==================
+.TP
+.IR cpuset.sched_load_balance " (since Linux 2.6.24)"
+Flag (0 or 1).
+If set (1, the default) the kernel will
+automatically load balance processes in that cpuset over
+the allowed CPUs in that cpuset.
+If cleared (0) the
+kernel will avoid load balancing processes in this cpuset,
+.I unless
+some other cpuset with overlapping CPUs has its
+.I sched_load_balance
+flag set.
+See \fBScheduler Load Balancing\fR, below, for further details.
+.\" ================== sched_relax_domain_level ==================
+.TP
+.IR cpuset.sched_relax_domain_level " (since Linux 2.6.26)"
+Integer, between \-1 and a small positive value.
+The
+.I sched_relax_domain_level
+controls the width of the range of CPUs over which the kernel scheduler
+performs immediate rebalancing of runnable tasks across CPUs.
+If
+.I sched_load_balance
+is disabled, then the setting of
+.I sched_relax_domain_level
+does not matter, as no such load balancing is done.
+If
+.I sched_load_balance
+is enabled, then the higher the value of the
+.IR sched_relax_domain_level ,
+the wider
+the range of CPUs over which immediate load balancing is attempted.
+See \fBScheduler Relax Domain Level\fR, below, for further details.
+.\" ================== proc cpuset ==================
+.PP
+In addition to the above pseudo-files in each directory below
+.IR /dev/cpuset ,
+each process has a pseudo-file,
+.IR /proc/ pid /cpuset ,
+that displays the path of the process's cpuset directory
+relative to the root of the cpuset filesystem.
+.\" ================== proc status ==================
+.PP
+Also the
+.IR /proc/ pid /status
+file for each process has four added lines,
+displaying the process's
+.I Cpus_allowed
+(on which CPUs it may be scheduled) and
+.I Mems_allowed
+(on which memory nodes it may obtain memory),
+in the two formats \fBMask Format\fR and \fBList Format\fR (see below)
+as shown in the following example:
+.PP
+.in +4n
+.EX
+Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
+Cpus_allowed_list: 0\-127
+Mems_allowed: ffffffff,ffffffff
+Mems_allowed_list: 0\-63
+.EE
+.in
+.PP
+The "allowed" fields were added in Linux 2.6.24;
+the "allowed_list" fields were added in Linux 2.6.26.
+.\" ================== EXTENDED CAPABILITIES ==================
+.SH EXTENDED CAPABILITIES
+In addition to controlling which
+.I cpus
+and
+.I mems
+a process is allowed to use, cpusets provide the following
+extended capabilities.
+.\" ================== Exclusive Cpusets ==================
+.SS Exclusive cpusets
+If a cpuset is marked
+.I cpu_exclusive
+or
+.IR mem_exclusive ,
+no other cpuset, other than a direct ancestor or descendant,
+may share any of the same CPUs or memory nodes.
+.PP
+A cpuset that is
+.I mem_exclusive
+restricts kernel allocations for
+buffer cache pages and other internal kernel data pages
+commonly shared by the kernel across
+multiple users.
+All cpusets, whether
+.I mem_exclusive
+or not, restrict allocations of memory for user space.
+This enables configuring a
+system so that several independent jobs can share common kernel data,
+while isolating each job's user allocation in
+its own cpuset.
+To do this, construct a large
+.I mem_exclusive
+cpuset to hold all the jobs, and construct child,
+.RI non- mem_exclusive
+cpusets for each individual job.
+Only a small amount of kernel memory,
+such as requests from interrupt handlers, is allowed to be
+placed on memory nodes
+outside even a
+.I mem_exclusive
+cpuset.
+.\" ================== Hardwall ==================
+.SS Hardwall
+A cpuset that has
+.I mem_exclusive
+or
+.I mem_hardwall
+set is a
+.I hardwall
+cpuset.
+A
+.I hardwall
+cpuset restricts kernel allocations for page, buffer,
+and other data commonly shared by the kernel across multiple users.
+All cpusets, whether
+.I hardwall
+or not, restrict allocations of memory for user space.
+.PP
+This enables configuring a system so that several independent
+jobs can share common kernel data, such as filesystem pages,
+while isolating each job's user allocation in its own cpuset.
+To do this, construct a large
+.I hardwall
+cpuset to hold
+all the jobs, and construct child cpusets for each individual
+job which are not
+.I hardwall
+cpusets.
+.PP
+Only a small amount of kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+.I hardwall
+cpuset.
+.\" ================== Notify On Release ==================
+.SS Notify on release
+If the
+.I notify_on_release
+flag is enabled (1) in a cpuset,
+then whenever the last process in the cpuset leaves
+(exits or attaches to some other cpuset)
+and the last child cpuset of that cpuset is removed,
+the kernel will run the command
+.IR /sbin/cpuset_release_agent ,
+supplying the pathname (relative to the mount point of the
+cpuset filesystem) of the abandoned cpuset.
+This enables automatic removal of abandoned cpusets.
+.PP
+The default value of
+.I notify_on_release
+in the root cpuset at system boot is disabled (0).
+The default value of other cpusets at creation
+is the current value of their parent's
+.I notify_on_release
+setting.
+.PP
+The command
+.I /sbin/cpuset_release_agent
+is invoked, with the name
+.RI ( /dev/cpuset
+relative path)
+of the to-be-released cpuset in
+.IR argv[1] .
+.PP
+The usual contents of the command
+.I /sbin/cpuset_release_agent
+is simply the shell script:
+.PP
+.in +4n
+.EX
+#!/bin/sh
+rmdir /dev/cpuset/$1
+.EE
+.in
+.PP
+As with other flag values below, this flag can
+be changed by writing an ASCII
+number 0 or 1 (with optional trailing newline)
+into the file, to clear or set the flag, respectively.
+.\" ================== Memory Pressure ==================
+.SS Memory pressure
+The
+.I memory_pressure
+of a cpuset provides a simple per-cpuset running average of
+the rate at which the processes in a cpuset are attempting to free up in-use
+memory on the nodes of the cpuset to satisfy additional memory requests.
+.PP
+This enables batch managers that are monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+.PP
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or reprioritize jobs that
+are trying to use more memory than allowed on the nodes assigned them,
+and with tightly coupled, long-running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+.PP
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure.
+It's up to the batch manager or other user code to decide
+what action to take if it detects signs of memory pressure.
+.PP
+Unless memory pressure calculation is enabled by setting the pseudo-file
+.IR /dev/cpuset/cpuset.memory_pressure_enabled ,
+it is not computed for any cpuset, and reads from any
+.I memory_pressure
+always return zero, as represented by the ASCII string "0\en".
+See the \fBWARNINGS\fR section, below.
+.PP
+A per-cpuset, running average is employed for the following reasons:
+.IP \[bu] 3
+Because this meter is per-cpuset rather than per-process or per virtual
+memory region, the system load imposed by a batch scheduler monitoring
+this metric is sharply reduced on large systems, because a scan of
+the tasklist can be avoided on each set of queries.
+.IP \[bu]
+Because this meter is a running average rather than an accumulating
+counter, a batch scheduler can detect memory pressure with a
+single read, instead of having to read and accumulate results
+for a period of time.
+.IP \[bu]
+Because this meter is per-cpuset rather than per-process,
+the batch scheduler can obtain the key information\[em]memory
+pressure in a cpuset\[em]with a single read, rather than having to
+query and accumulate results over the entire (dynamically changing)
+set of processes in the cpuset.
+.PP
+The
+.I memory_pressure
+of a cpuset is calculated using a per-cpuset simple digital filter
+that is kept within the kernel.
+For each cpuset, this filter tracks
+the recent rate at which processes attached to that cpuset enter the
+kernel direct reclaim code.
+.PP
+The kernel direct reclaim code is entered whenever a process has to
+satisfy a memory page request by first finding some other page to
+repurpose, due to lack of any readily available already free pages.
+Dirty filesystem pages are repurposed by first writing them
+to disk.
+Unmodified filesystem buffer pages are repurposed
+by simply dropping them, though if that page is needed again, it
+will have to be reread from disk.
+.PP
+The
+.I cpuset.memory_pressure
+file provides an integer number representing the recent (half-life of
+10 seconds) rate of entries to the direct reclaim code caused by any
+process in the cpuset, in units of reclaims attempted per second,
+times 1000.
+.\" ================== Memory Spread ==================
+.SS Memory spread
+There are two Boolean flag files per cpuset that control where the
+kernel allocates pages for the filesystem buffers and related
+in-kernel data structures.
+They are called
+.I cpuset.memory_spread_page
+and
+.IR cpuset.memory_spread_slab .
+.PP
+If the per-cpuset Boolean flag file
+.I cpuset.memory_spread_page
+is set, then
+the kernel will spread the filesystem buffers (page cache) evenly
+over all the nodes that the faulting process is allowed to use, instead
+of preferring to put those pages on the node where the process is running.
+.PP
+If the per-cpuset Boolean flag file
+.I cpuset.memory_spread_slab
+is set,
+then the kernel will spread some filesystem-related slab caches,
+such as those for inodes and directory entries, evenly over all the nodes
+that the faulting process is allowed to use, instead of preferring to
+put those pages on the node where the process is running.
+.PP
+The setting of these flags does not affect the data segment
+(see
+.BR brk (2))
+or stack segment pages of a process.
+.PP
+By default, both kinds of memory spreading are off and the kernel
+prefers to allocate memory pages on the node local to where the
+requesting process is running.
+If that node is not allowed by the
+process's NUMA memory policy or cpuset configuration or if there are
+insufficient free memory pages on that node, then the kernel looks
+for the nearest node that is allowed and has sufficient free memory.
+.PP
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+.PP
+Setting memory spreading causes allocations for the affected page or
+slab caches to ignore the process's NUMA memory policy and be spread
+instead.
+However, the effect of these changes in memory placement
+caused by cpuset-specified memory spreading is hidden from the
+.BR mbind (2)
+or
+.BR set_mempolicy (2)
+calls.
+These two NUMA memory policy calls always appear to behave as if
+no cpuset-specified memory spreading is in effect, even if it is.
+If cpuset memory spreading is subsequently turned off, the NUMA
+memory policy most recently specified by these calls is automatically
+reapplied.
+.PP
+Both
+.I cpuset.memory_spread_page
+and
+.I cpuset.memory_spread_slab
+are Boolean flag files.
+By default, they contain "0", meaning that the feature is off
+for that cpuset.
+If a "1" is written to that file, that turns the named feature on.
+.PP
+Cpuset-specified memory spreading behaves similarly to what is known
+(in other contexts) as round-robin or interleave memory placement.
+.PP
+Cpuset-specified memory spreading can provide substantial performance
+improvements for jobs that:
+.IP \[bu] 3
+need to place thread-local data on
+memory nodes close to the CPUs which are running the threads that most
+frequently access that data; but also
+.IP \[bu]
+need to access large filesystem data sets that must be spread
+across the several nodes in the job's cpuset in order to fit.
+.PP
+Without this policy,
+the memory allocation across the nodes in the job's cpuset
+can become very uneven,
+especially for jobs that might have just a single
+thread initializing or reading in the data set.
+.\" ================== Memory Migration ==================
+.SS Memory migration
+Normally, under the default setting (disabled) of
+.IR cpuset.memory_migrate ,
+once a page is allocated (given a physical page
+of main memory), then that page stays on whatever node it
+was allocated on, so long as it remains allocated, even if the
+cpuset's memory-placement policy
+.I mems
+subsequently changes.
+.PP
+When memory migration is enabled in a cpuset, if the
+.I mems
+setting of the cpuset is changed, then any memory page in use by any
+process in the cpuset that is on a memory node that is no longer
+allowed will be migrated to a memory node that is allowed.
+.PP
+Furthermore, if a process is moved into a cpuset with
+.I memory_migrate
+enabled, any memory pages it uses that were on memory nodes allowed
+in its previous cpuset, but which are not allowed in its new cpuset,
+will be migrated to a memory node allowed in the new cpuset.
+.PP
+The relative placement of a migrated page within
+the cpuset is preserved during these migration operations if possible.
+For example,
+if the page was on the second valid node of the prior cpuset,
+then the page will be placed on the second valid node of the new cpuset,
+if possible.
+.\" ================== Scheduler Load Balancing ==================
+.SS Scheduler load balancing
+The kernel scheduler automatically load balances processes.
+If one CPU is underutilized,
+the kernel will look for processes on other more
+overloaded CPUs and move those processes to the underutilized CPU,
+within the constraints of such placement mechanisms as cpusets and
+.BR sched_setaffinity (2).
+.PP
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the process list increases more than
+linearly with the number of CPUs being balanced.
+For example, it
+costs more to load balance across one large set of CPUs than it does
+to balance across two smaller sets of CPUs, each of half the size
+of the larger set.
+(The precise relationship between the number of CPUs being balanced
+and the cost of load balancing depends
+on implementation details of the kernel process scheduler, which is
+subject to change over time, as improved kernel scheduler algorithms
+are implemented.)
+.PP
+The per-cpuset flag
+.I sched_load_balance
+provides a mechanism to suppress this automatic scheduler load
+balancing in cases where it is not needed and suppressing it would have
+worthwhile performance benefits.
+.PP
+By default, load balancing is done across all CPUs, except those
+marked isolated using the kernel boot time "isolcpus=" argument.
+(See \fBScheduler Relax Domain Level\fR, below, to change this default.)
+.PP
+This default load balancing across all CPUs is not well suited to
+the following two situations:
+.IP \[bu] 3
+On large systems, load balancing across many CPUs is expensive.
+If the system is managed using cpusets to place independent jobs
+on separate sets of CPUs, full load balancing is unnecessary.
+.IP \[bu]
+Systems supporting real-time on some CPUs need to minimize
+system overhead on those CPUs, including avoiding process load
+balancing if that is not needed.
+.PP
+When the per-cpuset flag
+.I sched_load_balance
+is enabled (the default setting),
+it requests load balancing across
+all the CPUs in that cpuset's allowed CPUs,
+ensuring that load balancing can move a process (not otherwise pinned,
+as by
+.BR sched_setaffinity (2))
+from any CPU in that cpuset to any other.
+.PP
+When the per-cpuset flag
+.I sched_load_balance
+is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+\fIexcept\fR in so far as is necessary because some overlapping cpuset
+has
+.I sched_load_balance
+enabled.
+.PP
+So, for example, if the top cpuset has the flag
+.I sched_load_balance
+enabled, then the scheduler will load balance across all
+CPUs, and the setting of the
+.I sched_load_balance
+flag in other cpusets has no effect,
+as we're already fully load balancing.
+.PP
+Therefore in the above two situations, the flag
+.I sched_load_balance
+should be disabled in the top cpuset, and only some of the smaller,
+child cpusets would have this flag enabled.
+.PP
+When doing this, you don't usually want to leave any unpinned processes in
+the top cpuset that might use nontrivial amounts of CPU, as such processes
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets.
+Even if such a process could use spare CPU cycles in some other CPUs,
+the kernel scheduler might not consider the possibility of
+load balancing that process to the underused CPU.
+.PP
+Of course, processes pinned to a particular CPU can be left in a cpuset
+that disables
+.I sched_load_balance
+as those processes aren't going anywhere else anyway.
+.\" ================== Scheduler Relax Domain Level ==================
+.SS Scheduler relax domain level
+The kernel scheduler performs immediate load balancing whenever
+a CPU becomes free or another task becomes runnable.
+This load
+balancing works to ensure that as many CPUs as possible are usefully
+employed running tasks.
+The kernel also performs periodic load
+balancing off the software clock described in
+.BR time (7).
+The setting of
+.I sched_relax_domain_level
+applies only to immediate load balancing.
+Regardless of the
+.I sched_relax_domain_level
+setting, periodic load balancing is attempted over all CPUs
+(unless disabled by turning off
+.IR sched_load_balance ).
+In any case, of course, tasks will be scheduled to run only on
+CPUs allowed by their cpuset, as modified by
+.BR sched_setaffinity (2)
+system calls.
+.PP
+On small systems, such as those with just a few CPUs, immediate load
+balancing is useful to improve system interactivity and to minimize
+wasteful idle CPU cycles.
+But on large systems, attempting immediate
+load balancing across a large number of CPUs can be more costly than
+it is worth, depending on the particular performance characteristics
+of the job mix and the hardware.
+.PP
+The exact meaning of the small integer values of
+.I sched_relax_domain_level
+will depend on internal
+implementation details of the kernel scheduler code and on the
+non-uniform architecture of the hardware.
+Both of these will evolve
+over time and vary by system architecture and kernel version.
+.PP
+As of this writing, when this capability was introduced in Linux
+2.6.26, on certain popular architectures, the positive values of
+.I sched_relax_domain_level
+have the following meanings.
+.PP
+.PD 0
+.TP
+.B 1
+Perform immediate load balancing across Hyper-Thread
+siblings on the same core.
+.TP
+.B 2
+Perform immediate load balancing across other cores in the same package.
+.TP
+.B 3
+Perform immediate load balancing across other CPUs
+on the same node or blade.
+.TP
+.B 4
+Perform immediate load balancing across several
+(implementation detail) nodes [On NUMA systems].
+.TP
+.B 5
+Perform immediate load balancing across all CPUs
+in system [On NUMA systems].
+.PD
+.PP
+The
+.I sched_relax_domain_level
+value of zero (0) always means
+don't perform immediate load balancing,
+hence that load balancing is done only periodically,
+not immediately when a CPU becomes available or another task becomes
+runnable.
+.PP
+The
+.I sched_relax_domain_level
+value of minus one (\-1)
+always means use the system default value.
+The system default value can vary by architecture and kernel version.
+This system default value can be changed by kernel
+boot-time "relax_domain_level=" argument.
+.PP
+In the case of multiple overlapping cpusets which have conflicting
+.I sched_relax_domain_level
+values, then the highest such value
+applies to all CPUs in any of the overlapping cpusets.
+In such cases,
+.B \-1
+is the lowest value,
+overridden by any other value,
+and
+.B 0
+is the next lowest value.
+.SH FORMATS
+The following formats are used to represent sets of
+CPUs and memory nodes.
+.\" ================== Mask Format ==================
+.SS Mask format
+The \fBMask Format\fR is used to represent CPU and memory-node bit masks
+in the
+.IR /proc/ pid /status
+file.
+.PP
+This format displays each 32-bit
+word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f");
+words are filled with leading zeros, if required.
+For masks longer than one word, a comma separator is used between words.
+Words are displayed in big-endian
+order, which has the most significant bit first.
+The hex digits within a word are also in big-endian order.
+.PP
+The number of 32-bit words displayed is the minimum number needed to
+display all bits of the bit mask, based on the size of the bit mask.
+.PP
+Examples of the \fBMask Format\fR:
+.PP
+.in +4n
+.EX
+00000001 # just bit 0 set
+40000000,00000000,00000000 # just bit 94 set
+00000001,00000000,00000000 # just bit 64 set
+000000ff,00000000 # bits 32\-39 set
+00000000,000e3862 # 1,5,6,11\-13,17\-19 set
+.EE
+.in
+.PP
+A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as:
+.PP
+.in +4n
+.EX
+00000001,00000001,00010117
+.EE
+.in
+.PP
+The first "1" is for bit 64, the
+second for bit 32, the third for bit 16, the fourth for bit 8, the
+fifth for bit 4, and the "7" is for bits 2, 1, and 0.
+.\" ================== List Format ==================
+.SS List format
+The \fBList Format\fR for
+.I cpus
+and
+.I mems
+is a comma-separated list of CPU or memory-node
+numbers and ranges of numbers, in ASCII decimal.
+.PP
+Examples of the \fBList Format\fR:
+.PP
+.in +4n
+.EX
+0\-4,9 # bits 0, 1, 2, 3, 4, and 9 set
+0\-2,7,12\-14 # bits 0, 1, 2, 7, 12, 13, and 14 set
+.EE
+.in
+.\" ================== RULES ==================
+.SH RULES
+The following rules apply to each cpuset:
+.IP \[bu] 3
+Its CPUs and memory nodes must be a (possibly equal)
+subset of its parent's.
+.IP \[bu]
+It can be marked
+.I cpu_exclusive
+only if its parent is.
+.IP \[bu]
+It can be marked
+.I mem_exclusive
+only if its parent is.
+.IP \[bu]
+If it is
+.IR cpu_exclusive ,
+its CPUs may not overlap any sibling.
+.IP \[bu]
+If it is
+.IR mem_exclusive ,
+its memory nodes may not overlap any sibling.
+.\" ================== PERMISSIONS ==================
+.SH PERMISSIONS
+The permissions of a cpuset are determined by the permissions
+of the directories and pseudo-files in the cpuset filesystem,
+normally mounted at
+.IR /dev/cpuset .
+.PP
+For instance, a process can put itself in some other cpuset (than
+its current one) if it can write the
+.I tasks
+file for that cpuset.
+This requires execute permission on the encompassing directories
+and write permission on the
+.I tasks
+file.
+.PP
+An additional constraint is applied to requests to place some
+other process in a cpuset.
+One process may not attach another to
+a cpuset unless it would have permission to send that process
+a signal (see
+.BR kill (2)).
+.PP
+A process may create a child cpuset if it can access and write the
+parent cpuset directory.
+It can modify the CPUs or memory nodes
+in a cpuset if it can access that cpuset's directory (execute
+permissions on each of the parent directories) and write the
+corresponding
+.I cpus
+or
+.I mems
+file.
+.PP
+There is one minor difference between the manner in which these
+permissions are evaluated and the manner in which normal filesystem
+operation permissions are evaluated.
+The kernel interprets
+relative pathnames starting at a process's current working directory.
+Even if one is operating on a cpuset file, relative pathnames
+are interpreted relative to the process's current working directory,
+not relative to the process's current cpuset.
+The only ways that
+cpuset paths relative to a process's current cpuset can be used are
+if either the process's current working directory is its cpuset
+(it first did a
+.B cd
+or
+.BR chdir (2)
+to its cpuset directory beneath
+.IR /dev/cpuset ,
+which is a bit unusual)
+or if some user code converts the relative cpuset path to a
+full filesystem path.
+.PP
+In theory, this means that user code should specify cpusets
+using absolute pathnames, which requires knowing the mount point of
+the cpuset filesystem (usually, but not necessarily,
+.IR /dev/cpuset ).
+In practice, all user level code that this author is aware of
+simply assumes that if the cpuset filesystem is mounted, then
+it is mounted at
+.IR /dev/cpuset .
+Furthermore, it is common practice for carefully written
+user code to verify the presence of the pseudo-file
+.I /dev/cpuset/tasks
+in order to verify that the cpuset pseudo-filesystem
+is currently mounted.
+.\" ================== WARNINGS ==================
+.SH WARNINGS
+.SS Enabling memory_pressure
+By default, the per-cpuset file
+.I cpuset.memory_pressure
+always contains zero (0).
+Unless this feature is enabled by writing "1" to the pseudo-file
+.IR /dev/cpuset/cpuset.memory_pressure_enabled ,
+the kernel does
+not compute per-cpuset
+.IR memory_pressure .
+.SS Using the echo command
+When using the
+.B echo
+command at the shell prompt to change the values of cpuset files,
+beware that the built-in
+.B echo
+command in some shells does not display an error message if the
+.BR write (2)
+system call fails.
+.\" Gack! csh(1)'s echo does this
+For example, if the command:
+.PP
+.in +4n
+.EX
+echo 19 > cpuset.mems
+.EE
+.in
+.PP
+failed because memory node 19 was not allowed (perhaps
+the current system does not have a memory node 19), then the
+.B echo
+command might not display any error.
+It is better to use the
+.B /bin/echo
+external command to change cpuset file settings, as this
+command will display
+.BR write (2)
+errors, as in the example:
+.PP
+.in +4n
+.EX
+/bin/echo 19 > cpuset.mems
+/bin/echo: write error: Invalid argument
+.EE
+.in
+.\" ================== EXCEPTIONS ==================
+.SH EXCEPTIONS
+.SS Memory placement
+Not all allocations of system memory are constrained by cpusets,
+for the following reasons.
+.PP
+If hot-plug functionality is used to remove all the CPUs that are
+currently assigned to a cpuset, then the kernel will automatically
+update the
+.I cpus_allowed
+of all processes attached to CPUs in that cpuset
+to allow all CPUs.
+When memory hot-plug functionality for removing
+memory nodes is available, a similar exception is expected to apply
+there as well.
+In general, the kernel prefers to violate cpuset placement,
+rather than starving a process that has had all its allowed CPUs or
+memory nodes taken offline.
+User code should reconfigure cpusets to refer only to online CPUs
+and memory nodes when using hot-plug to add or remove such resources.
+.PP
+A few kernel-critical, internal memory-allocation requests, marked
+GFP_ATOMIC, must be satisfied immediately.
+The kernel may drop some
+request or malfunction if one of these allocations fails.
+If such a request cannot be satisfied within the current process's cpuset,
+then we relax the cpuset, and look for memory anywhere we can find it.
+It's better to violate the cpuset than stress the kernel.
+.PP
+Allocations of memory requested by kernel drivers while processing
+an interrupt lack any relevant process context, and are not confined
+by cpusets.
+.SS Renaming cpusets
+You can use the
+.BR rename (2)
+system call to rename cpusets.
+Only simple renaming is supported; that is, changing the name of a cpuset
+directory is permitted, but moving a directory into
+a different directory is not permitted.
+.\" ================== ERRORS ==================
+.SH ERRORS
+The Linux kernel implementation of cpusets sets
+.I errno
+to specify the reason for a failed system call affecting cpusets.
+.PP
+The possible
+.I errno
+settings and their meaning when set on
+a failed cpuset call are as listed below.
+.TP
+.B E2BIG
+Attempted a
+.BR write (2)
+on a special cpuset file
+with a length larger than some kernel-determined upper
+limit on the length of such writes.
+.TP
+.B EACCES
+Attempted to
+.BR write (2)
+the process ID (PID) of a process to a cpuset
+.I tasks
+file when one lacks permission to move that process.
+.TP
+.B EACCES
+Attempted to add, using
+.BR write (2),
+a CPU or memory node to a cpuset, when that CPU or memory node was
+not already in its parent.
+.TP
+.B EACCES
+Attempted to set, using
+.BR write (2),
+.I cpuset.cpu_exclusive
+or
+.I cpuset.mem_exclusive
+on a cpuset whose parent lacks the same setting.
+.TP
+.B EACCES
+Attempted to
+.BR write (2)
+a
+.I cpuset.memory_pressure
+file.
+.TP
+.B EACCES
+Attempted to create a file in a cpuset directory.
+.TP
+.B EBUSY
+Attempted to remove, using
+.BR rmdir (2),
+a cpuset with attached processes.
+.TP
+.B EBUSY
+Attempted to remove, using
+.BR rmdir (2),
+a cpuset with child cpusets.
+.TP
+.B EBUSY
+Attempted to remove
+a CPU or memory node from a cpuset
+that is also in a child of that cpuset.
+.TP
+.B EEXIST
+Attempted to create, using
+.BR mkdir (2),
+a cpuset that already exists.
+.TP
+.B EEXIST
+Attempted to
+.BR rename (2)
+a cpuset to a name that already exists.
+.TP
+.B EFAULT
+Attempted to
+.BR read (2)
+or
+.BR write (2)
+a cpuset file using
+a buffer that is outside the writing process's accessible address space.
+.TP
+.B EINVAL
+Attempted to change a cpuset, using
+.BR write (2),
+in a way that would violate a
+.I cpu_exclusive
+or
+.I mem_exclusive
+attribute of that cpuset or any of its siblings.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+an empty
+.I cpuset.cpus
+or
+.I cpuset.mems
+list to a cpuset which has attached processes or child cpusets.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+a
+.I cpuset.cpus
+or
+.I cpuset.mems
+list which included a range with the second number smaller than
+the first number.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+a
+.I cpuset.cpus
+or
+.I cpuset.mems
+list which included an invalid character in the string.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+a list to a
+.I cpuset.cpus
+file that did not include any online CPUs.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+a list to a
+.I cpuset.mems
+file that did not include any online memory nodes.
+.TP
+.B EINVAL
+Attempted to
+.BR write (2)
+a list to a
+.I cpuset.mems
+file that included a node that held no memory.
+.TP
+.B EIO
+Attempted to
+.BR write (2)
+a string to a cpuset
+.I tasks
+file that
+does not begin with an ASCII decimal integer.
+.TP
+.B EIO
+Attempted to
+.BR rename (2)
+a cpuset into a different directory.
+.TP
+.B ENAMETOOLONG
+Attempted to
+.BR read (2)
+a
+.IR /proc/ pid /cpuset
+file for a cpuset path that is longer than the kernel page size.
+.TP
+.B ENAMETOOLONG
+Attempted to create, using
+.BR mkdir (2),
+a cpuset whose base directory name is longer than 255 characters.
+.TP
+.B ENAMETOOLONG
+Attempted to create, using
+.BR mkdir (2),
+a cpuset whose full pathname,
+including the mount point (typically "/dev/cpuset/") prefix,
+is longer than 4095 characters.
+.TP
+.B ENODEV
+The cpuset was removed by another process at the same time as a
+.BR write (2)
+was attempted on one of the pseudo-files in the cpuset directory.
+.TP
+.B ENOENT
+Attempted to create, using
+.BR mkdir (2),
+a cpuset in a parent cpuset that doesn't exist.
+.TP
+.B ENOENT
+Attempted to
+.BR access (2)
+or
+.BR open (2)
+a nonexistent file in a cpuset directory.
+.TP
+.B ENOMEM
+Insufficient memory is available within the kernel; can occur
+on a variety of system calls affecting cpusets, but only if the
+system is extremely short of memory.
+.TP
+.B ENOSPC
+Attempted to
+.BR write (2)
+the process ID (PID)
+of a process to a cpuset
+.I tasks
+file when the cpuset had an empty
+.I cpuset.cpus
+or empty
+.I cpuset.mems
+setting.
+.TP
+.B ENOSPC
+Attempted to
+.BR write (2)
+an empty
+.I cpuset.cpus
+or
+.I cpuset.mems
+setting to a cpuset that
+has tasks attached.
+.TP
+.B ENOTDIR
+Attempted to
+.BR rename (2)
+a nonexistent cpuset.
+.TP
+.B EPERM
+Attempted to remove a file from a cpuset directory.
+.TP
+.B ERANGE
+Specified a
+.I cpuset.cpus
+or
+.I cpuset.mems
+list to the kernel which included a number too large for the kernel
+to set in its bit masks.
+.TP
+.B ESRCH
+Attempted to
+.BR write (2)
+the process ID (PID) of a nonexistent process to a cpuset
+.I tasks
+file.
+.\" ================== VERSIONS ==================
+.SH VERSIONS
+Cpusets appeared in Linux 2.6.12.
+.\" ================== NOTES ==================
+.SH NOTES
+Despite its name, the
+.I pid
+parameter is actually a thread ID,
+and each thread in a threaded group can be attached to a different
+cpuset.
+The value returned from a call to
+.BR gettid (2)
+can be passed in the argument
+.IR pid .
+.\" ================== BUGS ==================
+.SH BUGS
+.I cpuset.memory_pressure
+cpuset files can be opened
+for writing, creation, or truncation, but then the
+.BR write (2)
+fails with
+.I errno
+set to
+.BR EACCES ,
+and the creation and truncation options on
+.BR open (2)
+have no effect.
+.\" ================== EXAMPLES ==================
+.SH EXAMPLES
+The following examples demonstrate querying and setting cpuset
+options using shell commands.
+.SS Creating and attaching to a cpuset.
+To create a new cpuset and attach the current command shell to it,
+the steps are:
+.PP
+.PD 0
+.IP (1) 5
+mkdir /dev/cpuset (if not already done)
+.IP (2)
+mount \-t cpuset none /dev/cpuset (if not already done)
+.IP (3)
+Create the new cpuset using
+.BR mkdir (1).
+.IP (4)
+Assign CPUs and memory nodes to the new cpuset.
+.IP (5)
+Attach the shell to the new cpuset.
+.PD
+.PP
+For example, the following sequence of commands will set up a cpuset
+named "Charlie", containing just CPUs 2 and 3, and memory node 1,
+and then attach the current shell to that cpuset.
+.PP
+.in +4n
+.EX
+.RB "$" " mkdir /dev/cpuset"
+.RB "$" " mount \-t cpuset cpuset /dev/cpuset"
+.RB "$" " cd /dev/cpuset"
+.RB "$" " mkdir Charlie"
+.RB "$" " cd Charlie"
+.RB "$" " /bin/echo 2\-3 > cpuset.cpus"
+.RB "$" " /bin/echo 1 > cpuset.mems"
+.RB "$" " /bin/echo $$ > tasks"
+# The current shell is now running in cpuset Charlie
+# The next line should display \[aq]/Charlie\[aq]
+.RB "$" " cat /proc/self/cpuset"
+.EE
+.in
+.\"
+.SS Migrating a job to different memory nodes.
+To migrate a job (the set of processes attached to a cpuset)
+to different CPUs and memory nodes in the system, including moving
+the memory pages currently allocated to that job,
+perform the following steps.
+.PP
+.PD 0
+.IP (1) 5
+Let's say we want to move the job in cpuset
+.I alpha
+(CPUs 4\[en]7 and memory nodes 2\[en]3) to a new cpuset
+.I beta
+(CPUs 16\[en]19 and memory nodes 8\[en]9).
+.IP (2)
+First create the new cpuset
+.IR beta .
+.IP (3)
+Then allow CPUs 16\[en]19 and memory nodes 8\[en]9 in
+.IR beta .
+.IP (4)
+Then enable
+.I memory_migrate
+in
+.IR beta .
+.IP (5)
+Then move each process from
+.I alpha
+to
+.IR beta .
+.PD
+.PP
+The following sequence of commands accomplishes this.
+.PP
+.in +4n
+.EX
+.RB "$" " cd /dev/cpuset"
+.RB "$" " mkdir beta"
+.RB "$" " cd beta"
+.RB "$" " /bin/echo 16\-19 > cpuset.cpus"
+.RB "$" " /bin/echo 8\-9 > cpuset.mems"
+.RB "$" " /bin/echo 1 > cpuset.memory_migrate"
+.RB "$" " while read i; do /bin/echo $i; done < ../alpha/tasks > tasks"
+.EE
+.in
+.PP
+The above should move any processes in
+.I alpha
+to
+.IR beta ,
+and any memory held by these processes on memory nodes 2\[en]3 to memory
+nodes 8\[en]9, respectively.
+.PP
+Notice that the last step of the above sequence did not do:
+.PP
+.in +4n
+.EX
+.RB "$" " cp ../alpha/tasks tasks"
+.EE
+.in
+.PP
+The
+.I while
+loop, rather than the seemingly easier use of the
+.BR cp (1)
+command, was necessary because
+only one process PID at a time may be written to the
+.I tasks
+file.
+.PP
+The same effect (writing one PID at a time) as the
+.I while
+loop can be accomplished more efficiently, in fewer keystrokes and in
+syntax that works on any shell, but alas more obscurely, by using the
+.B \-u
+(unbuffered) option of
+.BR sed (1):
+.PP
+.in +4n
+.EX
+.RB "$" " sed \-un p < ../alpha/tasks > tasks"
+.EE
+.in
+.\" ================== SEE ALSO ==================
+.SH SEE ALSO
+.BR taskset (1),
+.BR get_mempolicy (2),
+.BR getcpu (2),
+.BR mbind (2),
+.BR sched_getaffinity (2),
+.BR sched_setaffinity (2),
+.BR sched_setscheduler (2),
+.BR set_mempolicy (2),
+.BR CPU_SET (3),
+.BR proc (5),
+.BR cgroups (7),
+.BR numa (7),
+.BR sched (7),
+.BR migratepages (8),
+.BR numactl (8)
+.PP
+.I Documentation/admin\-guide/cgroup\-v1/cpusets.rst
+in the Linux kernel source tree
+.\" commit 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8
+(or
+.I Documentation/cgroup\-v1/cpusets.txt
+before Linux 4.18, and
+.I Documentation/cpusets.txt
+before Linux 2.6.29)
diff --git a/man7/credentials.7 b/man7/credentials.7
new file mode 100644
index 0000000..653e7a3
--- /dev/null
+++ b/man7/credentials.7
@@ -0,0 +1,379 @@
+.\" Copyright (c) 2007 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 2007-06-13 Creation
+.\"
+.TH credentials 7 2023-03-30 "Linux man-pages 6.05.01"
+.SH NAME
+credentials \- process identifiers
+.SH DESCRIPTION
+.SS Process ID (PID)
+Each process has a unique nonnegative integer identifier
+that is assigned when the process is created using
+.BR fork (2).
+A process can obtain its PID using
+.BR getpid (2).
+A PID is represented using the type
+.I pid_t
+(defined in
+.IR <sys/types.h> ).
+.PP
+PIDs are used in a range of system calls to identify the process
+affected by the call, for example:
+.BR kill (2),
+.BR ptrace (2),
+.BR setpriority (2),
+.\" .BR sched_rr_get_interval (2),
+.\" .BR sched_getaffinity (2),
+.\" .BR sched_setaffinity (2),
+.\" .BR sched_getparam (2),
+.\" .BR sched_setparam (2),
+.\" .BR sched_setscheduler (2),
+.\" .BR sched_getscheduler (2),
+.BR setpgid (2),
+.\" .BR getsid (2),
+.BR setsid (2),
+.BR sigqueue (3),
+and
+.BR waitpid (2).
+.\" .BR waitid (2),
+.\" .BR wait4 (2),
+.PP
+A process's PID is preserved across an
+.BR execve (2).
+.SS Parent process ID (PPID)
+A process's parent process ID identifies the process that created
+this process using
+.BR fork (2).
+A process can obtain its PPID using
+.BR getppid (2).
+A PPID is represented using the type
+.IR pid_t .
+.PP
+A process's PPID is preserved across an
+.BR execve (2).
+.SS Process group ID and session ID
+Each process has a session ID and a process group ID,
+both represented using the type
+.IR pid_t .
+A process can obtain its session ID using
+.BR getsid (2),
+and its process group ID using
+.BR getpgrp (2).
+.PP
+A child created by
+.BR fork (2)
+inherits its parent's session ID and process group ID.
+A process's session ID and process group ID are preserved across an
+.BR execve (2).
+.PP
+Sessions and process groups are abstractions devised to support shell
+job control.
+A process group (sometimes called a "job") is a collection of
+processes that share the same process group ID;
+the shell creates a new process group for the process(es) used
+to execute a single command or pipeline (e.g., the two processes
+created to execute the command "ls\ |\ wc" are placed in the
+same process group).
+A process's group membership can be set using
+.BR setpgid (2).
+The process whose process ID is the same as its process group ID is the
+\fIprocess group leader\fP for that group.
+.PP
+A session is a collection of processes that share the same session ID.
+All of the members of a process group also have the same session ID
+(i.e., all of the members of a process group always belong to the
+same session, so that sessions and process groups form a strict
+two-level hierarchy of processes).
+A new session is created when a process calls
+.BR setsid (2),
+which creates a new session whose session ID is the same
+as the PID of the process that called
+.BR setsid (2).
+The creator of the session is called the \fIsession leader\fP.
+.PP
+All of the processes in a session share a
+.IR "controlling terminal" .
+The controlling terminal is established when the session leader
+first opens a terminal (unless the
+.B O_NOCTTY
+flag is specified when calling
+.BR open (2)).
+A terminal may be the controlling terminal of at most one session.
+.PP
+At most one of the jobs in a session may be the
+.IR "foreground job" ;
+other jobs in the session are
+.IR "background jobs" .
+Only the foreground job may read from the terminal;
+when a process in the background attempts to read from the terminal,
+its process group is sent a
+.B SIGTTIN
+signal, which suspends the job.
+If the
+.B TOSTOP
+flag has been set for the terminal (see
+.BR termios (3)),
+then only the foreground job may write to the terminal;
+writes from background jobs cause a
+.B SIGTTOU
+signal to be generated, which suspends the job.
+When terminal keys that generate a signal (such as the
+.I interrupt
+key, normally control-C)
+are pressed, the signal is sent to the processes in the foreground job.
+.PP
+Various system calls and library functions
+may operate on all members of a process group,
+including
+.BR kill (2),
+.BR killpg (3),
+.BR getpriority (2),
+.BR setpriority (2),
+.BR ioprio_get (2),
+.BR ioprio_set (2),
+.BR waitid (2),
+and
+.BR waitpid (2).
+See also the discussion of the
+.BR F_GETOWN ,
+.BR F_GETOWN_EX ,
+.BR F_SETOWN ,
+and
+.B F_SETOWN_EX
+operations in
+.BR fcntl (2).
+.SS User and group identifiers
+Each process has various associated user and group IDs.
+These IDs are integers, respectively represented using the types
+.I uid_t
+and
+.I gid_t
+(defined in
+.IR <sys/types.h> ).
+.PP
+On Linux, each process has the following user and group identifiers:
+.IP \[bu] 3
+Real user ID and real group ID.
+These IDs determine who owns the process.
+A process can obtain its real user (group) ID using
+.BR getuid (2)
+.RB ( getgid (2)).
+.IP \[bu]
+Effective user ID and effective group ID.
+These IDs are used by the kernel to determine the permissions
+that the process will have when accessing shared resources such
+as message queues, shared memory, and semaphores.
+On most UNIX systems, these IDs also determine the
+permissions when accessing files.
+However, Linux uses the filesystem IDs described below
+for this task.
+A process can obtain its effective user (group) ID using
+.BR geteuid (2)
+.RB ( getegid (2)).
+.IP \[bu]
+Saved set-user-ID and saved set-group-ID.
+These IDs are used in set-user-ID and set-group-ID programs to save
+a copy of the corresponding effective IDs that were set when
+the program was executed (see
+.BR execve (2)).
+A set-user-ID program can assume and drop privileges by
+switching its effective user ID back and forth between the values
+in its real user ID and saved set-user-ID.
+This switching is done via calls to
+.BR seteuid (2),
+.BR setreuid (2),
+or
+.BR setresuid (2).
+A set-group-ID program performs the analogous tasks using
+.BR setegid (2),
+.BR setregid (2),
+or
+.BR setresgid (2).
+A process can obtain its saved set-user-ID (set-group-ID) using
+.BR getresuid (2)
+.RB ( getresgid (2)).
+.IP \[bu]
+Filesystem user ID and filesystem group ID (Linux-specific).
+These IDs, in conjunction with the supplementary group IDs described
+below, are used to determine permissions for accessing files; see
+.BR path_resolution (7)
+for details.
+Whenever a process's effective user (group) ID is changed,
+the kernel also automatically changes the filesystem user (group) ID
+to the same value.
+Consequently, the filesystem IDs normally have the same values
+as the corresponding effective ID, and the semantics for file-permission
+checks are thus the same on Linux as on other UNIX systems.
+The filesystem IDs can be made to differ from the effective IDs
+by calling
+.BR setfsuid (2)
+and
+.BR setfsgid (2).
+.IP \[bu]
+Supplementary group IDs.
+This is a set of additional group IDs that are used for permission
+checks when accessing files and other shared resources.
+Before Linux 2.6.4,
+a process could be a member of up to 32 supplementary groups;
+since Linux 2.6.4,
+a process can be a member of up to 65536 supplementary groups.
+The call
+.I sysconf(_SC_NGROUPS_MAX)
+can be used to determine the number of supplementary groups
+of which a process may be a member.
+.\" Since Linux 2.6.4, the limit is visible via the read-only file
+.\" /proc/sys/kernel/ngroups_max.
+.\" As at 2.6.22-rc2, this file is still read-only.
+A process can obtain its set of supplementary group IDs using
+.BR getgroups (2).
+.PP
+A child process created by
+.BR fork (2)
+inherits copies of its parent's user and group IDs.
+During an
+.BR execve (2),
+a process's real user and group ID and supplementary
+group IDs are preserved;
+the effective and saved set IDs may be changed, as described in
+.BR execve (2).
+.PP
+Aside from the purposes noted above,
+a process's user IDs are also employed in a number of other contexts:
+.IP \[bu] 3
+when determining the permissions for sending signals (see
+.BR kill (2));
+.IP \[bu]
+when determining the permissions for setting
+process-scheduling parameters (nice value, real time
+scheduling policy and priority, CPU affinity, I/O priority) using
+.BR setpriority (2),
+.BR sched_setaffinity (2),
+.BR sched_setscheduler (2),
+.BR sched_setparam (2),
+.BR sched_setattr (2),
+and
+.BR ioprio_set (2);
+.IP \[bu]
+when checking resource limits (see
+.BR getrlimit (2));
+.IP \[bu]
+when checking the limit on the number of inotify instances
+that the process may create (see
+.BR inotify (7)).
+.\"
+.SS Modifying process user and group IDs
+Subject to rules described in the relevant manual pages,
+a process can use the following APIs to modify its user and group IDs:
+.TP
+.BR setuid "(2) (" setgid (2))
+Modify the process's real (and possibly effective and saved-set)
+user (group) IDs.
+.TP
+.BR seteuid "(2) (" setegid (2))
+Modify the process's effective user (group) ID.
+.TP
+.BR setfsuid "(2) (" setfsgid (2))
+Modify the process's filesystem user (group) ID.
+.TP
+.BR setreuid "(2) (" setregid (2))
+Modify the process's real and effective (and possibly saved-set)
+user (group) IDs.
+.TP
+.BR setresuid "(2) (" setresgid (2))
+Modify the process's real, effective, and saved-set user (group) IDs.
+.TP
+.BR setgroups (2)
+Modify the process's supplementary group list.
+.PP
+Any changes to a process's effective user (group) ID
+are automatically carried over to the process's
+filesystem user (group) ID.
+Changes to a process's effective user or group ID can also affect the
+process "dumpable" attribute, as described in
+.BR prctl (2).
+.PP
+Changes to process user and group IDs can affect the capabilities
+of the process, as described in
+.BR capabilities (7).
+.SH STANDARDS
+Process IDs, parent process IDs, process group IDs, and session IDs
+are specified in POSIX.1.
+The real, effective, and saved set user and group IDs,
+and the supplementary group IDs, are specified in POSIX.1.
+.PP
+The filesystem user and group IDs are a Linux extension.
+.SH NOTES
+Various fields in the
+.IR /proc/ pid /status
+file show the process credentials described above.
+See
+.BR proc (5)
+for further information.
+.PP
+The POSIX threads specification requires that
+credentials are shared by all of the threads in a process.
+However, at the kernel level, Linux maintains separate user and group
+credentials for each thread.
+The NPTL threading implementation does some work to ensure
+that any change to user or group credentials
+(e.g., calls to
+.BR setuid (2),
+.BR setresuid (2))
+is carried through to all of the POSIX threads in a process.
+See
+.BR nptl (7)
+for further details.
+.SH SEE ALSO
+.BR bash (1),
+.BR csh (1),
+.BR groups (1),
+.BR id (1),
+.BR newgrp (1),
+.BR ps (1),
+.BR runuser (1),
+.BR setpriv (1),
+.BR sg (1),
+.BR su (1),
+.BR access (2),
+.BR execve (2),
+.BR faccessat (2),
+.BR fork (2),
+.BR getgroups (2),
+.BR getpgrp (2),
+.BR getpid (2),
+.BR getppid (2),
+.BR getsid (2),
+.BR kill (2),
+.BR setegid (2),
+.BR seteuid (2),
+.BR setfsgid (2),
+.BR setfsuid (2),
+.BR setgid (2),
+.BR setgroups (2),
+.BR setpgid (2),
+.BR setresgid (2),
+.BR setresuid (2),
+.BR setsid (2),
+.BR setuid (2),
+.BR waitpid (2),
+.BR euidaccess (3),
+.BR initgroups (3),
+.BR killpg (3),
+.BR tcgetpgrp (3),
+.BR tcgetsid (3),
+.BR tcsetpgrp (3),
+.BR group (5),
+.BR passwd (5),
+.BR shadow (5),
+.BR capabilities (7),
+.BR namespaces (7),
+.BR path_resolution (7),
+.BR pid_namespaces (7),
+.BR pthreads (7),
+.BR signal (7),
+.BR system_data_types (7),
+.BR unix (7),
+.BR user_namespaces (7),
+.BR sudo (8)
diff --git a/man7/ddp.7 b/man7/ddp.7
new file mode 100644
index 0000000..0b7eb15
--- /dev/null
+++ b/man7/ddp.7
@@ -0,0 +1,245 @@
+.\" This man page is Copyright (C) 1998 Alan Cox.
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" $Id: ddp.7,v 1.3 1999/05/13 11:33:22 freitag Exp $
+.\"
+.TH ddp 7 2023-05-26 "Linux man-pages 6.05.01"
+.SH NAME
+ddp \- Linux AppleTalk protocol implementation
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netatalk/at.h>
+.PP
+.IB ddp_socket " = socket(AF_APPLETALK, SOCK_DGRAM, 0);"
+.IB raw_socket " = socket(AF_APPLETALK, SOCK_RAW, " protocol ");"
+.fi
+.SH DESCRIPTION
+Linux implements the AppleTalk protocols described in
+.IR "Inside AppleTalk" .
+Only the DDP layer and AARP are present in
+the kernel.
+They are designed to be used via the
+.B netatalk
+protocol
+libraries.
+This page documents the interface for those who wish or need to
+use the DDP layer directly.
+.PP
+The communication between AppleTalk and the user program works using a
+BSD-compatible socket interface.
+For more information on sockets, see
+.BR socket (7).
+.PP
+An AppleTalk socket is created by calling the
+.BR socket (2)
+function with an
+.B AF_APPLETALK
+socket family argument.
+Valid socket types are
+.B SOCK_DGRAM
+to open a
+.B ddp
+socket or
+.B SOCK_RAW
+to open a
+.B raw
+socket.
+.I protocol
+is the AppleTalk protocol to be received or sent.
+For
+.B SOCK_RAW
+you must specify
+.BR ATPROTO_DDP .
+.PP
+Raw sockets may be opened only by a process with effective user ID 0
+or when the process has the
+.B CAP_NET_RAW
+capability.
+.SS Address format
+An AppleTalk socket address is defined as a combination of a network number,
+a node number, and a port number.
+.PP
+.in +4n
+.EX
+struct at_addr {
+ unsigned short s_net;
+ unsigned char s_node;
+};
+\&
+struct sockaddr_atalk {
+ sa_family_t sat_family; /* address family */
+ unsigned char sat_port; /* port */
+ struct at_addr sat_addr; /* net/node */
+};
+.EE
+.in
+.PP
+.I sat_family
+is always set to
+.BR AF_APPLETALK .
+.I sat_port
+contains the port.
+The port numbers below 129 are known as
+.IR "reserved ports" .
+Only processes with the effective user ID 0 or the
+.B CAP_NET_BIND_SERVICE
+capability may
+.BR bind (2)
+to these sockets.
+.I sat_addr
+is the host address.
+The
+.I s_net
+member of
+.I struct at_addr
+contains the host network in network byte order.
+The value of
+.B AT_ANYNET
+is a
+wildcard and also implies \[lq]this network.\[rq]
+The
+.I s_node
+member of
+.I struct at_addr
+contains the host node number.
+The value of
+.B AT_ANYNODE
+is a
+wildcard and also implies \[lq]this node.\[rq] The value of
+.B ATADDR_BCAST
+is a link
+local broadcast address.
+.\" FIXME . this doesn't make sense [johnl]
+.SS Socket options
+No protocol-specific socket options are supported.
+.SS /proc interfaces
+DDP supports a set of
+.I /proc
+interfaces to configure some global AppleTalk parameters.
+The parameters can be accessed by reading or writing files in the directory
+.IR /proc/sys/net/atalk/ .
+.TP
+.I aarp\-expiry\-time
+The time interval (in seconds) before an AARP cache entry expires.
+.TP
+.I aarp\-resolve\-time
+The time interval (in seconds) before an AARP cache entry is resolved.
+.TP
+.I aarp\-retransmit\-limit
+The number of retransmissions of an AARP query before the node is declared
+dead.
+.TP
+.I aarp\-tick\-time
+The timer rate (in seconds) for the timer driving AARP.
+.PP
+The default values match the specification and should never need to be
+changed.
+.SS Ioctls
+All ioctls described in
+.BR socket (7)
+apply to DDP.
+.\" FIXME . Add a section about multicasting
+.SH ERRORS
+.TP
+.B EACCES
+The user tried to execute an operation without the necessary permissions.
+These include sending to a broadcast address without
+having the broadcast flag set,
+and trying to bind to a reserved port without effective user ID 0 or
+.BR CAP_NET_BIND_SERVICE .
+.TP
+.B EADDRINUSE
+Tried to bind to an address already in use.
+.TP
+.B EADDRNOTAVAIL
+A nonexistent interface was requested or the requested source address was
+not local.
+.TP
+.B EAGAIN
+Operation on a nonblocking socket would block.
+.TP
+.B EALREADY
+A connection operation on a nonblocking socket is already in progress.
+.TP
+.B ECONNABORTED
+A connection was closed during an
+.BR accept (2).
+.TP
+.B EHOSTUNREACH
+No routing table entry matches the destination address.
+.TP
+.B EINVAL
+Invalid argument passed.
+.TP
+.B EISCONN
+.BR connect (2)
+was called on an already connected socket.
+.TP
+.B EMSGSIZE
+Datagram is bigger than the DDP MTU.
+.TP
+.B ENODEV
+Network device not available or not capable of sending IP.
+.TP
+.B ENOENT
+.B SIOCGSTAMP
+was called on a socket where no packet arrived.
+.TP
+.BR ENOMEM " and " ENOBUFS
+Not enough memory available.
+.TP
+.B ENOPKG
+A kernel subsystem was not configured.
+.TP
+.BR ENOPROTOOPT " and " EOPNOTSUPP
+Invalid socket option passed.
+.TP
+.B ENOTCONN
+The operation is defined only on a connected socket, but the socket wasn't
+connected.
+.TP
+.B EPERM
+User doesn't have permission to set high priority,
+make a configuration change,
+or send signals to the requested process or group.
+.TP
+.B EPIPE
+The connection was unexpectedly closed or shut down by the other end.
+.TP
+.B ESOCKTNOSUPPORT
+The socket was unconfigured, or an unknown socket type was requested.
+.SH VERSIONS
+AppleTalk is supported by Linux 2.0 or higher.
+The
+.I /proc
+interfaces exist since Linux 2.2.
+.SH NOTES
+Be very careful with the
+.B SO_BROADCAST
+option; it is not privileged in Linux.
+It is easy to overload the network
+with careless sending to broadcast addresses.
+.SS Compatibility
+The basic AppleTalk socket interface is compatible with
+.B netatalk
+on BSD-derived systems.
+Many BSD systems fail to check
+.B SO_BROADCAST
+when sending broadcast frames; this can lead to compatibility problems.
+.PP
+The
+raw
+socket mode is unique to Linux and exists to support the alternative CAP
+package and AppleTalk monitoring tools more easily.
+.SH BUGS
+There are too many inconsistent error values.
+.PP
+The ioctls used to configure routing tables, devices,
+AARP tables, and other devices are not yet described.
+.SH SEE ALSO
+.BR recvmsg (2),
+.BR sendmsg (2),
+.BR capabilities (7),
+.BR socket (7)
diff --git a/man7/environ.7 b/man7/environ.7
new file mode 100644
index 0000000..345b350
--- /dev/null
+++ b/man7/environ.7
@@ -0,0 +1,354 @@
+.\" Copyright (c) 1993 Michael Haardt (michael@moria.de),
+.\" Fri Apr 2 11:32:09 MET DST 1993
+.\" and Andries Brouwer (aeb@cwi.nl), Fri Feb 14 21:47:50 1997.
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Modified Sun Jul 25 10:45:30 1993 by Rik Faith (faith@cs.unc.edu)
+.\" Modified Sun Jul 21 21:25:26 1996 by Andries Brouwer (aeb@cwi.nl)
+.\" Modified Mon Oct 21 17:47:19 1996 by Eric S. Raymond (esr@thyrsus.com)
+.\" Modified Wed Aug 27 20:28:58 1997 by Nicolás Lichtmaier (nick@debian.org)
+.\" Modified Mon Sep 21 00:00:26 1998 by Andries Brouwer (aeb@cwi.nl)
+.\" Modified Wed Jan 24 06:37:24 2001 by Eric S. Raymond (esr@thyrsus.com)
+.\" Modified Thu Dec 13 23:53:27 2001 by Martin Schulze <joey@infodrom.org>
+.\"
+.TH environ 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+environ \- user environment
+.SH SYNOPSIS
+.nf
+.BI "extern char **" environ ;
+.fi
+.SH DESCRIPTION
+The variable
+.I environ
+points to an array of pointers to strings called the "environment".
+The last pointer in this array has the value NULL.
+This array of strings is made available to the process by the
+.BR execve (2)
+call when a new program is started.
+When a child process is created via
+.BR fork (2),
+it inherits a
+.I copy
+of its parent's environment.
+.PP
+By convention, the strings in
+.I environ
+have the form "\fIname\fP\fB=\fP\fIvalue\fP".
+The name is case-sensitive and may not contain
+the character "\fB=\fP".
+The value can be anything that can be represented as a string.
+The name and the value may not contain an embedded null byte (\[aq]\e0\[aq]),
+since this is assumed to terminate the string.
+.PP
+Environment variables may be placed in the shell's environment by the
+.I export
+command in
+.BR sh (1),
+or by the
+.I setenv
+command if you use
+.BR csh (1).
+.PP
+The initial environment of the shell is populated in various ways,
+such as definitions from
+.I /etc/environment
+that are processed by
+.BR pam_env (8)
+for all users at login time (on systems that employ
+.BR pam (8)).
+In addition, various shell initialization scripts, such as the system-wide
+.I /etc/profile
+script and per-user initialization scripts may include commands
+that add variables to the shell's environment;
+see the manual page of your preferred shell for details.
+.PP
+Bourne-style shells support the syntax
+.PP
+.in +4n
+.EX
+NAME=value command
+.EE
+.in
+.PP
+to create an environment variable definition only in the scope
+of the process that executes
+.IR command .
+Multiple variable definitions, separated by white space, may precede
+.IR command .
+.PP
+Arguments may also be placed in the
+environment at the point of an
+.BR exec (3).
+A C program can manipulate its environment using the functions
+.BR getenv (3),
+.BR putenv (3),
+.BR setenv (3),
+and
+.BR unsetenv (3).
+.PP
+What follows is a list of environment variables typically seen on a
+system.
+This list is incomplete and includes only common variables seen
+by average users in their day-to-day routine.
+Environment variables specific to a particular program or library function
+are documented in the ENVIRONMENT section of the appropriate manual page.
+.TP
+.B USER
+The name of the logged-in user (used by some BSD-derived programs).
+Set at login time, see section NOTES below.
+.TP
+.B LOGNAME
+The name of the logged-in user (used by some System-V derived programs).
+Set at login time, see section NOTES below.
+.TP
+.B HOME
+A user's login directory.
+Set at login time, see section NOTES below.
+.TP
+.B LANG
+The name of a locale to use for locale categories when not overridden
+by
+.B LC_ALL
+or more specific environment variables such as
+.BR LC_COLLATE ,
+.BR LC_CTYPE ,
+.BR LC_MESSAGES ,
+.BR LC_MONETARY ,
+.BR LC_NUMERIC ,
+and
+.B LC_TIME
+(see
+.BR locale (7)
+for further details of the
+.B LC_*
+environment variables).
+.TP
+.B PATH
+The sequence of directory prefixes that
+.BR sh (1)
+and many other
+programs employ when searching for an executable file that is specified
+as a simple filename (i.e., a pathname that contains no slashes).
+The prefixes are separated by colons (\fB:\fP).
+The list of prefixes is searched from beginning to end,
+by checking the pathname formed by concatenating
+a prefix, a slash, and the filename,
+until a file with execute permission is found.
+.IP
+As a legacy feature, a zero-length prefix
+(specified as two adjacent colons, or an initial or terminating colon)
+is interpreted to mean the current working directory.
+However, use of this feature is deprecated,
+and POSIX notes that a conforming application shall use
+an explicit pathname (e.g.,
+.IR . )
+to specify the current working directory.
+.IP
+Analogously to
+.BR PATH ,
+one has
+.B CDPATH
+used by some shells to find the target
+of a change directory command,
+.B MANPATH
+used by
+.BR man (1)
+to find manual pages, and so on.
+.TP
+.B PWD
+Absolute path to the current working directory;
+required to be partially canonical (no
+.I .\&
+or
+.I ..\&
+components).
+.TP
+.B SHELL
+The absolute pathname of the user's login shell.
+Set at login time, see section NOTES below.
+.TP
+.B TERM
+The terminal type for which output is to be prepared.
+.TP
+.B PAGER
+The user's preferred utility to display text files.
+Any string acceptable as a command_string operand to the
+.I sh\ \-c
+command shall be valid.
+If
+.B PAGER
+is null or is not set,
+then applications that launch a pager will default to a program such as
+.BR less (1)
+or
+.BR more (1).
+.TP
+.BR EDITOR / VISUAL
+The user's preferred utility to edit text files.
+Any string acceptable as a command_string operand to the
+.I sh\ \-c
+command shall be valid.
+.\" .TP
+.\" .B BROWSER
+.\" The user's preferred utility to browse URLs. Sequence of colon-separated
+.\" browser commands. See http://www.catb.org/\[ti]esr/BROWSER/ .
+.PP
+Note that the behavior of many programs and library routines is
+influenced by the presence or value of certain environment variables.
+Examples include the following:
+.IP \[bu] 3
+The variables
+.BR LANG ", " LANGUAGE ", " NLSPATH ", " LOCPATH ,
+.BR LC_ALL ", " LC_MESSAGES ,
+and so on influence locale handling; see
+.BR catopen (3),
+.BR gettext (3),
+and
+.BR locale (7).
+.IP \[bu]
+.B TMPDIR
+influences the path prefix of names created by
+.BR tempnam (3)
+and other routines, and the temporary directory used by
+.BR sort (1)
+and other programs.
+.IP \[bu]
+.BR LD_LIBRARY_PATH ", " LD_PRELOAD ,
+and other
+.B LD_*
+variables influence the behavior of the dynamic loader/linker.
+See also
+.BR ld.so (8).
+.IP \[bu]
+.B POSIXLY_CORRECT
+makes certain programs and library routines follow
+the prescriptions of POSIX.
+.IP \[bu]
+The behavior of
+.BR malloc (3)
+is influenced by
+.B MALLOC_*
+variables.
+.IP \[bu]
+The variable
+.B HOSTALIASES
+gives the name of a file containing aliases
+to be used with
+.BR gethostbyname (3).
+.IP \[bu]
+.BR TZ " and " TZDIR
+give timezone information used by
+.BR tzset (3)
+and through that by functions like
+.BR ctime (3),
+.BR localtime (3),
+.BR mktime (3),
+.BR strftime (3).
+See also
+.BR tzselect (8).
+.IP \[bu]
+.B TERMCAP
+gives information on how to address a given terminal
+(or gives the name of a file containing such information).
+.IP \[bu]
+.BR COLUMNS " and " LINES
+tell applications about the window size, possibly overriding the actual size.
+.IP \[bu]
+.BR PRINTER " or " LPDEST
+may specify the desired printer to use.
+See
+.BR lpr (1).
+.SH NOTES
+Historically and by standard,
+.I environ
+must be declared in the user program.
+However, as a (nonstandard) programmer convenience,
+.I environ
+is declared in the header file
+.I <unistd.h>
+if the
+.B _GNU_SOURCE
+feature test macro is defined (see
+.BR feature_test_macros (7)).
+.PP
+The
+.BR prctl (2)
+.B PR_SET_MM_ENV_START
+and
+.B PR_SET_MM_ENV_END
+operations can be used to control the location of the process's environment.
+.PP
+The
+.BR HOME ,
+.BR LOGNAME ,
+.BR SHELL ,
+and
+.B USER
+variables are set when the user is changed via a
+session management interface, typically by a program such as
+.BR login (1)
+from a user database (such as
+.BR passwd (5)).
+(Switching to the root user using
+.BR su (1)
+may result in a mixed environment where
+.B LOGNAME
+and
+.B USER
+are retained from the old user; see the
+.BR su (1)
+manual page.)
+.SH BUGS
+Clearly there is a security risk here.
+Many a system command has been
+tricked into mischief by a user who specified unusual values for
+.BR IFS " or " LD_LIBRARY_PATH .
+.PP
+There is also the risk of name space pollution.
+Programs like
+.I make
+and
+.I autoconf
+allow overriding of default utility names from the
+environment with similarly named variables in all caps.
+Thus one uses
+.B CC
+to select the desired C compiler (and similarly
+.BR MAKE ,
+.BR AR ,
+.BR AS ,
+.BR FC ,
+.BR LD ,
+.BR LEX ,
+.BR RM ,
+.BR YACC ,
+etc.).
+However, in some traditional uses such an environment variable
+gives options for the program instead of a pathname.
+Thus, one has
+.B MORE
+and
+.BR LESS .
+Such usage is considered mistaken, and to be avoided in new
+programs.
+.SH SEE ALSO
+.BR bash (1),
+.BR csh (1),
+.BR env (1),
+.BR login (1),
+.BR printenv (1),
+.BR sh (1),
+.BR su (1),
+.BR tcsh (1),
+.BR execve (2),
+.BR clearenv (3),
+.BR exec (3),
+.BR getenv (3),
+.BR putenv (3),
+.BR setenv (3),
+.BR unsetenv (3),
+.BR locale (7),
+.BR ld.so (8),
+.BR pam_env (8)
diff --git a/man7/epoll.7 b/man7/epoll.7
new file mode 100644
index 0000000..02a53e9
--- /dev/null
+++ b/man7/epoll.7
@@ -0,0 +1,610 @@
+.\" Copyright (C) 2003 Davide Libenzi
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Davide Libenzi <davidel@xmailserver.org>
+.\"
+.TH epoll 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+epoll \- I/O event notification facility
+.SH SYNOPSIS
+.nf
+.B #include <sys/epoll.h>
+.fi
+.SH DESCRIPTION
+The
+.B epoll
+API performs a similar task to
+.BR poll (2):
+monitoring multiple file descriptors to see if I/O is possible on any of them.
+The
+.B epoll
+API can be used either as an edge-triggered or a level-triggered
+interface and scales well to large numbers of watched file descriptors.
+.PP
+The central concept of the
+.B epoll
+API is the
+.B epoll
+.IR instance ,
+an in-kernel data structure which, from a user-space perspective,
+can be considered as a container for two lists:
+.IP \[bu] 3
+The
+.I interest
+list (sometimes also called the
+.B epoll
+set): the set of file descriptors that the process has registered
+an interest in monitoring.
+.IP \[bu]
+The
+.I ready
+list: the set of file descriptors that are "ready" for I/O.
+The ready list is a subset of
+(or, more precisely, a set of references to)
+the file descriptors in the interest list.
+The ready list is dynamically populated
+by the kernel as a result of I/O activity on those file descriptors.
+.PP
+The following system calls are provided to
+create and manage an
+.B epoll
+instance:
+.IP \[bu] 3
+.BR epoll_create (2)
+creates a new
+.B epoll
+instance and returns a file descriptor referring to that instance.
+(The more recent
+.BR epoll_create1 (2)
+extends the functionality of
+.BR epoll_create (2).)
+.IP \[bu]
+Interest in particular file descriptors is then registered via
+.BR epoll_ctl (2),
+which adds items to the interest list of the
+.B epoll
+instance.
+.IP \[bu]
+.BR epoll_wait (2)
+waits for I/O events,
+blocking the calling thread if no events are currently available.
+(This system call can be thought of as fetching items from
+the ready list of the
+.B epoll
+instance.)
+.\"
+.SS Level-triggered and edge-triggered
+The
+.B epoll
+event distribution interface is able to behave both as edge-triggered
+(ET) and as level-triggered (LT).
+The difference between the two mechanisms
+can be described as follows.
+Suppose that
+this scenario happens:
+.IP (1) 5
+The file descriptor that represents the read side of a pipe
+.RI ( rfd )
+is registered on the
+.B epoll
+instance.
+.IP (2)
+A pipe writer writes 2\ kB of data on the write side of the pipe.
+.IP (3)
+A call to
+.BR epoll_wait (2)
+is done that will return
+.I rfd
+as a ready file descriptor.
+.IP (4)
+The pipe reader reads 1\ kB of data from
+.IR rfd .
+.IP (5)
+A call to
+.BR epoll_wait (2)
+is done.
+.PP
+If the
+.I rfd
+file descriptor has been added to the
+.B epoll
+interface using the
+.B EPOLLET
+(edge-triggered)
+flag, the call to
+.BR epoll_wait (2)
+done in step
+.B 5
+will probably hang despite the available data still present in the file
+input buffer;
+meanwhile the remote peer might be expecting a response based on the
+data it already sent.
+The reason for this is that edge-triggered mode
+delivers events only when changes occur on the monitored file descriptor.
+So, in step
+.B 5
+the caller might end up waiting for some data that is already present inside
+the input buffer.
+In the above example, an event on
+.I rfd
+will be generated because of the write done in
+.B 2
+and the event is consumed in
+.BR 3 .
+Since the read operation done in
+.B 4
+does not consume the whole buffer data, the call to
+.BR epoll_wait (2)
+done in step
+.B 5
+might block indefinitely.
+.PP
+An application that employs the
+.B EPOLLET
+flag should use nonblocking file descriptors to avoid having a blocking
+read or write starve a task that is handling multiple file descriptors.
+The suggested way to use
+.B epoll
+as an edge-triggered
+.RB ( EPOLLET )
+interface is as follows:
+.IP (1) 5
+with nonblocking file descriptors; and
+.IP (2)
+by waiting for an event only after
+.BR read (2)
+or
+.BR write (2)
+return
+.BR EAGAIN .
+.PP
+By contrast, when used as a level-triggered interface
+(the default, when
+.B EPOLLET
+is not specified),
+.B epoll
+is simply a faster
+.BR poll (2),
+and can be used wherever the latter is used since it shares the
+same semantics.
+.PP
+Since even with edge-triggered
+.BR epoll ,
+multiple events can be generated upon receipt of multiple chunks of data,
+the caller has the option to specify the
+.B EPOLLONESHOT
+flag, to tell
+.B epoll
+to disable the associated file descriptor after the receipt of an event with
+.BR epoll_wait (2).
+When the
+.B EPOLLONESHOT
+flag is specified,
+it is the caller's responsibility to rearm the file descriptor using
+.BR epoll_ctl (2)
+with
+.BR EPOLL_CTL_MOD .
+.PP
+If multiple threads
+(or processes, if child processes have inherited the
+.B epoll
+file descriptor across
+.BR fork (2))
+are blocked in
+.BR epoll_wait (2)
+waiting on the same epoll file descriptor and a file descriptor
+in the interest list that is marked for edge-triggered
+.RB ( EPOLLET )
+notification becomes ready,
+just one of the threads (or processes) is awoken from
+.BR epoll_wait (2).
+This provides a useful optimization for avoiding "thundering herd" wake-ups
+in some scenarios.
+.\"
+.SS Interaction with autosleep
+If the system is in
+.B autosleep
+mode via
+.I /sys/power/autosleep
+and an event happens which wakes the device from sleep, the device
+driver will keep the device awake only until that event is queued.
+To keep the device awake until the event has been processed,
+it is necessary to use the
+.BR epoll_ctl (2)
+.B EPOLLWAKEUP
+flag.
+.PP
+When the
+.B EPOLLWAKEUP
+flag is set in the
+.B events
+field for a
+.IR "struct epoll_event" ,
+the system will be kept awake from the moment the event is queued,
+through the
+.BR epoll_wait (2)
+call which returns the event until the subsequent
+.BR epoll_wait (2)
+call.
+If the event should keep the system awake beyond that time,
+then a separate
+.I wake_lock
+should be taken before the second
+.BR epoll_wait (2)
+call.
+.SS /proc interfaces
+The following interfaces can be used to limit the amount of
+kernel memory consumed by epoll:
+.\" Following was added in Linux 2.6.28, but then removed in Linux 2.6.29
+.\" .TP
+.\" .IR /proc/sys/fs/epoll/max_user_instances " (since Linux 2.6.28)"
+.\" This specifies an upper limit on the number of epoll instances
+.\" that can be created per real user ID.
+.TP
+.IR /proc/sys/fs/epoll/max_user_watches " (since Linux 2.6.28)"
+This specifies a limit on the total number of
+file descriptors that a user can register across
+all epoll instances on the system.
+The limit is per real user ID.
+Each registered file descriptor costs roughly 90 bytes on a 32-bit kernel,
+and roughly 160 bytes on a 64-bit kernel.
+Currently,
+.\" Linux 2.6.29 (in Linux 2.6.28, the default was 1/32 of lowmem)
+the default value for
+.I max_user_watches
+is 1/25 (4%) of the available low memory,
+divided by the registration cost in bytes.
+.SS Example for suggested usage
+While the usage of
+.B epoll
+when employed as a level-triggered interface does have the same
+semantics as
+.BR poll (2),
+the edge-triggered usage requires more clarification to avoid stalls
+in the application event loop.
+In this example, listener is a
+nonblocking socket on which
+.BR listen (2)
+has been called.
+The function
+.I do_use_fd()
+uses the new ready file descriptor until
+.B EAGAIN
+is returned by either
+.BR read (2)
+or
+.BR write (2).
+An event-driven state machine application should, after having received
+.BR EAGAIN ,
+record its current state so that at the next call to
+.I do_use_fd()
+it will continue to
+.BR read (2)
+or
+.BR write (2)
+from where it stopped before.
+.PP
+.in +4n
+.EX
+#define MAX_EVENTS 10
+struct epoll_event ev, events[MAX_EVENTS];
+int listen_sock, conn_sock, nfds, epollfd;
+\&
+/* Code to set up listening socket, \[aq]listen_sock\[aq],
+ (socket(), bind(), listen()) omitted. */
+\&
+epollfd = epoll_create1(0);
+if (epollfd == \-1) {
+ perror("epoll_create1");
+ exit(EXIT_FAILURE);
+}
+\&
+ev.events = EPOLLIN;
+ev.data.fd = listen_sock;
+if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == \-1) {
+ perror("epoll_ctl: listen_sock");
+ exit(EXIT_FAILURE);
+}
+\&
+for (;;) {
+ nfds = epoll_wait(epollfd, events, MAX_EVENTS, \-1);
+ if (nfds == \-1) {
+ perror("epoll_wait");
+ exit(EXIT_FAILURE);
+ }
+\&
+ for (n = 0; n < nfds; ++n) {
+ if (events[n].data.fd == listen_sock) {
+ conn_sock = accept(listen_sock,
+ (struct sockaddr *) &addr, &addrlen);
+ if (conn_sock == \-1) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ setnonblocking(conn_sock);
+ ev.events = EPOLLIN | EPOLLET;
+ ev.data.fd = conn_sock;
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock,
+ &ev) == \-1) {
+ perror("epoll_ctl: conn_sock");
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ do_use_fd(events[n].data.fd);
+ }
+ }
+}
+.EE
+.in
+.PP
+When used as an edge-triggered interface, for performance reasons, it is
+possible to add the file descriptor inside the
+.B epoll
+interface
+.RB ( EPOLL_CTL_ADD )
+once by specifying
+.RB ( EPOLLIN | EPOLLOUT ).
+This allows you to avoid
+continuously switching between
+.B EPOLLIN
+and
+.B EPOLLOUT
+calling
+.BR epoll_ctl (2)
+with
+.BR EPOLL_CTL_MOD .
+.SS Questions and answers
+.IP \[bu] 3
+What is the key used to distinguish the file descriptors registered in an
+interest list?
+.IP
+The key is the combination of the file descriptor number and
+the open file description
+(also known as an "open file handle",
+the kernel's internal representation of an open file).
+.IP \[bu]
+What happens if you register the same file descriptor on an
+.B epoll
+instance twice?
+.IP
+You will probably get
+.BR EEXIST .
+However, it is possible to add a duplicate
+.RB ( dup (2),
+.BR dup2 (2),
+.BR fcntl (2)
+.BR F_DUPFD )
+file descriptor to the same
+.B epoll
+instance.
+.\" But a file descriptor duplicated by fork(2) can't be added to the
+.\" set, because the [file *, fd] pair is already in the epoll set.
+.\" That is a somewhat ugly inconsistency. On the one hand, a child process
+.\" cannot add the duplicate file descriptor to the epoll set. (In every
+.\" other case that I can think of, file descriptors duplicated by fork have
+.\" similar semantics to file descriptors duplicated by dup() and friends.) On
+.\" the other hand, the very fact that the child has a duplicate of the
+.\" file descriptor means that even if the parent closes its file descriptor,
+.\" then epoll_wait() in the parent will continue to receive notifications for
+.\" that file descriptor because of the duplicated file descriptor in the child.
+.\"
+.\" See http://thread.gmane.org/gmane.linux.kernel/596462/
+.\" "epoll design problems with common fork/exec patterns"
+.\"
+.\" mtk, Feb 2008
+This can be a useful technique for filtering events,
+if the duplicate file descriptors are registered with different
+.I events
+masks.
+.IP \[bu]
+Can two
+.B epoll
+instances wait for the same file descriptor?
+If so, are events reported to both
+.B epoll
+file descriptors?
+.IP
+Yes, and events would be reported to both.
+However, careful programming may be needed to do this correctly.
+.IP \[bu]
+Is the
+.B epoll
+file descriptor itself poll/epoll/selectable?
+.IP
+Yes.
+If an
+.B epoll
+file descriptor has events waiting, then it will
+be reported as being readable.
+.IP \[bu]
+What happens if one attempts to put an
+.B epoll
+file descriptor into its own file descriptor set?
+.IP
+The
+.BR epoll_ctl (2)
+call fails
+.RB ( EINVAL ).
+However, you can add an
+.B epoll
+file descriptor inside another
+.B epoll
+file descriptor set.
+.IP \[bu]
+Can I send an
+.B epoll
+file descriptor over a UNIX domain socket to another process?
+.IP
+Yes, but it does not make sense to do this, since the receiving process
+would not have copies of the file descriptors in the interest list.
+.IP \[bu]
+Will closing a file descriptor cause it to be removed from all
+.B epoll
+interest lists?
+.IP
+Yes, but be aware of the following point.
+A file descriptor is a reference to an open file description (see
+.BR open (2)).
+Whenever a file descriptor is duplicated via
+.BR dup (2),
+.BR dup2 (2),
+.BR fcntl (2)
+.BR F_DUPFD ,
+or
+.BR fork (2),
+a new file descriptor referring to the same open file description is
+created.
+An open file description continues to exist until all
+file descriptors referring to it have been closed.
+.IP
+A file descriptor is removed from an
+interest list only after all the file descriptors referring to the underlying
+open file description have been closed.
+This means that even after a file descriptor that is part of an
+interest list has been closed,
+events may be reported for that file descriptor if other file
+descriptors referring to the same underlying file description remain open.
+To prevent this happening,
+the file descriptor must be explicitly removed from the interest list (using
+.BR epoll_ctl (2)
+.BR EPOLL_CTL_DEL )
+before it is duplicated.
+Alternatively,
+the application must ensure that all file descriptors are closed
+(which may be difficult if file descriptors were duplicated
+behind the scenes by library functions that used
+.BR dup (2)
+or
+.BR fork (2)).
+.IP \[bu]
+If more than one event occurs between
+.BR epoll_wait (2)
+calls, are they combined or reported separately?
+.IP
+They will be combined.
+.IP \[bu]
+Does an operation on a file descriptor affect the
+already collected but not yet reported events?
+.IP
+You can do two operations on an existing file descriptor.
+Remove would be meaningless for
+this case.
+Modify will reread available I/O.
+.IP \[bu]
+Do I need to continuously read/write a file descriptor
+until
+.B EAGAIN
+when using the
+.B EPOLLET
+flag (edge-triggered behavior)?
+.IP
+Receiving an event from
+.BR epoll_wait (2)
+should suggest to you that the
+file descriptor is ready for the requested I/O operation.
+You must consider it ready until the next (nonblocking)
+read/write yields
+.BR EAGAIN .
+When and how you will use the file descriptor is entirely up to you.
+.IP
+For packet/token-oriented files (e.g., datagram socket,
+terminal in canonical mode),
+the only way to detect the end of the read/write I/O space
+is to continue to read/write until
+.BR EAGAIN .
+.IP
+For stream-oriented files (e.g., pipe, FIFO, stream socket), the
+condition that the read/write I/O space is exhausted can also be detected by
+checking the amount of data read from / written to the target file
+descriptor.
+For example, if you call
+.BR read (2)
+by asking to read a certain amount of data and
+.BR read (2)
+returns a lower number of bytes, you
+can be sure of having exhausted the read I/O space for the file
+descriptor.
+The same is true when writing using
+.BR write (2).
+(Avoid this latter technique if you cannot guarantee that
+the monitored file descriptor always refers to a stream-oriented file.)
+.SS Possible pitfalls and ways to avoid them
+.IP \[bu] 3
+.B Starvation (edge-triggered)
+.IP
+If there is a large amount of I/O space,
+it is possible that by trying to drain
+it the other files will not get processed causing starvation.
+(This problem is not specific to
+.BR epoll .)
+.IP
+The solution is to maintain a ready list
+and mark the file descriptor as ready
+in its associated data structure, thereby allowing the application to
+remember which files need to be processed but still round robin amongst
+all the ready files.
+This also supports ignoring subsequent events you
+receive for file descriptors that are already ready.
+.IP \[bu]
+.B If using an event cache...
+.IP
+If you use an event cache or store all the file descriptors returned from
+.BR epoll_wait (2),
+then make sure to provide a way to mark
+its closure dynamically (i.e., caused by
+a previous event's processing).
+Suppose you receive 100 events from
+.BR epoll_wait (2),
+and in event #47 a condition causes event #13 to be closed.
+If you remove the structure and
+.BR close (2)
+the file descriptor for event #13, then your
+event cache might still say there are events waiting for that
+file descriptor causing confusion.
+.IP
+One solution for this is to call, during the processing of event 47,
+.BR epoll_ctl ( EPOLL_CTL_DEL )
+to delete file descriptor 13 and
+.BR close (2),
+then mark its associated
+data structure as removed and link it to a cleanup list.
+If you find another
+event for file descriptor 13 in your batch processing,
+you will discover the file descriptor had been
+previously removed and there will be no confusion.
+.SH VERSIONS
+Some other systems provide similar mechanisms;
+for example,
+FreeBSD has
+.IR kqueue ,
+and Solaris has
+.IR /dev/poll .
+.SH STANDARDS
+Linux.
+.SH HISTORY
+Linux 2.5.44.
+.\" Its interface should be finalized in Linux 2.5.66.
+glibc 2.3.2.
+.SH NOTES
+The set of file descriptors that is being monitored via
+an epoll file descriptor can be viewed via the entry for
+the epoll file descriptor in the process's
+.IR /proc/ pid /fdinfo
+directory.
+See
+.BR proc (5)
+for further details.
+.PP
+The
+.BR kcmp (2)
+.B KCMP_EPOLL_TFD
+operation can be used to test whether a file descriptor
+is present in an epoll instance.
+.SH SEE ALSO
+.BR epoll_create (2),
+.BR epoll_create1 (2),
+.BR epoll_ctl (2),
+.BR epoll_wait (2),
+.BR poll (2),
+.BR select (2)
diff --git a/man7/fanotify.7 b/man7/fanotify.7
new file mode 100644
index 0000000..eea8835
--- /dev/null
+++ b/man7/fanotify.7
@@ -0,0 +1,1455 @@
+.\" Copyright (C) 2013, Heinrich Schuchardt <xypron.glpk@gmx.de>
+.\" and Copyright (C) 2014, Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.TH fanotify 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+fanotify \- monitoring filesystem events
+.SH DESCRIPTION
+The fanotify API provides notification and interception of
+filesystem events.
+Use cases include virus scanning and hierarchical storage management.
+In the original fanotify API, only a limited set of events was supported.
+In particular, there was no support for create, delete, and move events.
+The support for those events was added in Linux 5.1.
+(See
+.BR inotify (7)
+for details of an API that did notify those events before Linux 5.1.)
+.PP
+Additional capabilities compared to the
+.BR inotify (7)
+API include the ability to monitor all of the objects
+in a mounted filesystem,
+the ability to make access permission decisions, and the
+possibility to read or modify files before access by other applications.
+.PP
+The following system calls are used with this API:
+.BR fanotify_init (2),
+.BR fanotify_mark (2),
+.BR read (2),
+.BR write (2),
+and
+.BR close (2).
+.SS fanotify_init(), fanotify_mark(), and notification groups
+The
+.BR fanotify_init (2)
+system call creates and initializes an fanotify notification group
+and returns a file descriptor referring to it.
+.PP
+An fanotify notification group is a kernel-internal object that holds
+a list of files, directories, filesystems, and mounts for which
+events shall be created.
+.PP
+For each entry in an fanotify notification group, two bit masks exist: the
+.I mark
+mask and the
+.I ignore
+mask.
+The mark mask defines file activities for which an event shall be created.
+The ignore mask defines activities for which no event shall be generated.
+Having these two types of masks permits a filesystem, mount, or
+directory to be marked for receiving events, while at the same time
+ignoring events for specific objects under a mount or directory.
+.PP
+The
+.BR fanotify_mark (2)
+system call adds a file, directory, filesystem, or mount to a
+notification group and specifies which events
+shall be reported (or ignored), or removes or modifies such an entry.
+.PP
+A possible usage of the ignore mask is for a file cache.
+Events of interest for a file cache are modification of a file and closing
+of the same.
+Hence, the cached directory or mount is to be marked to receive these
+events.
+After receiving the first event informing that a file has been modified,
+the corresponding cache entry will be invalidated.
+No further modification events for this file are of interest until the file
+is closed.
+Hence, the modify event can be added to the ignore mask.
+Upon receiving the close event, the modify event can be removed from the
+ignore mask and the file cache entry can be updated.
+.PP
+The entries in the fanotify notification groups refer to files and
+directories via their inode number and to mounts via their mount ID.
+If files or directories are renamed or moved within the same mount,
+the respective entries survive.
+If files or directories are deleted or moved to another mount or if
+filesystems or mounts are unmounted, the corresponding entries are deleted.
+.SS The event queue
+As events occur on the filesystem objects monitored by a notification group,
+the fanotify system generates events that are collected in a queue.
+These events can then be read (using
+.BR read (2)
+or similar)
+from the fanotify file descriptor
+returned by
+.BR fanotify_init (2).
+.PP
+Two types of events are generated:
+.I notification
+events and
+.I permission
+events.
+Notification events are merely informative and require no action to be taken
+by the receiving application with one exception: if a valid file descriptor
+is provided within a generic event, the file descriptor must be closed.
+Permission events are requests to the receiving application to decide
+whether permission for a file access shall be granted.
+For these events, the recipient must write a response which decides whether
+access is granted or not.
+.PP
+An event is removed from the event queue of the fanotify group
+when it has been read.
+Permission events that have been read are kept in an internal list of the
+fanotify group until either a permission decision has been taken by
+writing to the fanotify file descriptor or the fanotify file descriptor
+is closed.
+.SS Reading fanotify events
+Calling
+.BR read (2)
+for the file descriptor returned by
+.BR fanotify_init (2)
+blocks (if the flag
+.B FAN_NONBLOCK
+is not specified in the call to
+.BR fanotify_init (2))
+until either a file event occurs or the call is interrupted by a signal
+(see
+.BR signal (7)).
+.PP
+After a successful
+.BR read (2),
+the read buffer contains one or more of the following structures:
+.PP
+.in +4n
+.EX
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+.EE
+.in
+.PP
+Information records are
+supplemental pieces of information that
+may be provided alongside the generic
+.I fanotify_event_metadata
+structure.
+The
+.I flags
+passed to
+.BR fanotify_init (2)
+have influence over the type of information records that
+may be returned for an event.
+For example,
+if a notification group is initialized with
+.B FAN_REPORT_FID
+or
+.BR FAN_REPORT_DIR_FID ,
+then event listeners should also expect to receive a
+.I fanotify_event_info_fid
+structure alongside the
+.I fanotify_event_metadata
+structure,
+whereby file handles are used to
+identify filesystem objects
+rather than file descriptors.
+Information records may also be stacked,
+meaning that using the various
+.B FAN_REPORT_*
+flags in conjunction with one another is supported.
+In such cases,
+multiple information records can be returned for an event
+alongside the generic
+.I fanotify_event_metadata
+structure.
+For example,
+if a notification group is initialized with
+.B FAN_REPORT_TARGET_FID
+and
+.BR FAN_REPORT_PIDFD ,
+then an event listener should expect to receive up to two
+.I fanotify_event_info_fid
+information records and one
+.I fanotify_event_info_pidfd
+information record alongside the generic
+.I fanotify_event_metadata
+structure.
+Importantly,
+fanotify provides no guarantee around
+the ordering of information records
+when a notification group is initialized with a
+stacked configuration.
+Each information record has a nested structure of type
+.IR fanotify_event_info_header .
+It is imperative for event listeners to inspect the
+.I info_type
+field of this structure in order to
+determine the type of information record that
+had been received for a given event.
+.PP
+In cases where an fanotify group
+identifies filesystem objects by file handles,
+event listeners should also expect to
+receive one or more of the below
+information record objects alongside the generic
+.I fanotify_event_metadata
+structure within the read buffer:
+.PP
+.in +4n
+.EX
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ unsigned char file_handle[0];
+};
+.EE
+.in
+.PP
+In cases where an fanotify group is initialized with
+.BR FAN_REPORT_PIDFD ,
+event listeners should expect to receive the below
+information record object alongside the generic
+.I fanotify_event_metadata
+structure within the read buffer:
+.PP
+.in +4n
+.EX
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+.EE
+.in
+.PP
+In case of a
+.B FAN_FS_ERROR
+event,
+an additional information record describing the error that occurred
+is returned alongside the generic
+.I fanotify_event_metadata
+structure within the read buffer.
+This structure is defined as follows:
+.PP
+.in +4n
+.EX
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+.EE
+.in
+.PP
+All information records contain a nested structure of type
+.IR fanotify_event_info_header .
+This structure holds meta-information about the information record
+that may have been returned alongside the generic
+.I fanotify_event_metadata
+structure.
+This structure is defined as follows:
+.PP
+.in +4n
+.EX
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+.EE
+.in
+.PP
+For performance reasons, it is recommended to use a large
+buffer size (for example, 4096 bytes),
+so that multiple events can be retrieved by a single
+.BR read (2).
+.PP
+The return value of
+.BR read (2)
+is the number of bytes placed in the buffer,
+or \-1 in case of an error (but see BUGS).
+.PP
+The fields of the
+.I fanotify_event_metadata
+structure are as follows:
+.TP
+.I event_len
+This is the length of the data for the current event and the offset
+to the next event in the buffer.
+Unless the group identifies filesystem objects by file handles, the value of
+.I event_len
+is always
+.BR FAN_EVENT_METADATA_LEN .
+For a group that identifies filesystem objects by file handles,
+.I event_len
+also includes the variable length file identifier records.
+.TP
+.I vers
+This field holds a version number for the structure.
+It must be compared to
+.B FANOTIFY_METADATA_VERSION
+to verify that the structures returned at run time match
+the structures defined at compile time.
+In case of a mismatch, the application should abandon trying to use the
+fanotify file descriptor.
+.TP
+.I reserved
+This field is not used.
+.TP
+.I metadata_len
+This is the length of the structure.
+The field was introduced to facilitate the implementation of
+optional headers per event type.
+No such optional headers exist in the current implementation.
+.TP
+.I mask
+This is a bit mask describing the event (see below).
+.TP
+.I fd
+This is an open file descriptor for the object being accessed, or
+.B FAN_NOFD
+if a queue overflow occurred.
+With an fanotify group that identifies filesystem objects by file handles,
+applications should expect this value to be set to
+.B FAN_NOFD
+for each event that is received.
+The file descriptor can be used to access the contents
+of the monitored file or directory.
+The reading application is responsible for closing this file descriptor.
+.IP
+When calling
+.BR fanotify_init (2),
+the caller may specify (via the
+.I event_f_flags
+argument) various file status flags that are to be set
+on the open file description that corresponds to this file descriptor.
+In addition, the (kernel-internal)
+.B FMODE_NONOTIFY
+file status flag is set on the open file description.
+This flag suppresses fanotify event generation.
+Hence, when the receiver of the fanotify event accesses the notified file or
+directory using this file descriptor, no additional events will be created.
+.TP
+.I pid
+If flag
+.B FAN_REPORT_TID
+was set in
+.BR fanotify_init (2),
+this is the TID of the thread that caused the event.
+Otherwise, this is the PID of the process that caused the event.
+.PP
+A program listening to fanotify events can compare this PID
+to the PID returned by
+.BR getpid (2),
+to determine whether the event is caused by the listener itself,
+or is due to a file access by another process.
+.PP
+The bit mask in
+.I mask
+indicates which events have occurred for a single filesystem object.
+Multiple bits may be set in this mask,
+if more than one event occurred for the monitored filesystem object.
+In particular,
+consecutive events for the same filesystem object and originating from the
+same process may be merged into a single event, with the exception that two
+permission events are never merged into one queue entry.
+.PP
+The bits that may appear in
+.I mask
+are as follows:
+.TP
+.B FAN_ACCESS
+A file or a directory (but see BUGS) was accessed (read).
+.TP
+.B FAN_OPEN
+A file or a directory was opened.
+.TP
+.B FAN_OPEN_EXEC
+A file was opened with the intent to be executed.
+See NOTES in
+.BR fanotify_mark (2)
+for additional details.
+.TP
+.B FAN_ATTRIB
+The metadata of a file or directory was changed.
+.TP
+.B FAN_CREATE
+A child file or directory was created in a watched parent.
+.TP
+.B FAN_DELETE
+A child file or directory was deleted in a watched parent.
+.TP
+.B FAN_DELETE_SELF
+A watched file or directory was deleted.
+.TP
+.B FAN_FS_ERROR
+A filesystem error was detected.
+.TP
+.B FAN_RENAME
+A file or directory has been moved to or from a watched parent directory.
+.TP
+.B FAN_MOVED_FROM
+A file or directory has been moved from a watched parent directory.
+.TP
+.B FAN_MOVED_TO
+A file or directory has been moved to a watched parent directory.
+.TP
+.B FAN_MOVE_SELF
+A watched file or directory was moved.
+.TP
+.B FAN_MODIFY
+A file was modified.
+.TP
+.B FAN_CLOSE_WRITE
+A file that was opened for writing
+.RB ( O_WRONLY
+or
+.BR O_RDWR )
+was closed.
+.TP
+.B FAN_CLOSE_NOWRITE
+A file or directory that was opened read-only
+.RB ( O_RDONLY )
+was closed.
+.TP
+.B FAN_Q_OVERFLOW
+The event queue exceeded the limit on number of events.
+This limit can be overridden by specifying the
+.B FAN_UNLIMITED_QUEUE
+flag when calling
+.BR fanotify_init (2).
+.TP
+.B FAN_ACCESS_PERM
+An application wants to read a file or directory, for example using
+.BR read (2)
+or
+.BR readdir (2).
+The reader must write a response (as described below)
+that determines whether the permission to
+access the filesystem object shall be granted.
+.TP
+.B FAN_OPEN_PERM
+An application wants to open a file or directory.
+The reader must write a response that determines whether the permission to
+open the filesystem object shall be granted.
+.TP
+.B FAN_OPEN_EXEC_PERM
+An application wants to open a file for execution.
+The reader must write a response that determines whether the permission to
+open the filesystem object for execution shall be granted.
+See NOTES in
+.BR fanotify_mark (2)
+for additional details.
+.PP
+To check for any close event, the following bit mask may be used:
+.TP
+.B FAN_CLOSE
+A file was closed.
+This is a synonym for:
+.IP
+.in +4n
+.EX
+FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE
+.EE
+.in
+.PP
+To check for any move event, the following bit mask may be used:
+.TP
+.B FAN_MOVE
+A file or directory was moved.
+This is a synonym for:
+.IP
+.in +4n
+.EX
+FAN_MOVED_FROM | FAN_MOVED_TO
+.EE
+.in
+.PP
+The following bits may appear in
+.I mask
+only in conjunction with other event type bits:
+.TP
+.B FAN_ONDIR
+The events described in the
+.I mask
+have occurred on a directory object.
+Reporting events on directories requires setting this flag in the mark mask.
+See
+.BR fanotify_mark (2)
+for additional details.
+The
+.B FAN_ONDIR
+flag is reported in an event mask only if the fanotify group identifies
+filesystem objects by file handles.
+.PP
+Information records that are supplied alongside the generic
+.I fanotify_event_metadata
+structure will always contain a nested structure of type
+.IR fanotify_event_info_header .
+The fields of the
+.I fanotify_event_info_header
+are as follows:
+.TP
+.I info_type
+A unique integer value representing
+the type of information record object received for an event.
+The value of this field can be set to one of the following:
+.BR FAN_EVENT_INFO_TYPE_FID ,
+.BR FAN_EVENT_INFO_TYPE_DFID ,
+.BR FAN_EVENT_INFO_TYPE_DFID_NAME ,
+or
+.BR FAN_EVENT_INFO_TYPE_PIDFD .
+The value set for this field
+is dependent on the flags that have been supplied to
+.BR fanotify_init (2).
+Refer to the field details of each information record object type below
+to understand the different cases in which the
+.I info_type
+values can be set.
+.TP
+.I pad
+This field is currently not used by any information record object type
+and therefore is set to zero.
+.TP
+.I len
+The value of
+.I len
+is set to the size of the information record object,
+including the
+.IR fanotify_event_info_header .
+The total size of all additional information records
+is not expected to be larger than
+.RI ( event_len
+\-
+.IR metadata_len ).
+.PP
+The fields of the
+.I fanotify_event_info_fid
+structure are as follows:
+.TP
+.I hdr
+This is a structure of type
+.IR fanotify_event_info_header .
+For example, when an fanotify file descriptor is created using
+.BR FAN_REPORT_FID ,
+a single information record is expected to be attached to the event with
+.I info_type
+field value of
+.BR FAN_EVENT_INFO_TYPE_FID .
+When an fanotify file descriptor is created using the combination of
+.B FAN_REPORT_FID
+and
+.BR FAN_REPORT_DIR_FID ,
+there may be two information records attached to the event:
+one with
+.I info_type
+field value of
+.BR FAN_EVENT_INFO_TYPE_DFID ,
+identifying a parent directory object, and one with
+.I info_type
+field value of
+.BR FAN_EVENT_INFO_TYPE_FID ,
+identifying a child object.
+Note that for the directory entry modification events
+.BR FAN_CREATE ,
+.BR FAN_DELETE ,
+.BR FAN_MOVE ,
+and
+.BR FAN_RENAME ,
+an information record identifying the created/deleted/moved child object
+is reported only if an fanotify group was initialized with the flag
+.BR FAN_REPORT_TARGET_FID .
+.TP
+.I fsid
+This is a unique identifier of the filesystem containing the object
+associated with the event.
+It is a structure of type
+.I __kernel_fsid_t
+and contains the same value as
+.I f_fsid
+when calling
+.BR statfs (2).
+.TP
+.I file_handle
+This is a variable-length structure of type
+.IR "struct file_handle" .
+It is an opaque handle that corresponds to a specified object on a
+filesystem as returned by
+.BR name_to_handle_at (2).
+It can be used to uniquely identify a file on a filesystem and can be
+passed as an argument to
+.BR open_by_handle_at (2).
+If the value of
+.I info_type
+field is
+.BR FAN_EVENT_INFO_TYPE_DFID_NAME ,
+the file handle is followed by a null-terminated string that identifies the
+created/deleted/moved directory entry name.
+For other events such as
+.BR FAN_OPEN ,
+.BR FAN_ATTRIB ,
+.BR FAN_DELETE_SELF ,
+and
+.BR FAN_MOVE_SELF ,
+if the value of
+.I info_type
+field is
+.BR FAN_EVENT_INFO_TYPE_FID ,
+the
+.I file_handle
+identifies the object correlated to the event.
+If the value of
+.I info_type
+field is
+.BR FAN_EVENT_INFO_TYPE_DFID ,
+the
+.I file_handle
+identifies the directory object correlated to the event or the parent directory
+of a non-directory object correlated to the event.
+If the value of
+.I info_type
+field is
+.BR FAN_EVENT_INFO_TYPE_DFID_NAME ,
+the
+.I file_handle
+identifies the same directory object that would be reported with
+.B FAN_EVENT_INFO_TYPE_DFID
+and the file handle is followed by a null-terminated string that identifies the
+name of a directory entry in that directory, or '.' to identify the directory
+object itself.
+.PP
+The fields of the
+.I fanotify_event_info_pidfd
+structure are as follows:
+.TP
+.I hdr
+This is a structure of type
+.IR fanotify_event_info_header .
+When an fanotify group is initialized using
+.BR FAN_REPORT_PIDFD ,
+the
+.I info_type
+field value of the
+.I fanotify_event_info_header
+is set to
+.BR FAN_EVENT_INFO_TYPE_PIDFD .
+.TP
+.I pidfd
+This is a process file descriptor that refers to
+the process responsible for generating the event.
+The returned process file descriptor is no different from
+one which could be obtained manually if
+.BR pidfd_open (2)
+were to be called on
+.IR fanotify_event_metadata.pid .
+In the instance that an error is encountered during pidfd creation,
+one of two possible error types represented by
+a negative integer value may be returned in this
+.I pidfd
+field.
+In cases where
+the process responsible for generating the event
+has terminated prior to
+the event listener being able to
+read events from the notification queue,
+.B FAN_NOPIDFD
+is returned.
+The pidfd creation for an event is only performed at the time the
+events are read from the notification queue.
+All other possible pidfd creation failures are represented by
+.BR FAN_EPIDFD .
+Once the event listener has dealt with an event
+and the pidfd is no longer required,
+the pidfd should be closed via
+.BR close (2).
+.PP
+The fields of the
+.I fanotify_event_info_error
+structure are as follows:
+.TP
+.I hdr
+This is a structure of type
+.IR fanotify_event_info_header .
+The
+.I info_type
+field is set to
+.BR FAN_EVENT_INFO_TYPE_ERROR .
+.TP
+.I error
+Identifies the type of error that occurred.
+.TP
+.I error_count
+This is a counter of the number of errors suppressed
+since the last error was read.
+.PP
+The following macros are provided to iterate over a buffer containing
+fanotify event metadata returned by a
+.BR read (2)
+from an fanotify file descriptor:
+.TP
+.B FAN_EVENT_OK(meta, len)
+This macro checks the remaining length
+.I len
+of the buffer
+.I meta
+against the length of the metadata structure and the
+.I event_len
+field of the first metadata structure in the buffer.
+.TP
+.B FAN_EVENT_NEXT(meta, len)
+This macro uses the length indicated in the
+.I event_len
+field of the metadata structure pointed to by
+.I meta
+to calculate the address of the next metadata structure that follows
+.IR meta .
+.I len
+is the number of bytes of metadata that currently remain in the buffer.
+The macro returns a pointer to the next metadata structure that follows
+.IR meta ,
+and reduces
+.I len
+by the number of bytes in the metadata structure that
+has been skipped over (i.e., it subtracts
+.I meta\->event_len
+from
+.IR len ).
+.PP
+In addition, there is:
+.TP
+.B FAN_EVENT_METADATA_LEN
+This macro returns the size (in bytes) of the structure
+.IR fanotify_event_metadata .
+This is the minimum size (and currently the only size) of any event metadata.
+.\"
+.SS Monitoring an fanotify file descriptor for events
+When an fanotify event occurs, the fanotify file descriptor is reported as
+readable when passed to
+.BR epoll (7),
+.BR poll (2),
+or
+.BR select (2).
+.SS Dealing with permission events
+For permission events, the application must
+.BR write (2)
+a structure of the following form to the
+fanotify file descriptor:
+.PP
+.in +4n
+.EX
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+.EE
+.in
+.PP
+The fields of this structure are as follows:
+.TP
+.I fd
+This is the file descriptor from the structure
+.IR fanotify_event_metadata .
+.TP
+.I response
+This field indicates whether or not the permission is to be granted.
+Its value must be either
+.B FAN_ALLOW
+to allow the file operation or
+.B FAN_DENY
+to deny the file operation.
+.PP
+If access is denied, the requesting application's call will fail with an
+.B EPERM
+error.
+Additionally, if the notification group has been created with the
+.B FAN_ENABLE_AUDIT
+flag, then the
+.B FAN_AUDIT
+flag can be set in the
+.I response
+field.
+In that case, the audit subsystem will log information about the access
+decision to the audit logs.
+.\"
+.SS Monitoring filesystems for errors
+A single
+.B FAN_FS_ERROR
+event is stored per filesystem at once.
+Extra error messages are suppressed and accounted for in the
+.I error_count
+field of the existing
+.B FAN_FS_ERROR
+event record,
+but details about the errors are lost.
+.PP
+Errors reported by
+.B FAN_FS_ERROR
+are generic
+.I errno
+values,
+but not all kinds of error types are reported by all filesystems.
+.PP
+Errors not directly related to a file (e.g., superblock corruption)
+are reported with an invalid
+.IR file_handle .
+For these errors, the
+.I file_handle
+will have the field
+.I handle_type
+set to
+.BR FILEID_INVALID ,
+and the handle buffer size set to
+.BR 0 .
+.\"
+.SS Closing the fanotify file descriptor
+When all file descriptors referring to the fanotify notification group are
+closed, the fanotify group is released and its resources
+are freed for reuse by the kernel.
+Upon
+.BR close (2),
+outstanding permission events will be set to allowed.
+.SS /proc interfaces
+The file
+.IR /proc/ pid /fdinfo/ fd
+contains information about fanotify marks for file descriptor
+.I fd
+of process
+.IR pid .
+See
+.BR proc (5)
+for details.
+.PP
+Since Linux 5.13,
+.\" commit 5b8fea65d197f408bb00b251c70d842826d6b70b
+the following interfaces can be used to control the amount of
+kernel resources consumed by fanotify:
+.TP
+.I /proc/sys/fs/fanotify/max_queued_events
+The value in this file is used when an application calls
+.BR fanotify_init (2)
+to set an upper limit on the number of events that can be
+queued to the corresponding fanotify group.
+Events in excess of this limit are dropped, but an
+.B FAN_Q_OVERFLOW
+event is always generated.
+Prior to Linux kernel 5.13,
+.\" commit 5b8fea65d197f408bb00b251c70d842826d6b70b
+the hardcoded limit was 16384 events.
+.TP
+.I /proc/sys/fs/fanotify/max_user_groups
+This specifies an upper limit on the number of fanotify groups
+that can be created per real user ID.
+Prior to Linux kernel 5.13,
+.\" commit 5b8fea65d197f408bb00b251c70d842826d6b70b
+the hardcoded limit was 128 groups per user.
+.TP
+.I /proc/sys/fs/fanotify/max_user_marks
+This specifies an upper limit on the number of fanotify marks
+that can be created per real user ID.
+Prior to Linux kernel 5.13,
+.\" commit 5b8fea65d197f408bb00b251c70d842826d6b70b
+the hardcoded limit was 8192 marks per group (not per user).
+.SH ERRORS
+In addition to the usual errors for
+.BR read (2),
+the following errors can occur when reading from the
+fanotify file descriptor:
+.TP
+.B EINVAL
+The buffer is too small to hold the event.
+.TP
+.B EMFILE
+The per-process limit on the number of open files has been reached.
+See the description of
+.B RLIMIT_NOFILE
+in
+.BR getrlimit (2).
+.TP
+.B ENFILE
+The system-wide limit on the total number of open files has been reached.
+See
+.I /proc/sys/fs/file\-max
+in
+.BR proc (5).
+.TP
+.B ETXTBSY
+This error is returned by
+.BR read (2)
+if
+.B O_RDWR
+or
+.B O_WRONLY
+was specified in the
+.I event_f_flags
+argument when calling
+.BR fanotify_init (2)
+and an event occurred for a monitored file that is currently being executed.
+.PP
+In addition to the usual errors for
+.BR write (2),
+the following errors can occur when writing to the fanotify file descriptor:
+.TP
+.B EINVAL
+Fanotify access permissions are not enabled in the kernel configuration
+or the value of
+.I response
+in the response structure is not valid.
+.TP
+.B ENOENT
+The file descriptor
+.I fd
+in the response structure is not valid.
+This may occur when a response for the permission event has already been
+written.
+.SH STANDARDS
+Linux.
+.SH HISTORY
+The fanotify API was introduced in Linux 2.6.36 and
+enabled in Linux 2.6.37.
+fdinfo support was added in Linux 3.8.
+.SH NOTES
+The fanotify API is available only if the kernel was built with the
+.B CONFIG_FANOTIFY
+configuration option enabled.
+In addition, fanotify permission handling is available only if the
+.B CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+configuration option is enabled.
+.SS Limitations and caveats
+Fanotify reports only events that a user-space program triggers through the
+filesystem API.
+As a result,
+it does not catch remote events that occur on network filesystems.
+.PP
+The fanotify API does not report file accesses and modifications that
+may occur because of
+.BR mmap (2),
+.BR msync (2),
+and
+.BR munmap (2).
+.PP
+Events for directories are created only if the directory itself is opened,
+read, or closed.
+Adding, removing, or changing children of a marked directory does not create
+events for the monitored directory itself.
+.PP
+Fanotify monitoring of directories is not recursive:
+to monitor subdirectories under a directory,
+additional marks must be created.
+The
+.B FAN_CREATE
+event can be used for detecting when a subdirectory has been created under
+a marked directory.
+An additional mark must then be set on the newly created subdirectory.
+This approach is racy, because it can lose events that occurred inside the
+newly created subdirectory, before a mark is added on that subdirectory.
+Monitoring mounts offers the capability to monitor a whole directory tree
+in a race-free manner.
+Monitoring filesystems offers the capability to monitor changes made from
+any mount of a filesystem instance in a race-free manner.
+.PP
+The event queue can overflow.
+In this case, events are lost.
+.SH BUGS
+Before Linux 3.19,
+.BR fallocate (2)
+did not generate fanotify events.
+Since Linux 3.19,
+.\" commit 820c12d5d6c0890bc93dd63893924a13041fdc35
+calls to
+.BR fallocate (2)
+generate
+.B FAN_MODIFY
+events.
+.PP
+As of Linux 3.17,
+the following bugs exist:
+.IP \[bu] 3
+On Linux, a filesystem object may be accessible through multiple paths,
+for example, a part of a filesystem may be remounted using the
+.I \-\-bind
+option of
+.BR mount (8).
+A listener that marked a mount will be notified only of events that were
+triggered for a filesystem object using the same mount.
+Any other event will pass unnoticed.
+.IP \[bu]
+.\" FIXME . A patch was proposed.
+When an event is generated,
+no check is made to see whether the user ID of the
+receiving process has authorization to read or write the file
+before passing a file descriptor for that file.
+This poses a security risk when the
+.B CAP_SYS_ADMIN
+capability is set for programs executed by unprivileged users.
+.IP \[bu]
+If a call to
+.BR read (2)
+processes multiple events from the fanotify queue and an error occurs,
+the return value will be the total length of the events successfully
+copied to the user-space buffer before the error occurred.
+The return value will not be \-1, and
+.I errno
+will not be set.
+Thus, the reading application has no way to detect the error.
+.SH EXAMPLES
+The two example programs below demonstrate the usage of the fanotify API.
+.SS Example program: fanotify_example.c
+The first program is an example of fanotify being
+used with its event object information passed in the form of a file
+descriptor.
+The program marks the mount passed as a command-line argument and
+waits for events of type
+.B FAN_OPEN_PERM
+and
+.BR FAN_CLOSE_WRITE .
+When a permission event occurs, a
+.B FAN_ALLOW
+response is given.
+.PP
+The following shell session shows an example of
+running this program.
+This session involved editing the file
+.IR /home/user/temp/notes .
+Before the file was opened, a
+.B FAN_OPEN_PERM
+event occurred.
+After the file was closed, a
+.B FAN_CLOSE_WRITE
+event occurred.
+Execution of the program ends when the user presses the ENTER key.
+.PP
+.in +4n
+.EX
+# \fB./fanotify_example /home\fP
+Press enter key to terminate.
+Listening for events.
+FAN_OPEN_PERM: File /home/user/temp/notes
+FAN_CLOSE_WRITE: File /home/user/temp/notes
+\&
+Listening for events stopped.
+.EE
+.in
+.SS Program source: fanotify_example.c
+\&
+.EX
+#define _GNU_SOURCE /* Needed to get O_LARGEFILE definition */
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/fanotify.h>
+#include <unistd.h>
+\&
+/* Read all available fanotify events from the file descriptor \[aq]fd\[aq].
+   Every FAN_OPEN_PERM event is answered with FAN_ALLOW, and the pathname
+   of the file behind each event is printed on standard output.
+   The program is terminated if an unexpected error occurs. */
+\&
+static void
+handle_events(int fd)
+{
+    const struct fanotify_event_metadata *metadata;
+    struct fanotify_event_metadata buf[200];
+    ssize_t len;
+    char path[PATH_MAX];
+    ssize_t path_len;
+    char procfd_path[PATH_MAX];
+    struct fanotify_response response;
+\&
+    /* Loop while events can be read from fanotify file descriptor. */
+\&
+    for (;;) {
+\&
+        /* Read some events. */
+\&
+        len = read(fd, buf, sizeof(buf));
+        if (len == \-1 && errno != EAGAIN) {
+            perror("read");
+            exit(EXIT_FAILURE);
+        }
+\&
+        /* Check if end of available data reached. */
+\&
+        if (len <= 0)
+            break;
+\&
+        /* Point to the first event in the buffer. */
+\&
+        metadata = buf;
+\&
+        /* Loop over all events in the buffer. */
+\&
+        while (FAN_EVENT_OK(metadata, len)) {
+\&
+            /* Check that run\-time and compile\-time structures match. */
+\&
+            if (metadata\->vers != FANOTIFY_METADATA_VERSION) {
+                fprintf(stderr,
+                        "Mismatch of fanotify metadata version.\en");
+                exit(EXIT_FAILURE);
+            }
+\&
+            /* metadata\->fd contains either FAN_NOFD, indicating a
+               queue overflow, or a file descriptor (a nonnegative
+               integer). Here, we simply ignore queue overflow. */
+\&
+            if (metadata\->fd >= 0) {
+\&
+                /* Handle open permission event. */
+\&
+                if (metadata\->mask & FAN_OPEN_PERM) {
+                    printf("FAN_OPEN_PERM: ");
+\&
+                    /* Allow file to be opened. The response must
+                       reach the kernel for the permission to be
+                       granted, so a failed write() is treated as
+                       fatal rather than silently ignored. */
+\&
+                    response.fd = metadata\->fd;
+                    response.response = FAN_ALLOW;
+                    if (write(fd, &response, sizeof(response)) == \-1) {
+                        perror("write");
+                        exit(EXIT_FAILURE);
+                    }
+                }
+\&
+                /* Handle closing of writable file event. */
+\&
+                if (metadata\->mask & FAN_CLOSE_WRITE)
+                    printf("FAN_CLOSE_WRITE: ");
+\&
+                /* Retrieve and print pathname of the accessed file. */
+\&
+                snprintf(procfd_path, sizeof(procfd_path),
+                         "/proc/self/fd/%d", metadata\->fd);
+                path_len = readlink(procfd_path, path,
+                                    sizeof(path) \- 1);
+                if (path_len == \-1) {
+                    perror("readlink");
+                    exit(EXIT_FAILURE);
+                }
+\&
+                /* readlink() does not append a terminating null byte. */
+\&
+                path[path_len] = \[aq]\e0\[aq];
+                printf("File %s\en", path);
+\&
+                /* Close the file descriptor of the event. */
+\&
+                close(metadata\->fd);
+            }
+\&
+            /* Advance to next event. */
+\&
+            metadata = FAN_EVENT_NEXT(metadata, len);
+        }
+    }
+}
+\&
+/* Mark the mount named by the single command\-line argument, then report
+   FAN_OPEN_PERM and FAN_CLOSE_WRITE events until input arrives on
+   standard input (or standard input is closed or fails). */
+\&
+int
+main(int argc, char *argv[])
+{
+    char buf;
+    int fd, poll_num;
+    nfds_t nfds;
+    struct pollfd fds[2];
+\&
+    /* Check mount point is supplied. */
+\&
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s MOUNT\en", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+\&
+    printf("Press enter key to terminate.\en");
+\&
+    /* Create the file descriptor for accessing the fanotify API. */
+\&
+    fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT | FAN_NONBLOCK,
+                       O_RDONLY | O_LARGEFILE);
+    if (fd == \-1) {
+        perror("fanotify_init");
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* Mark the mount for:
+       \- permission events before opening files
+       \- notification events after closing a write\-enabled
+         file descriptor. */
+\&
+    if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
+                      FAN_OPEN_PERM | FAN_CLOSE_WRITE, AT_FDCWD,
+                      argv[1]) == \-1) {
+        perror("fanotify_mark");
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* Prepare for polling. */
+\&
+    nfds = 2;
+\&
+    fds[0].fd = STDIN_FILENO;       /* Console input */
+    fds[0].events = POLLIN;
+\&
+    fds[1].fd = fd;                 /* Fanotify input */
+    fds[1].events = POLLIN;
+\&
+    /* This is the loop to wait for incoming events. */
+\&
+    printf("Listening for events.\en");
+\&
+    while (1) {
+        poll_num = poll(fds, nfds, \-1);
+        if (poll_num == \-1) {
+            if (errno == EINTR)     /* Interrupted by a signal */
+                continue;           /* Restart poll() */
+\&
+            perror("poll");         /* Unexpected error */
+            exit(EXIT_FAILURE);
+        }
+\&
+        if (poll_num > 0) {
+\&
+            /* Console input is available, or standard input has been
+               closed or has failed: empty stdin and quit. Testing
+               only POLLIN would make poll() return immediately over
+               and over once standard input reports just POLLHUP,
+               POLLERR, or POLLNVAL. */
+\&
+            if (fds[0].revents & (POLLIN | POLLERR | POLLHUP | POLLNVAL)) {
+                while (read(STDIN_FILENO, &buf, 1) > 0 && buf != \[aq]\en\[aq])
+                    continue;
+                break;
+            }
+\&
+            if (fds[1].revents & POLLIN) {
+\&
+                /* Fanotify events are available. */
+\&
+                handle_events(fd);
+            }
+        }
+    }
+\&
+    printf("Listening for events stopped.\en");
+    exit(EXIT_SUCCESS);
+}
+.EE
+.\"
+.SS Example program: fanotify_fid.c
+The second program is an example of fanotify being used with a group that
+identifies objects by file handles.
+The program marks the filesystem object that is passed as
+a command-line argument
+and waits until an event of type
+.B FAN_CREATE
+has occurred.
+The event mask indicates which type of filesystem object\[em]either
+a file or a directory\[em]was created.
+Once all events have been read from the buffer and processed accordingly,
+the program simply terminates.
+.PP
+The following shell sessions show two different invocations of
+this program, with different actions performed on a watched object.
+.PP
+The first session shows a mark being placed on
+.IR /home/user .
+This is followed by the creation of a regular file,
+.IR /home/user/testfile.txt .
+This results in a
+.B FAN_CREATE
+event being generated and reported against the file's parent watched
+directory object and with the created file name.
+Program execution ends once all events captured within the buffer have
+been processed.
+.PP
+.in +4n
+.EX
+# \fB./fanotify_fid /home/user\fP
+Listening for events.
+FAN_CREATE (file created):
+ Directory \[aq]/home/user\[aq] has been modified.
+ Entry \[aq]testfile.txt\[aq] is not a subdirectory.
+All events processed successfully. Program exiting.
+\&
+$ \fBtouch /home/user/testfile.txt\fP # In another terminal
+.EE
+.in
+.PP
+The second session shows a mark being placed on
+.IR /home/user .
+This is followed by the creation of a directory,
+.IR /home/user/testdir .
+This specific action results in a
+.B FAN_CREATE
+event being generated and is reported with the
+.B FAN_ONDIR
+flag set and with the created directory name.
+.PP
+.in +4n
+.EX
+# \fB./fanotify_fid /home/user\fP
+Listening for events.
+FAN_CREATE | FAN_ONDIR (subdirectory created):
+ Directory \[aq]/home/user\[aq] has been modified.
+ Entry \[aq]testdir\[aq] is a subdirectory.
+All events processed successfully. Program exiting.
+\&
+$ \fBmkdir \-p /home/user/testdir\fP # In another terminal
+.EE
+.in
+.SS Program source: fanotify_fid.c
+\&
+.EX
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/fanotify.h>
+#include <unistd.h>
+\&
+#define BUF_SIZE 256
+\&
+/* Watch the directory named in argv[1] for the creation of directory
+   entries, read one batch of events from the fanotify group, and
+   describe each event on standard output. */
+\&
+int
+main(int argc, char *argv[])
+{
+    int fd, ret, event_fd, mount_fd;
+    ssize_t len, path_len;
+    char path[PATH_MAX];
+    char procfd_path[PATH_MAX];
+\&
+    /* The buffer is accessed through a pointer to struct
+       fanotify_event_metadata, so it must be aligned as that
+       structure requires; a plain char array gives no such
+       guarantee. */
+\&
+    char events_buf[BUF_SIZE]
+        __attribute__ ((aligned(__alignof__(struct fanotify_event_metadata))));
+    struct file_handle *file_handle;
+    struct fanotify_event_metadata *metadata;
+    struct fanotify_event_info_fid *fid;
+    const char *file_name;
+    struct stat sb;
+\&
+    if (argc != 2) {
+        fprintf(stderr, "Invalid number of command line arguments.\en");
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* This descriptor is passed to open_by_handle_at(2) below. */
+\&
+    mount_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+    if (mount_fd == \-1) {
+        perror(argv[1]);
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* Create an fanotify file descriptor with FAN_REPORT_DFID_NAME as
+       a flag so that program can receive fid events with directory
+       entry name. */
+\&
+    fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_DFID_NAME, 0);
+    if (fd == \-1) {
+        perror("fanotify_init");
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* Place a mark on the filesystem object supplied in argv[1]. */
+\&
+    ret = fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_ONLYDIR,
+                        FAN_CREATE | FAN_ONDIR,
+                        AT_FDCWD, argv[1]);
+    if (ret == \-1) {
+        perror("fanotify_mark");
+        exit(EXIT_FAILURE);
+    }
+\&
+    printf("Listening for events.\en");
+\&
+    /* Read events from the event queue into a buffer. */
+\&
+    len = read(fd, events_buf, sizeof(events_buf));
+    if (len == \-1 && errno != EAGAIN) {
+        perror("read");
+        exit(EXIT_FAILURE);
+    }
+\&
+    /* Process all events within the buffer. */
+\&
+    for (metadata = (struct fanotify_event_metadata *) events_buf;
+            FAN_EVENT_OK(metadata, len);
+            metadata = FAN_EVENT_NEXT(metadata, len)) {
+\&
+        /* The information record directly follows the metadata. */
+\&
+        fid = (struct fanotify_event_info_fid *) (metadata + 1);
+        file_handle = (struct file_handle *) fid\->handle;
+\&
+        /* Ensure that the event info is of the correct type. */
+\&
+        if (fid\->hdr.info_type == FAN_EVENT_INFO_TYPE_FID ||
+            fid\->hdr.info_type == FAN_EVENT_INFO_TYPE_DFID) {
+            file_name = NULL;
+        } else if (fid\->hdr.info_type == FAN_EVENT_INFO_TYPE_DFID_NAME) {
+\&
+            /* The entry name follows the opaque handle bytes.
+               f_handle is declared as an array of unsigned char,
+               hence the cast. */
+\&
+            file_name = (const char *) (file_handle\->f_handle +
+                                        file_handle\->handle_bytes);
+        } else {
+            fprintf(stderr, "Received unexpected event info type.\en");
+            exit(EXIT_FAILURE);
+        }
+\&
+        if (metadata\->mask == FAN_CREATE)
+            printf("FAN_CREATE (file created):\en");
+\&
+        if (metadata\->mask == (FAN_CREATE | FAN_ONDIR))
+            printf("FAN_CREATE | FAN_ONDIR (subdirectory created):\en");
+\&
+        /* metadata\->fd is set to FAN_NOFD when the group identifies
+           objects by file handles. To obtain a file descriptor for
+           the file object corresponding to an event you can use the
+           struct file_handle that is provided within the
+           fanotify_event_info_fid in conjunction with the
+           open_by_handle_at(2) system call. A check for ESTALE is
+           done to handle the situation where the object that the
+           file handle refers to was deleted prior to this system
+           call. */
+\&
+        event_fd = open_by_handle_at(mount_fd, file_handle, O_RDONLY);
+        if (event_fd == \-1) {
+            if (errno == ESTALE) {
+                printf("File handle is no longer valid. "
+                       "File has been deleted\en");
+                continue;
+            } else {
+                perror("open_by_handle_at");
+                exit(EXIT_FAILURE);
+            }
+        }
+\&
+        snprintf(procfd_path, sizeof(procfd_path), "/proc/self/fd/%d",
+                 event_fd);
+\&
+        /* Retrieve and print the path of the modified dentry. */
+\&
+        path_len = readlink(procfd_path, path, sizeof(path) \- 1);
+        if (path_len == \-1) {
+            perror("readlink");
+            exit(EXIT_FAILURE);
+        }
+\&
+        /* readlink() does not append a terminating null byte. */
+\&
+        path[path_len] = \[aq]\e0\[aq];
+        printf("\etDirectory \[aq]%s\[aq] has been modified.\en", path);
+\&
+        /* When a name was reported, look the entry up relative to
+           the directory to tell what kind of object it is now. */
+\&
+        if (file_name) {
+            ret = fstatat(event_fd, file_name, &sb, 0);
+            if (ret == \-1) {
+                if (errno != ENOENT) {
+                    perror("fstatat");
+                    exit(EXIT_FAILURE);
+                }
+                printf("\etEntry \[aq]%s\[aq] does not exist.\en", file_name);
+            } else if ((sb.st_mode & S_IFMT) == S_IFDIR) {
+                printf("\etEntry \[aq]%s\[aq] is a subdirectory.\en", file_name);
+            } else {
+                printf("\etEntry \[aq]%s\[aq] is not a subdirectory.\en",
+                       file_name);
+            }
+        }
+\&
+        /* Close associated file descriptor for this event. */
+\&
+        close(event_fd);
+    }
+\&
+    printf("All events processed successfully. Program exiting.\en");
+    exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.ad l
+.BR fanotify_init (2),
+.BR fanotify_mark (2),
+.BR inotify (7)
diff --git a/man7/feature_test_macros.7 b/man7/feature_test_macros.7
new file mode 100644
index 0000000..4e264d8
--- /dev/null
+++ b/man7/feature_test_macros.7
@@ -0,0 +1,937 @@
+.\" This manpage is Copyright (C) 2006, Michael Kerrisk
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH feature_test_macros 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+feature_test_macros \- feature test macros
+.SH DESCRIPTION
+Feature test macros allow the programmer to control the definitions that
+are exposed by system header files when a program is compiled.
+.PP
+.B NOTE:
+In order to be effective, a feature test macro
+.IR "must be defined before including any header files" .
+This can be done either in the compilation command
+.RI ( "cc \-DMACRO=value" )
+or by defining the macro within the source code before
+including any headers.
+The requirement that the macro must be defined before including any
+header file exists because header files may freely include one another.
+Thus, for example, in the following lines, defining the
+.B _GNU_SOURCE
+macro may have no effect because the header
+.I <abc.h>
+itself includes
+.I <xyz.h>
+(POSIX explicitly allows this):
+.PP
+.in +4n
+.EX
+#include <abc.h>
+#define _GNU_SOURCE
+#include <xyz.h>
+.EE
+.in
+.PP
+Some feature test macros are useful for creating portable applications,
+by preventing nonstandard definitions from being exposed.
+Other macros can be used to expose nonstandard definitions that
+are not exposed by default.
+.PP
+The precise effects of each of the feature test macros described below
+can be ascertained by inspecting the
+.I <features.h>
+header file.
+.BR Note :
+applications do
+.I not
+need to directly include
+.IR <features.h> ;
+indeed, doing so is actively discouraged.
+See NOTES.
+.SS Specification of feature test macro requirements in manual pages
+When a function requires that a feature test macro is defined,
+the manual page SYNOPSIS typically includes a note of the following form
+(this example from the
+.BR acct (2)
+manual page):
+.PP
+.RS
+.B #include <unistd.h>
+.PP
+.BI "int acct(const char *" filename );
+.PP
+.RS -4
+.EX
+Feature Test Macro Requirements for glibc (see
+.BR feature_test_macros (7)):
+.EE
+.RE
+.PP
+.BR acct ():
+_BSD_SOURCE || (_XOPEN_SOURCE && _XOPEN_SOURCE < 500)
+.RE
+.PP
+The
+.B ||
+means that in order to obtain the declaration of
+.BR acct (2)
+from
+.IR <unistd.h> ,
+.I either
+of the following macro
+definitions must be made before including any header files:
+.PP
+.in +4n
+.EX
+#define _BSD_SOURCE
+#define _XOPEN_SOURCE /* or any value < 500 */
+.EE
+.in
+.PP
+Alternatively, equivalent definitions can be included in the
+compilation command:
+.PP
+.in +4n
+.EX
+cc \-D_BSD_SOURCE
+cc \-D_XOPEN_SOURCE # Or any value < 500
+.EE
+.in
+.PP
+Note that, as described below,
+.BR "some feature test macros are defined by default" ,
+so that it may not always be necessary to
+explicitly specify the feature test macro(s) shown in the
+SYNOPSIS.
+.PP
+In a few cases, manual pages use a shorthand for expressing the
+feature test macro requirements (this example from
+.BR readahead (2)):
+.PP
+.RS +4
+.EX
+.B #define _GNU_SOURCE
+.B #define _FILE_OFFSET_BITS 64
+.B #include <fcntl.h>
+.PP
+.BI "ssize_t readahead(int " fd ", off_t " offset ", size_t " count );
+.EE
+.RE
+.PP
+This format is employed when the feature test macros ensure
+that the proper function declarations are visible,
+and the macros are not defined by default.
+.SS Feature test macros understood by glibc
+The paragraphs below explain how feature test macros are handled
+in glibc 2.\fIx\fP,
+.I x
+> 0.
+.PP
+First, though, a summary of a few details for the impatient:
+.IP \[bu] 3
+The macros that you most likely need to use in modern source code are
+.B _POSIX_C_SOURCE
+(for definitions from various versions of POSIX.1),
+.B _XOPEN_SOURCE
+(for definitions from various versions of SUS),
+.B _GNU_SOURCE
+(for GNU and/or Linux specific stuff), and
+.B _DEFAULT_SOURCE
+(to get definitions that would normally be provided by default).
+.IP \[bu]
+Certain macros are defined with default values.
+Thus, although one or more macros may be indicated as being
+required in the SYNOPSIS of a man page,
+it may not be necessary to define them explicitly.
+Full details of the defaults are given later in this man page.
+.IP \[bu]
+Defining
+.B _XOPEN_SOURCE
+with a value of 600 or greater produces the same effects as defining
+.B _POSIX_C_SOURCE
+with a value of 200112L or greater.
+Where one sees
+.IP
+.in +4n
+.EX
+_POSIX_C_SOURCE >= 200112L
+.EE
+.in
+.IP
+in the feature test macro requirements in the SYNOPSIS of a man page,
+it is implicit that the following has the same effect:
+.IP
+.in +4n
+.EX
+_XOPEN_SOURCE >= 600
+.EE
+.in
+.IP \[bu]
+Defining
+.B _XOPEN_SOURCE
+with a value of 700 or greater produces the same effects as defining
+.B _POSIX_C_SOURCE
+with a value of 200809L or greater.
+Where one sees
+.IP
+.in +4n
+.EX
+_POSIX_C_SOURCE >= 200809L
+.EE
+.in
+.IP
+in the feature test macro requirements in the SYNOPSIS of a man page,
+it is implicit that the following has the same effect:
+.IP
+.in +4n
+.EX
+_XOPEN_SOURCE >= 700
+.EE
+.in
+.\" The details in glibc 2.0 are simpler, but combining a
+.\" description of them with the details in later glibc versions
+.\" would make for a complicated description.
+.PP
+glibc understands the following feature test macros:
+.TP
+.B __STRICT_ANSI__
+ISO Standard C.
+This macro is implicitly defined by
+.BR gcc (1)
+when invoked with, for example, the
+.I \-std=c99
+or
+.I \-ansi
+flag.
+.TP
+.B _POSIX_C_SOURCE
+Defining this macro causes header files to expose definitions as follows:
+.RS
+.IP \[bu] 3
+The value 1 exposes definitions conforming to POSIX.1-1990 and
+ISO C (1990).
+.IP \[bu]
+The value 2 or greater additionally exposes
+definitions for POSIX.2-1992.
+.IP \[bu]
+The value 199309L or greater additionally exposes
+definitions for POSIX.1b (real-time extensions).
+.\" 199506L functionality is available only since glibc 2.1
+.IP \[bu]
+The value 199506L or greater additionally exposes
+definitions for POSIX.1c (threads).
+.IP \[bu]
+(Since glibc 2.3.3)
+The value 200112L or greater additionally exposes definitions corresponding
+to the POSIX.1-2001 base specification (excluding the XSI extension).
+This value also causes C95 (since glibc 2.12) and
+C99 (since glibc 2.10) features to be exposed
+(in other words, the equivalent of defining
+.BR _ISOC99_SOURCE ).
+.IP \[bu]
+(Since glibc 2.10)
+The value 200809L or greater additionally exposes definitions corresponding
+to the POSIX.1-2008 base specification (excluding the XSI extension).
+.RE
+.TP
+.B _POSIX_SOURCE
+Defining this obsolete macro with any value is equivalent to defining
+.B _POSIX_C_SOURCE
+with the value 1.
+.IP
+Since this macro is obsolete,
+its usage is generally not documented when discussing
+feature test macro requirements in the man pages.
+.TP
+.B _XOPEN_SOURCE
+Defining this macro causes header files to expose definitions as follows:
+.RS
+.IP \[bu] 3
+Defining with any value exposes
+definitions conforming to POSIX.1, POSIX.2, and XPG4.
+.IP \[bu]
+The value 500 or greater additionally exposes
+definitions for SUSv2 (UNIX 98).
+.IP \[bu]
+(Since glibc 2.2) The value 600 or greater additionally exposes
+definitions for SUSv3 (UNIX 03; i.e., the POSIX.1-2001 base specification
+plus the XSI extension) and C99 definitions.
+.IP \[bu]
+(Since glibc 2.10) The value 700 or greater additionally exposes
+definitions for SUSv4 (i.e., the POSIX.1-2008 base specification
+plus the XSI extension).
+.RE
+.IP
+If
+.B __STRICT_ANSI__
+is not defined, or
+.B _XOPEN_SOURCE
+is defined with a value greater than or equal to 500
+.I and
+neither
+.B _POSIX_SOURCE
+nor
+.B _POSIX_C_SOURCE
+is explicitly defined, then
+the following macros are implicitly defined:
+.RS
+.IP \[bu] 3
+.B _POSIX_SOURCE
+is defined with the value 1.
+.IP \[bu]
+.B _POSIX_C_SOURCE
+is defined, according to the value of
+.BR _XOPEN_SOURCE :
+.RS
+.TP
+.BR _XOPEN_SOURCE " < 500"
+.B _POSIX_C_SOURCE
+is defined with the value 2.
+.TP
+.RB "500 <= " _XOPEN_SOURCE " < 600"
+.B _POSIX_C_SOURCE
+is defined with the value 199506L.
+.TP
+.RB "600 <= " _XOPEN_SOURCE " < 700"
+.B _POSIX_C_SOURCE
+is defined with the value 200112L.
+.TP
+.RB "700 <= " _XOPEN_SOURCE " (since glibc 2.10)"
+.B _POSIX_C_SOURCE
+is defined with the value 200809L.
+.RE
+.RE
+.IP
+In addition, defining
+.B _XOPEN_SOURCE
+with a value of 500 or greater produces the same effects as defining
+.BR _XOPEN_SOURCE_EXTENDED .
+.TP
+.B _XOPEN_SOURCE_EXTENDED
+If this macro is defined,
+.I and
+.B _XOPEN_SOURCE
+is defined, then expose definitions corresponding to the XPG4v2
+(SUSv1) UNIX extensions (UNIX 95).
+Defining
+.B _XOPEN_SOURCE
+with a value of 500 or more also produces the same effect as defining
+.BR _XOPEN_SOURCE_EXTENDED .
+Use of
+.B _XOPEN_SOURCE_EXTENDED
+in new source code should be avoided.
+.IP
+Since defining
+.B _XOPEN_SOURCE
+with a value of 500 or more has the same effect as defining
+.BR _XOPEN_SOURCE_EXTENDED ,
+the latter (obsolete) feature test macro is generally not described in the
+SYNOPSIS in man pages.
+.TP
+.BR _ISOC99_SOURCE " (since glibc 2.1.3)"
+Exposes declarations consistent with the ISO C99 standard.
+.IP
+Earlier glibc 2.1.x versions recognized an equivalent macro named
+.B _ISOC9X_SOURCE
+(because the C99 standard had not then been finalized).
+Although the use of this macro is obsolete, glibc continues
+to recognize it for backward compatibility.
+.IP
+Defining
+.B _ISOC99_SOURCE
+also exposes ISO C (1990) Amendment 1 ("C95") definitions.
+(The primary change in C95 was support for international character sets.)
+.IP
+Invoking the C compiler with the option
+.I \-std=c99
+produces the same effects as defining this macro.
+.TP
+.BR _ISOC11_SOURCE " (since glibc 2.16)"
+Exposes declarations consistent with the ISO C11 standard.
+Defining this macro also enables C99 and C95 features (like
+.BR _ISOC99_SOURCE ).
+.IP
+Invoking the C compiler with the option
+.I \-std=c11
+produces the same effects as defining this macro.
+.TP
+.B _LARGEFILE64_SOURCE
+Expose definitions for the alternative API specified by the
+LFS (Large File Summit) as a "transitional extension" to the
+Single UNIX Specification.
+(See
+.UR http:\:/\:/opengroup.org\:/platform\:/lfs.html
+.UE .)
+The alternative API consists of a set of new objects
+(i.e., functions and types) whose names are suffixed with "64"
+(e.g.,
+.I off64_t
+versus
+.IR off_t ,
+.BR lseek64 ()
+versus
+.BR lseek (),
+etc.).
+New programs should not employ this macro; instead
+.I _FILE_OFFSET_BITS=64
+should be employed.
+.TP
+.B _LARGEFILE_SOURCE
+This macro was historically used to expose certain functions (specifically
+.BR fseeko (3)
+and
+.BR ftello (3))
+that address limitations of earlier APIs
+.RB ( fseek (3)
+and
+.BR ftell (3))
+that use
+.I long
+for file offsets.
+This macro is implicitly defined if
+.B _XOPEN_SOURCE
+is defined with a value greater than or equal to 500.
+New programs should not employ this macro;
+defining
+.B _XOPEN_SOURCE
+as just described or defining
+.B _FILE_OFFSET_BITS
+with the value 64 is the preferred mechanism to achieve the same result.
+.TP
+.B _FILE_OFFSET_BITS
+Defining this macro with the value 64
+automatically converts references to 32-bit functions and data types
+related to file I/O and filesystem operations into references to
+their 64-bit counterparts.
+This is useful for performing I/O on large files (> 2 Gigabytes)
+on 32-bit systems.
+It is also useful when calling functions like
+.BR copy_file_range (2)
+that were added more recently and that come only in 64-bit flavors.
+(Defining this macro permits correctly written programs to use
+large files with only a recompilation being required.)
+.IP
+64-bit systems naturally permit file sizes greater than 2 Gigabytes,
+and on those systems this macro has no effect.
+.TP
+.B _TIME_BITS
+Defining this macro with the value 64
+changes the width of
+.BR time_t (3type)
+to 64 bits, which allows handling of timestamps beyond
+2038.
+It is closely related to
+.B _FILE_OFFSET_BITS
+and, depending on the implementation, may require it to be set.
+This macro is available as of glibc 2.34.
+.TP
+.BR _BSD_SOURCE " (deprecated since glibc 2.20)"
+Defining this macro with any value causes header files to expose
+BSD-derived definitions.
+.IP
+In glibc versions up to and including 2.18,
+defining this macro also causes BSD definitions to be preferred in
+some situations where standards conflict, unless one or more of
+.BR _SVID_SOURCE ,
+.BR _POSIX_SOURCE ,
+.BR _POSIX_C_SOURCE ,
+.BR _XOPEN_SOURCE ,
+.BR _XOPEN_SOURCE_EXTENDED ,
+or
+.B _GNU_SOURCE
+is defined, in which case BSD definitions are disfavored.
+Since glibc 2.19,
+.B _BSD_SOURCE
+no longer causes BSD definitions to be preferred in case of conflicts.
+.IP
+Since glibc 2.20, this macro is deprecated.
+.\" commit c941736c92fa3a319221f65f6755659b2a5e0a20
+.\" commit 498afc54dfee41d33ba519f496e96480badace8e
+.\" commit acd7f096d79c181866d56d4aaf3b043e741f1e2c
+It now has the same effect as defining
+.BR _DEFAULT_SOURCE ,
+but generates a compile-time warning (unless
+.B _DEFAULT_SOURCE
+.\" commit ade40b10ff5fa59a318cf55b9d8414b758e8df78
+is also defined).
+Use
+.B _DEFAULT_SOURCE
+instead.
+To allow code that requires
+.B _BSD_SOURCE
+in glibc 2.19 and earlier and
+.B _DEFAULT_SOURCE
+in glibc 2.20 and later to compile without warnings, define
+.I both
+.B _BSD_SOURCE
+and
+.BR _DEFAULT_SOURCE .
+.TP
+.BR _SVID_SOURCE " (deprecated since glibc 2.20)"
+Defining this macro with any value causes header files to expose
+System V-derived definitions.
+(SVID == System V Interface Definition; see
+.BR standards (7).)
+.IP
+Since glibc 2.20, this macro is deprecated in the same fashion as
+.BR _BSD_SOURCE .
+.TP
+.BR _DEFAULT_SOURCE " (since glibc 2.19)"
+This macro can be defined to ensure that the "default"
+definitions are provided even when the defaults would otherwise
+be disabled,
+as happens when individual macros are explicitly defined,
+or the compiler is invoked in one of its "standard" modes (e.g.,
+.IR cc\~\-std=c99 ).
+Defining
+.B _DEFAULT_SOURCE
+without defining other individual macros
+or invoking the compiler in one of its "standard" modes has no effect.
+.IP
+The "default" definitions comprise those required by POSIX.1-2008 and ISO C99,
+as well as various definitions originally derived from BSD and System V.
+On glibc 2.19 and earlier, these defaults were approximately equivalent
+to explicitly defining the following:
+.IP
+.in +4n
+.EX
+cc \-D_BSD_SOURCE \-D_SVID_SOURCE \-D_POSIX_C_SOURCE=200809
+.EE
+.in
+.TP
+.BR _ATFILE_SOURCE " (since glibc 2.4)"
+Defining this macro with any value causes header files to expose
+declarations of a range of functions with the suffix "at";
+see
+.BR openat (2).
+Since glibc 2.10, this macro is also implicitly defined if
+.B _POSIX_C_SOURCE
+is defined with a value greater than or equal to 200809L.
+.TP
+.B _GNU_SOURCE
+Defining this macro (with any value) implicitly defines
+.BR _ATFILE_SOURCE ,
+.BR _LARGEFILE64_SOURCE ,
+.BR _ISOC99_SOURCE ,
+.BR _XOPEN_SOURCE_EXTENDED ,
+.BR _POSIX_SOURCE ,
+.B _POSIX_C_SOURCE
+with the value 200809L
+(200112L before glibc 2.10;
+199506L before glibc 2.5;
+199309L before glibc 2.1)
+and
+.B _XOPEN_SOURCE
+with the value 700
+(600 before glibc 2.10;
+500 before glibc 2.2).
+In addition, various GNU-specific extensions are also exposed.
+.IP
+Since glibc 2.19, defining
+.B _GNU_SOURCE
+also has the effect of implicitly defining
+.BR _DEFAULT_SOURCE .
+Before glibc 2.20, defining
+.B _GNU_SOURCE
+also had the effect of implicitly defining
+.B _BSD_SOURCE
+and
+.BR _SVID_SOURCE .
+.TP
+.B _REENTRANT
+Historically, on various C libraries
+it was necessary to define this macro in all
+multithreaded code.
+.\" Zack Weinberg
+.\" There did once exist C libraries where it was necessary. The ones
+.\" I remember were proprietary Unix vendor libcs from the mid-1990s
+.\" You would get completely unlocked stdio without _REENTRANT.
+(Some C libraries may still require this.)
+In glibc,
+this macro also exposed definitions of certain reentrant functions.
+.IP
+However, glibc has been thread-safe by default for many years;
+since glibc 2.3, the only effect of defining
+.B _REENTRANT
+has been to enable one or two of the same declarations that
+are also enabled by defining
+.B _POSIX_C_SOURCE
+with a value of 199506L or greater.
+.IP
+.B _REENTRANT
+is now obsolete.
+In glibc 2.25 and later, defining
+.B _REENTRANT
+is equivalent to defining
+.B _POSIX_C_SOURCE
+with the value 199506L.
+If a higher POSIX conformance level is
+selected by any other means (such as
+.B _POSIX_C_SOURCE
+itself,
+.BR _XOPEN_SOURCE ,
+.BR _DEFAULT_SOURCE ,
+or
+.BR _GNU_SOURCE ),
+then defining
+.B _REENTRANT
+has no effect.
+.IP
+This macro is automatically defined if one compiles with
+.IR cc\~\-pthread .
+.TP
+.B _THREAD_SAFE
+Synonym for the (deprecated)
+.BR _REENTRANT ,
+provided for compatibility with some other implementations.
+.TP
+.BR _FORTIFY_SOURCE " (since glibc 2.3.4)"
+.\" For more detail, see:
+.\" http://gcc.gnu.org/ml/gcc-patches/2004-09/msg02055.html
+.\" [PATCH] Object size checking to prevent (some) buffer overflows
+.\" * From: Jakub Jelinek <jakub at redhat dot com>
+.\" * To: gcc-patches at gcc dot gnu dot org
+.\" * Date: Tue, 21 Sep 2004 04:16:40 -0400
+Defining this macro causes some lightweight checks to be performed
+to detect some buffer overflow errors when employing
+various string and memory manipulation functions (for example,
+.BR memcpy (3),
+.BR memset (3),
+.BR stpcpy (3),
+.BR strcpy (3),
+.BR strncpy (3),
+.BR strcat (3),
+.BR strncat (3),
+.BR sprintf (3),
+.BR snprintf (3),
+.BR vsprintf (3),
+.BR vsnprintf (3),
+.BR gets (3),
+and wide character variants thereof).
+For some functions, argument consistency is checked;
+for example, a check is made that
+.BR open (2)
+has been supplied with a
+.I mode
+argument when the specified flags include
+.BR O_CREAT .
+Not all problems are detected, just some common cases.
+.\" Look for __USE_FORTIFY_LEVEL in the header files
+.IP
+If
+.B _FORTIFY_SOURCE
+is set to 1, with compiler optimization level 1
+.RI ( "gcc\ \-O1" )
+and above, checks that shouldn't change the behavior of
+conforming programs are performed.
+With
+.B _FORTIFY_SOURCE
+set to 2, some more checking is added, but
+some conforming programs might fail.
+.\" For example, given the following code
+.\" int d;
+.\" char buf[1000], fmt[1000];
+.\" strcpy(fmt, "Hello world\n%n");
+.\" snprintf(buf, sizeof(buf), fmt, &d);
+.\"
+.\" Compiling with "gcc -D_FORTIFY_SOURCE=2 -O1" and then running will
+.\" cause the following diagnostic at run time at the snprintf() call
+.\"
+.\" *** %n in writable segment detected ***
+.\" Aborted (core dumped)
+.\"
+.IP
+Some of the checks can be performed at compile time
+(via macro logic implemented in header files),
+and result in compiler warnings;
+other checks take place at run time,
+and result in a run-time error if the check fails.
+.IP
+With
+.B _FORTIFY_SOURCE
+set to 3, additional checking is added to intercept
+some function calls used with an argument of variable size
+where the compiler can deduce an upper bound for its value.
+For example, a program where
+.BR malloc (3)'s
+size argument is variable
+can now be fortified.
+.IP
+Use of this macro requires compiler support, available since
+gcc 4.0 and clang 2.6.
+Use of
+.B _FORTIFY_SOURCE
+set to 3 requires gcc 12.0 or later, or clang 9.0 or later,
+in conjunction with glibc 2.33 or later.
+.\" glibc is not an absolute requirement (gcc has libssp; NetBSD/newlib
+.\" and Darwin each have their own implementation), but let's keep it
+.\" simple.
+.SS Default definitions, implicit definitions, and combining definitions
+If no feature test macros are explicitly defined,
+then the following feature test macros are defined by default:
+.B _BSD_SOURCE
+(in glibc 2.19 and earlier),
+.B _SVID_SOURCE
+(in glibc 2.19 and earlier),
+.B _DEFAULT_SOURCE
+(since glibc 2.19),
+.BR _POSIX_SOURCE ,
+and
+.BR _POSIX_C_SOURCE =200809L
+(200112L before glibc 2.10;
+199506L before glibc 2.4;
+199309L before glibc 2.1).
+.PP
+If any of
+.BR __STRICT_ANSI__ ,
+.BR _ISOC99_SOURCE ,
+.B _ISOC11_SOURCE
+(since glibc 2.18),
+.BR _POSIX_SOURCE ,
+.BR _POSIX_C_SOURCE ,
+.BR _XOPEN_SOURCE ,
+.B _XOPEN_SOURCE_EXTENDED
+(in glibc 2.11 and earlier),
+.B _BSD_SOURCE
+(in glibc 2.19 and earlier),
+or
+.B _SVID_SOURCE
+(in glibc 2.19 and earlier)
+is explicitly defined, then
+.BR _BSD_SOURCE ,
+.BR _SVID_SOURCE ,
+and
+.B _DEFAULT_SOURCE
+are not defined by default.
+.PP
+If
+.B _POSIX_SOURCE
+and
+.B _POSIX_C_SOURCE
+are not explicitly defined,
+and either
+.B __STRICT_ANSI__
+is not defined or
+.B _XOPEN_SOURCE
+is defined with a value of 500 or more, then
+.IP \[bu] 3
+.B _POSIX_SOURCE
+is defined with the value 1; and
+.IP \[bu]
+.B _POSIX_C_SOURCE
+is defined with one of the following values:
+.RS 3
+.IP \[bu] 3
+2,
+if
+.B _XOPEN_SOURCE
+is defined with a value less than 500;
+.IP \[bu]
+199506L,
+if
+.B _XOPEN_SOURCE
+is defined with a value greater than or equal to 500 and less than 600;
+or
+.IP \[bu]
+(since glibc 2.4) 200112L,
+if
+.B _XOPEN_SOURCE
+is defined with a value greater than or equal to 600 and less than 700.
+.IP \[bu]
+(Since glibc 2.10)
+200809L,
+if
+.B _XOPEN_SOURCE
+is defined with a value greater than or equal to 700.
+.IP \[bu]
+Older versions of glibc do not know about the values
+200112L and 200809L for
+.BR _POSIX_C_SOURCE ,
+and the setting of this macro will depend on the glibc version.
+.IP \[bu]
+If
+.B _XOPEN_SOURCE
+is undefined, then the setting of
+.B _POSIX_C_SOURCE
+depends on the glibc version:
+199506L, before glibc 2.4;
+200112L, from glibc 2.4 to glibc 2.9; and
+200809L, since glibc 2.10.
+.RE
+.PP
+Multiple macros can be defined; the results are additive.
+.SH STANDARDS
+POSIX.1 specifies
+.BR _POSIX_C_SOURCE ,
+.BR _POSIX_SOURCE ,
+and
+.BR _XOPEN_SOURCE .
+.PP
+.B _FILE_OFFSET_BITS
+is not specified by any standard,
+but is employed on some other implementations.
+.PP
+.BR _BSD_SOURCE ,
+.BR _SVID_SOURCE ,
+.BR _DEFAULT_SOURCE ,
+.BR _ATFILE_SOURCE ,
+.BR _GNU_SOURCE ,
+.BR _FORTIFY_SOURCE ,
+.BR _REENTRANT ,
+and
+.B _THREAD_SAFE
+are specific to glibc.
+.SH HISTORY
+.B _XOPEN_SOURCE_EXTENDED
+was specified by XPG4v2 (aka SUSv1), but is not present in SUSv2 and later.
+.SH NOTES
+.I <features.h>
+is a Linux/glibc-specific header file.
+Other systems have an analogous file, but typically with a different name.
+This header file is automatically included by other header files as
+required: it is not necessary to explicitly include it in order to
+employ feature test macros.
+.PP
+According to which of the above feature test macros are defined,
+.I <features.h>
+internally defines various other macros that are checked by
+other glibc header files.
+These macros have names prefixed by two underscores (e.g.,
+.BR __USE_MISC ).
+Programs should
+.I never
+define these macros directly:
+instead, the appropriate feature test macro(s) from the
+list above should be employed.
+.SH EXAMPLES
+The program below can be used to explore how the various
+feature test macros are set depending on the glibc version
+and what feature test macros are explicitly set.
+The following shell session, on a system with glibc 2.10,
+shows some examples of what we would see:
+.PP
+.in +4n
+.EX
+$ \fBcc ftm.c\fP
+$ \fB./a.out\fP
+_POSIX_SOURCE defined
+_POSIX_C_SOURCE defined: 200809L
+_BSD_SOURCE defined
+_SVID_SOURCE defined
+_ATFILE_SOURCE defined
+$ \fBcc \-D_XOPEN_SOURCE=500 ftm.c\fP
+$ \fB./a.out\fP
+_POSIX_SOURCE defined
+_POSIX_C_SOURCE defined: 199506L
+_XOPEN_SOURCE defined: 500
+$ \fBcc \-D_GNU_SOURCE ftm.c\fP
+$ \fB./a.out\fP
+_POSIX_SOURCE defined
+_POSIX_C_SOURCE defined: 200809L
+_ISOC99_SOURCE defined
+_XOPEN_SOURCE defined: 700
+_XOPEN_SOURCE_EXTENDED defined
+_LARGEFILE64_SOURCE defined
+_BSD_SOURCE defined
+_SVID_SOURCE defined
+_ATFILE_SOURCE defined
+_GNU_SOURCE defined
+.EE
+.in
+.SS Program source
+\&
+.EX
+/* ftm.c */
+\&
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+\&
+int
+main(int argc, char *argv[])
+{
+#ifdef _POSIX_SOURCE
+ printf("_POSIX_SOURCE defined\en");
+#endif
+\&
+#ifdef _POSIX_C_SOURCE
+ printf("_POSIX_C_SOURCE defined: %jdL\en",
+ (intmax_t) _POSIX_C_SOURCE);
+#endif
+\&
+#ifdef _ISOC99_SOURCE
+ printf("_ISOC99_SOURCE defined\en");
+#endif
+\&
+#ifdef _ISOC11_SOURCE
+ printf("_ISOC11_SOURCE defined\en");
+#endif
+\&
+#ifdef _XOPEN_SOURCE
+ printf("_XOPEN_SOURCE defined: %d\en", _XOPEN_SOURCE);
+#endif
+\&
+#ifdef _XOPEN_SOURCE_EXTENDED
+ printf("_XOPEN_SOURCE_EXTENDED defined\en");
+#endif
+\&
+#ifdef _LARGEFILE64_SOURCE
+ printf("_LARGEFILE64_SOURCE defined\en");
+#endif
+\&
+#ifdef _FILE_OFFSET_BITS
+ printf("_FILE_OFFSET_BITS defined: %d\en", _FILE_OFFSET_BITS);
+#endif
+\&
+#ifdef _TIME_BITS
+ printf("_TIME_BITS defined: %d\en", _TIME_BITS);
+#endif
+\&
+#ifdef _BSD_SOURCE
+ printf("_BSD_SOURCE defined\en");
+#endif
+\&
+#ifdef _SVID_SOURCE
+ printf("_SVID_SOURCE defined\en");
+#endif
+\&
+#ifdef _DEFAULT_SOURCE
+ printf("_DEFAULT_SOURCE defined\en");
+#endif
+\&
+#ifdef _ATFILE_SOURCE
+ printf("_ATFILE_SOURCE defined\en");
+#endif
+\&
+#ifdef _GNU_SOURCE
+ printf("_GNU_SOURCE defined\en");
+#endif
+\&
+#ifdef _REENTRANT
+ printf("_REENTRANT defined\en");
+#endif
+\&
+#ifdef _THREAD_SAFE
+ printf("_THREAD_SAFE defined\en");
+#endif
+\&
+#ifdef _FORTIFY_SOURCE
+ printf("_FORTIFY_SOURCE defined\en");
+#endif
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR libc (7),
+.BR standards (7),
+.BR system_data_types (7)
+.PP
+The section "Feature Test Macros" under
+.IR "info libc" .
+.\" But beware: the info libc document is out of date (Jul 07, mtk)
+.PP
+.I /usr/include/features.h
diff --git a/man7/fifo.7 b/man7/fifo.7
new file mode 100644
index 0000000..f27dcc7
--- /dev/null
+++ b/man7/fifo.7
@@ -0,0 +1,70 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Claus Fischer.
+.\"
+.\" 990620 - page created - aeb@cwi.nl
+.\"
+.TH fifo 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+fifo \- first-in first-out special file, named pipe
+.SH DESCRIPTION
+A FIFO special file (a named pipe) is similar to a pipe,
+except that it is accessed as part of the filesystem.
+It can be opened by multiple processes for reading or
+writing.
+When processes are exchanging data via the FIFO,
+the kernel passes all data internally without writing it
+to the filesystem.
+Thus, the FIFO special file has no
+contents on the filesystem; the filesystem entry merely
+serves as a reference point so that processes can access
+the pipe using a name in the filesystem.
+.PP
+The kernel maintains exactly one pipe object for each
+FIFO special file that is opened by at least one process.
+The FIFO must be opened on both ends (reading and writing)
+before data can be passed.
+Normally, opening the FIFO blocks
+until the other end is opened also.
+.PP
+A process can open a FIFO in nonblocking mode.
+In this
+case, opening for read-only succeeds even if no one has
+opened on the write side yet and opening for write-only
+fails with
+.B ENXIO
+(no such device or address) unless the other
+end has already been opened.
+.PP
+Under Linux, opening a FIFO for read and write will succeed
+both in blocking and nonblocking mode.
+POSIX leaves this
+behavior undefined.
+This can be used to open a FIFO for
+writing while there are no readers available.
+A process
+that uses both ends of the connection in order to communicate
+with itself should be very careful to avoid deadlocks.
+.SH NOTES
+For details of the semantics of I/O on FIFOs, see
+.BR pipe (7).
+.PP
+When a process tries to write to a FIFO that is not opened
+for read on the other side, the process is sent a
+.B SIGPIPE
+signal.
+.PP
+FIFO special files can be created by
+.BR mkfifo (3),
+and are indicated by
+.I ls\~\-l
+with the file type \[aq]p\[aq].
+.SH SEE ALSO
+.BR mkfifo (1),
+.BR open (2),
+.BR pipe (2),
+.BR sigaction (2),
+.BR signal (2),
+.BR socketpair (2),
+.BR mkfifo (3),
+.BR pipe (7)
diff --git a/man7/futex.7 b/man7/futex.7
new file mode 100644
index 0000000..233933b
--- /dev/null
+++ b/man7/futex.7
@@ -0,0 +1,121 @@
+.\" This manpage has been automatically generated by docbook2man
+.\" from a DocBook document. This tool can be found at:
+.\" <http://shell.ipoline.com/~elmert/comp/docbook2X/>
+.\" Please send any bug reports, improvements, comments, patches,
+.\" etc. to Steve Cheng <steve@ggi-project.org>.
+.\"
+.\" SPDX-License-Identifier: MIT
+.\"
+.TH futex 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+futex \- fast user-space locking
+.SH SYNOPSIS
+.nf
+.B #include <linux/futex.h>
+.fi
+.SH DESCRIPTION
+The Linux kernel provides futexes ("Fast user-space mutexes")
+as a building block for fast user-space
+locking and semaphores.
+Futexes are very basic and lend themselves well to building higher-level
+locking abstractions such as
+mutexes, condition variables, read-write locks, barriers, and semaphores.
+.PP
+Most programmers will in fact not be using futexes directly but will
+instead rely on system libraries built on them,
+such as the Native POSIX Thread Library (NPTL) (see
+.BR pthreads (7)).
+.PP
+A futex is identified by a piece of memory which can be
+shared between processes or threads.
+In these different processes, the futex need not have identical addresses.
+In its bare form, a futex has semaphore semantics;
+it is a counter that can be incremented and decremented atomically;
+processes can wait for the value to become positive.
+.PP
+Futex operation occurs entirely in user space for the noncontended case.
+The kernel is involved only to arbitrate the contended case.
+As any sane design will strive for noncontention,
+futexes are also optimized for this situation.
+.PP
+In its bare form, a futex is an aligned integer which is
+touched only by atomic assembler instructions.
+This integer is four bytes long on all platforms.
+Processes can share this integer using
+.BR mmap (2),
+via shared memory segments, or because they share memory space,
+in which case the application is commonly called multithreaded.
+.SS Semantics
+Any futex operation starts in user space,
+but it may be necessary to communicate with the kernel using the
+.BR futex (2)
+system call.
+.PP
+To "up" a futex, execute the proper assembler instructions that
+will cause the host CPU to atomically increment the integer.
+Afterward, check if it has in fact changed from 0 to 1, in which case
+there were no waiters and the operation is done.
+This is the noncontended case which is fast and should be common.
+.PP
+In the contended case, the atomic increment changed the counter
+from \-1 (or some other negative number).
+If this is detected, there are waiters.
+User space should now set the counter to 1 and instruct the
+kernel to wake up any waiters using the
+.B FUTEX_WAKE
+operation.
+.PP
+Waiting on a futex, to "down" it, is the reverse operation.
+Atomically decrement the counter and check if it changed to 0,
+in which case the operation is done and the futex was uncontended.
+In all other circumstances, the process should set the counter to \-1
+and request that the kernel wait for another process to up the futex.
+This is done using the
+.B FUTEX_WAIT
+operation.
+.PP
+The
+.BR futex (2)
+system call can optionally be passed a timeout specifying how long
+the kernel should
+wait for the futex to be upped.
+In this case, semantics are more complex and the programmer is referred
+to
+.BR futex (2)
+for
+more details.
+The same holds for asynchronous futex waiting.
+.SH VERSIONS
+Initial futex support was merged in Linux 2.5.7
+but with different semantics from those described above.
+Current semantics are available from Linux 2.5.40 onward.
+.SH NOTES
+To reiterate, bare futexes are not intended as an easy-to-use
+abstraction for end users.
+Implementors are expected to be assembly literate and to have read
+the sources of the futex user-space library referenced
+below.
+.PP
+This man page illustrates the most common use of the
+.BR futex (2)
+primitives; it is by no means the only one.
+.\" .SH AUTHORS
+.\" .PP
+.\" Futexes were designed and worked on by Hubertus Franke
+.\" (IBM Thomas J. Watson Research Center),
+.\" Matthew Kirkwood, Ingo Molnar (Red Hat) and
+.\" Rusty Russell (IBM Linux Technology Center).
+.\" This page written by bert hubert.
+.SH SEE ALSO
+.BR clone (2),
+.BR futex (2),
+.BR get_robust_list (2),
+.BR set_robust_list (2),
+.BR set_tid_address (2),
+.BR pthreads (7)
+.PP
+.I Fuss, Futexes and Furwocks: Fast Userlevel Locking in Linux
+(proceedings of the Ottawa Linux Symposium 2002),
+futex example library, futex-*.tar.bz2
+.UR https://mirrors.kernel.org\:/pub\:/linux\:/kernel\:/people\:/rusty/
+.UE .
diff --git a/man7/glibc.7 b/man7/glibc.7
new file mode 100644
index 0000000..0d1ed26
--- /dev/null
+++ b/man7/glibc.7
@@ -0,0 +1 @@
+.so man7/libc.7
diff --git a/man7/glob.7 b/man7/glob.7
new file mode 100644
index 0000000..466701c
--- /dev/null
+++ b/man7/glob.7
@@ -0,0 +1,205 @@
+.\" Copyright (c) 1998 Andries Brouwer
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 2003-08-24 fix for / by John Kristoff + joey
+.\"
+.TH glob 7 2023-03-08 "Linux man-pages 6.05.01"
+.SH NAME
+glob \- globbing pathnames
+.SH DESCRIPTION
+Long ago, in UNIX\ V6, there was a program
+.I /etc/glob
+that would expand wildcard patterns.
+Soon afterward this became a shell built-in.
+.PP
+These days there is also a library routine
+.BR glob (3)
+that will perform this function for a user program.
+.PP
+The rules are as follows (POSIX.2, 3.13).
+.SS Wildcard matching
+A string is a wildcard pattern if it contains one of the
+characters \[aq]?\[aq], \[aq]*\[aq], or \[aq][\[aq].
+Globbing is the operation
+that expands a wildcard pattern into the list of pathnames
+matching the pattern.
+Matching is defined by:
+.PP
+A \[aq]?\[aq] (not between brackets) matches any single character.
+.PP
+A \[aq]*\[aq] (not between brackets) matches any string,
+including the empty string.
+.PP
+.B "Character classes"
+.PP
+An expression "\fI[...]\fP" where the first character after the
+leading \[aq][\[aq] is not an \[aq]!\[aq] matches a single character,
+namely any of the characters enclosed by the brackets.
+The string enclosed by the brackets cannot be empty;
+therefore \[aq]]\[aq] can be allowed between the brackets, provided
+that it is the first character.
+(Thus, "\fI[][!]\fP" matches the
+three characters \[aq][\[aq], \[aq]]\[aq], and \[aq]!\[aq].)
+.PP
+.B Ranges
+.PP
+There is one special convention:
+two characters separated by \[aq]\-\[aq] denote a range.
+(Thus,
+"\fI[A\-Fa\-f0\-9]\fP" is equivalent to "\fI[ABCDEFabcdef0123456789]\fP".)
+One may include \[aq]\-\[aq] in its literal meaning
+by making it the first or last character between the brackets.
+(Thus,
+"\fI[]\-]\fP" matches just the two characters \[aq]]\[aq] and \[aq]\-\[aq],
+and "\fI[\-\-0]\fP" matches the
+three characters \[aq]\-\[aq], \[aq].\[aq], and \[aq]0\[aq],
+since \[aq]/\[aq] cannot be matched.)
+.PP
+.B Complementation
+.PP
+An expression "\fI[!...]\fP" matches a single character, namely
+any character that is not matched by the expression obtained
+by removing the first \[aq]!\[aq] from it.
+(Thus, "\fI[!]a\-]\fP" matches any
+single character except \[aq]]\[aq], \[aq]a\[aq], and \[aq]\-\[aq].)
+.PP
+One can remove the special meaning of \[aq]?\[aq], \[aq]*\[aq], and \[aq][\[aq]
+by preceding them by a backslash,
+or,
+in case this is part of a shell command line,
+enclosing them in quotes.
+Between brackets these characters stand for themselves.
+Thus, "\fI[[?*\e]\fP" matches the
+four characters \[aq][\[aq], \[aq]?\[aq], \[aq]*\[aq], and \[aq]\e\[aq].
+.SS Pathnames
+Globbing is applied on each of the components of a pathname
+separately.
+A \[aq]/\[aq] in a pathname cannot be matched by a \[aq]?\[aq] or \[aq]*\[aq]
+wildcard, or by a range like "\fI[.\-0]\fP".
+A range containing an explicit \[aq]/\[aq] character is syntactically incorrect.
+(POSIX requires that syntactically incorrect patterns are left unchanged.)
+.PP
+If a filename starts with a \[aq].\[aq],
+this character must be matched explicitly.
+(Thus, \fIrm\ *\fP will not remove .profile, and \fItar\ c\ *\fP will not
+archive all your files; \fItar\ c\ .\fP is better.)
+.SS Empty lists
+The nice and simple rule given above: "expand a wildcard pattern
+into the list of matching pathnames" was the original UNIX
+definition.
+It allowed one to have patterns that expand into
+an empty list, as in
+.PP
+.nf
+ xv \-wait 0 *.gif *.jpg
+.fi
+.PP
+where perhaps no *.gif files are present (and this is not
+an error).
+However, POSIX requires that a wildcard pattern is left
+unchanged when it is syntactically incorrect, or the list of
+matching pathnames is empty.
+With
+.I bash
+one can force the classical behavior using this command:
+.PP
+.in +4n
+.EX
+shopt \-s nullglob
+.EE
+.in
+.\" In Bash v1, by setting allow_null_glob_expansion=true
+.PP
+(Similar problems occur elsewhere.
+For example, where old scripts have
+.PP
+.in +4n
+.EX
+rm \`find . \-name "*\[ti]"\`
+.EE
+.in
+.PP
+new scripts require
+.PP
+.in +4n
+.EX
+rm \-f nosuchfile \`find . \-name "*\[ti]"\`
+.EE
+.in
+.PP
+to avoid error messages from
+.I rm
+called with an empty argument list.)
+.SH NOTES
+.SS Regular expressions
+Note that wildcard patterns are not regular expressions,
+although they are a bit similar.
+First of all, they match
+filenames, rather than text, and secondly, the conventions
+are not the same: for example, in a regular expression \[aq]*\[aq] means zero or
+more copies of the preceding thing.
+.PP
+Now that regular expressions have bracket expressions where
+the negation is indicated by a \[aq]\[ha]\[aq], POSIX has declared the
+effect of a wildcard pattern "\fI[\[ha]...]\fP" to be undefined.
+.SS Character classes and internationalization
+Of course ranges were originally meant to be ASCII ranges,
+so that "\fI[\ \-%]\fP" stands for "\fI[\ !"#$%]\fP" and "\fI[a\-z]\fP" stands
+for "any lowercase letter".
+Some UNIX implementations generalized this so that a range X\-Y
+stands for the set of characters with code between the codes for
+X and for Y.
+However, this requires the user to know the
+character coding in use on the local system, and moreover, is
+not convenient if the collating sequence for the local alphabet
+differs from the ordering of the character codes.
+Therefore, POSIX extended the bracket notation greatly,
+both for wildcard patterns and for regular expressions.
+In the above we saw three types of items that can occur in a bracket
+expression: namely (i) the negation, (ii) explicit single characters,
+and (iii) ranges.
+POSIX specifies ranges in an internationally
+more useful way and adds three more types:
+.PP
+(iii) Ranges X\-Y comprise all characters that fall between X
+and Y (inclusive) in the current collating sequence as defined
+by the
+.B LC_COLLATE
+category in the current locale.
+.PP
+(iv) Named character classes, like
+.PP
+.nf
+[:alnum:] [:alpha:] [:blank:] [:cntrl:]
+[:digit:] [:graph:] [:lower:] [:print:]
+[:punct:] [:space:] [:upper:] [:xdigit:]
+.fi
+.PP
+so that one can say "\fI[[:lower:]]\fP" instead of "\fI[a\-z]\fP", and have
+things work in Denmark, too, where there are three letters past \[aq]z\[aq]
+in the alphabet.
+These character classes are defined by the
+.B LC_CTYPE
+category
+in the current locale.
+.PP
+(v) Collating symbols, like "\fI[.ch.]\fP" or "\fI[.a-acute.]\fP",
+where the string between "\fI[.\fP" and "\fI.]\fP" is a collating
+element defined for the current locale.
+Note that this may
+be a multicharacter element.
+.PP
+(vi) Equivalence class expressions, like "\fI[=a=]\fP",
+where the string between "\fI[=\fP" and "\fI=]\fP" is any collating
+element from its equivalence class, as defined for the
+current locale.
+For example, "\fI[[=a=]]\fP" might be equivalent
+to "\fI[a\('a\(`a\(:a\(^a]\fP", that is,
+to "\fI[a[.a-acute.][.a-grave.][.a-umlaut.][.a-circumflex.]]\fP".
+.SH SEE ALSO
+.BR sh (1),
+.BR fnmatch (3),
+.BR glob (3),
+.BR locale (7),
+.BR regex (7)
diff --git a/man7/hier.7 b/man7/hier.7
new file mode 100644
index 0000000..314d28a
--- /dev/null
+++ b/man7/hier.7
@@ -0,0 +1,654 @@
+.\" Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified Sun Jul 25 11:05:58 1993 by Rik Faith (faith@cs.unc.edu)
+.\" Modified Sat Feb 10 16:18:03 1996 by Urs Thuermann (urs@isnogud.escape.de)
+.\" Modified Mon Jun 16 20:02:00 1997 by Nicolás Lichtmaier <nick@debian.org>
+.\" Modified Mon Feb 6 16:41:00 1999 by Nicolás Lichtmaier <nick@debian.org>
+.\" Modified Tue Feb 8 16:46:45 2000 by Chris Pepper <pepper@tgg.com>
+.\" Modified Fri Sep 7 20:32:45 2001 by Tammy Fox <tfox@redhat.com>
+.TH hier 7 2023-04-18 "Linux man-pages 6.05.01"
+.SH NAME
+hier \- description of the filesystem hierarchy
+.SH DESCRIPTION
+A typical Linux system has, among others, the following directories:
+.TP
+.I /
+This is the root directory.
+This is where the whole tree starts.
+.TP
+.I /bin
+This directory contains executable programs which are needed in
+single user mode and to bring the system up or repair it.
+.TP
+.I /boot
+Contains static files for the boot loader.
+This directory holds only
+the files which are needed during the boot process.
+The map installer
+and configuration files should go to
+.I /sbin
+and
+.IR /etc .
+The operating system kernel (initrd for example) must be located in either
+.I /
+or
+.IR /boot .
+.TP
+.I /dev
+Special or device files, which refer to physical devices.
+See
+.BR mknod (1).
+.TP
+.I /etc
+Contains configuration files which are local to the machine.
+Some
+larger software packages, like X11, can have their own subdirectories
+below
+.IR /etc .
+Site-wide configuration files may be placed here or in
+.IR /usr/etc .
+Nevertheless, programs should always look for these files in
+.I /etc
+and you may have links for these files to
+.IR /usr/etc .
+.TP
+.I /etc/opt
+Host-specific configuration files for add-on applications installed
+in
+.IR /opt .
+.TP
+.I /etc/sgml
+This directory contains the configuration files for SGML (optional).
+.TP
+.I /etc/skel
+When a new user account is created, files from this directory are
+usually copied into the user's home directory.
+.TP
+.I /etc/X11
+Configuration files for the X11 window system (optional).
+.TP
+.I /etc/xml
+This directory contains the configuration files for XML (optional).
+.TP
+.I /home
+On machines with home directories for users, these are usually beneath
+this directory, directly or not.
+The structure of this directory
+depends on local administration decisions (optional).
+.TP
+.I /lib
+This directory should hold those shared libraries that are necessary
+to boot the system and to run the commands in the root filesystem.
+.TP
+.I /lib<qual>
+These directories are variants of
+.I /lib
+on systems which support more than one binary format requiring separate
+libraries (optional).
+.TP
+.I /lib/modules
+Loadable kernel modules (optional).
+.TP
+.I /lost+found
+This directory contains items lost in the filesystem.
+These items are usually chunks of files mangled as a consequence of
+a faulty disk or a system crash.
+.TP
+.I /media
+This directory contains mount points for removable media such as CD
+and DVD disks or USB sticks.
+On systems where more than one device exists
+for mounting a certain type of media,
+mount directories can be created by appending a digit
+to the name of those available below starting with '0',
+but the unqualified name must also exist.
+.TP
+.I /media/floppy[1\-9]
+Floppy drive (optional).
+.TP
+.I /media/cdrom[1\-9]
+CD-ROM drive (optional).
+.TP
+.I /media/cdrecorder[1\-9]
+CD writer (optional).
+.TP
+.I /media/zip[1\-9]
+Zip drive (optional).
+.TP
+.I /media/usb[1\-9]
+USB drive (optional).
+.TP
+.I /mnt
+This directory is a mount point for a temporarily mounted filesystem.
+In some distributions,
+.I /mnt
+contains subdirectories intended to be used as mount points for several
+temporary filesystems.
+.TP
+.I /opt
+This directory should contain add-on packages that contain static files.
+.TP
+.I /proc
+This is a mount point for the
+.I proc
+filesystem, which provides information about running processes and
+the kernel.
+This pseudo-filesystem is described in more detail in
+.BR proc (5).
+.TP
+.I /root
+This directory is usually the home directory for the root user (optional).
+.TP
+.I /run
+This directory contains information which
+describes the system since it was booted.
+Once this purpose was served by
+.I /var/run
+and programs may continue to use it.
+.TP
+.I /sbin
+Like
+.IR /bin ,
+this directory holds commands needed to boot the system, but which are
+usually not executed by normal users.
+.TP
+.I /srv
+This directory contains site-specific data that is served by this system.
+.TP
+.I /sys
+This is a mount point for the sysfs filesystem, which provides information
+about the kernel like
+.IR /proc ,
+but better structured, following the formalism of kobject infrastructure.
+.TP
+.I /tmp
+This directory contains temporary files which may be deleted with no
+notice, such as by a regular job or at system boot up.
+.TP
+.I /usr
+This directory is usually mounted from a separate partition.
+It should hold only shareable, read-only data, so that it can be mounted
+by various machines running Linux.
+.TP
+.I /usr/X11R6
+The X\-Window system, version 11 release 6 (present in FHS 2.3, removed
+in FHS 3.0).
+.TP
+.I /usr/X11R6/bin
+Binaries which belong to the X\-Window system; often, there is a
+symbolic link from the more traditional
+.I /usr/bin/X11
+to here.
+.TP
+.I /usr/X11R6/lib
+Data files associated with the X\-Window system.
+.TP
+.I /usr/X11R6/lib/X11
+These contain miscellaneous files needed to run X; often, there is a
+symbolic link from
+.I /usr/lib/X11
+to this directory.
+.TP
+.I /usr/X11R6/include/X11
+Contains include files needed for compiling programs using the X11
+window system.
+Often, there is a symbolic link from
+.I /usr/include/X11
+to this directory.
+.TP
+.I /usr/bin
+This is the primary directory for executable programs.
+Most programs
+executed by normal users which are not needed for booting or for
+repairing the system and which are not installed locally should be
+placed in this directory.
+.TP
+.I /usr/bin/mh
+Commands for the MH mail handling system (optional).
+.TP
+.I /usr/bin/X11
+This is the traditional place to look for X11 executables; on Linux, it
+usually is a symbolic link to
+.IR /usr/X11R6/bin .
+.TP
+.I /usr/dict
+Replaced by
+.IR /usr/share/dict .
+.TP
+.I /usr/doc
+Replaced by
+.IR /usr/share/doc .
+.TP
+.I /usr/etc
+Site-wide configuration files to be shared between several machines
+may be stored in this directory.
+However, commands should always
+reference those files using the
+.I /etc
+directory.
+Links from files in
+.I /etc
+should point to the appropriate files in
+.IR /usr/etc .
+.TP
+.I /usr/games
+Binaries for games and educational programs (optional).
+.TP
+.I /usr/include
+Include files for the C compiler.
+.TP
+.I /usr/include/bsd
+BSD compatibility include files (optional).
+.TP
+.I /usr/include/X11
+Include files for the C compiler and the X\-Window system.
+This is
+usually a symbolic link to
+.IR /usr/X11R6/include/X11 .
+.TP
+.I /usr/include/asm
+Include files which declare some assembler functions.
+This used to be a
+symbolic link to
+.IR /usr/src/linux/include/asm .
+.TP
+.I /usr/include/linux
+This contains information which may change from system release to
+system release and used to be a symbolic link to
+.I /usr/src/linux/include/linux
+to get at operating-system-specific information.
+.IP
+(Note that one should have include files there that work correctly with
+the current libc and in user space.
+However, Linux kernel source is not
+designed to be used with user programs and does not know anything
+about the libc you are using.
+It is very likely that things will break
+if you let
+.I /usr/include/asm
+and
+.I /usr/include/linux
+point at a random kernel tree.
+Debian systems don't do this
+and use headers from a known good kernel
+version, provided in the libc*\-dev package.)
+.TP
+.I /usr/include/g++
+Include files to use with the GNU C++ compiler.
+.TP
+.I /usr/lib
+Object libraries, including dynamic libraries, plus some executables
+which usually are not invoked directly.
+More complicated programs may
+have whole subdirectories there.
+.TP
+.I /usr/libexec
+This directory contains binaries for internal use only; they are not meant
+to be executed directly by users' shells or scripts.
+.TP
+.I /usr/lib<qual>
+These directories are variants of
+.I /usr/lib
+on systems which support more than one binary format requiring separate
+libraries, except that the symbolic link
+.IR /usr/lib qual /X11
+is not required (optional).
+.TP
+.I /usr/lib/X11
+The usual place for data files associated with X programs, and
+configuration files for the X system itself.
+On Linux, it usually is
+a symbolic link to
+.IR /usr/X11R6/lib/X11 .
+.TP
+.I /usr/lib/gcc\-lib
+Contains executables and include files for the GNU C compiler,
+.BR gcc (1).
+.TP
+.I /usr/lib/groff
+Files for the GNU groff document formatting system.
+.TP
+.I /usr/lib/uucp
+Files for
+.BR uucp (1).
+.TP
+.I /usr/local
+This is where programs which are local to the site typically go.
+.TP
+.I /usr/local/bin
+Binaries for programs local to the site.
+.TP
+.I /usr/local/doc
+Local documentation.
+.TP
+.I /usr/local/etc
+Configuration files associated with locally installed programs.
+.TP
+.I /usr/local/games
+Binaries for locally installed games.
+.TP
+.I /usr/local/lib
+Files associated with locally installed programs.
+.TP
+.I /usr/local/lib<qual>
+These directories are variants of
+.I /usr/local/lib
+on systems which support more than one binary format requiring separate
+libraries (optional).
+.TP
+.I /usr/local/include
+Header files for the local C compiler.
+.TP
+.I /usr/local/info
+Info pages associated with locally installed programs.
+.TP
+.I /usr/local/man
+Man pages associated with locally installed programs.
+.TP
+.I /usr/local/sbin
+Locally installed programs for system administration.
+.TP
+.I /usr/local/share
+Local application data that can be shared among different architectures
+of the same OS.
+.TP
+.I /usr/local/src
+Source code for locally installed software.
+.TP
+.I /usr/man
+Replaced by
+.IR /usr/share/man .
+.TP
+.I /usr/sbin
+This directory contains program binaries for system administration
+which are not essential for the boot process, for mounting
+.IR /usr ,
+or for system repair.
+.TP
+.I /usr/share
+This directory contains subdirectories with specific application data that
+can be shared among different architectures of the same OS.
+Often one finds stuff here that used to live in
+.I /usr/doc
+or
+.I /usr/lib
+or
+.IR /usr/man .
+.TP
+.I /usr/share/color
+Contains color management information, like International Color Consortium (ICC)
+Color profiles (optional).
+.TP
+.I /usr/share/dict
+Contains the word lists used by spell checkers (optional).
+.TP
+.I /usr/share/dict/words
+List of English words (optional).
+.TP
+.I /usr/share/doc
+Documentation about installed programs (optional).
+.TP
+.I /usr/share/games
+Static data files for games in
+.I /usr/games
+(optional).
+.TP
+.I /usr/share/info
+Info pages go here (optional).
+.TP
+.I /usr/share/locale
+Locale information goes here (optional).
+.TP
+.I /usr/share/man
+Manual pages go here in subdirectories according to the man page sections.
+.TP
+.IR /usr/share/man/ locale /man[1\-9]
+These directories contain manual pages for the
+specific locale in source code form.
+Systems which use a unique language and code set for all manual pages
+may omit the <locale> substring.
+.TP
+.I /usr/share/misc
+Miscellaneous data that can be shared among different architectures of the
+same OS.
+.TP
+.I /usr/share/nls
+The message catalogs for native language support go here (optional).
+.TP
+.I /usr/share/ppd
+PostScript Printer Description (PPD) files (optional).
+.TP
+.I /usr/share/sgml
+Files for SGML (optional).
+.TP
+.I /usr/share/sgml/docbook
+DocBook DTD (optional).
+.TP
+.I /usr/share/sgml/tei
+TEI DTD (optional).
+.TP
+.I /usr/share/sgml/html
+HTML DTD (optional).
+.TP
+.I /usr/share/sgml/mathml
+MathML DTD (optional).
+.TP
+.I /usr/share/terminfo
+The database for terminfo (optional).
+.TP
+.I /usr/share/tmac
+Troff macros that are not distributed with groff (optional).
+.TP
+.I /usr/share/xml
+Files for XML (optional).
+.TP
+.I /usr/share/xml/docbook
+DocBook DTD (optional).
+.TP
+.I /usr/share/xml/xhtml
+XHTML DTD (optional).
+.TP
+.I /usr/share/xml/mathml
+MathML DTD (optional).
+.TP
+.I /usr/share/zoneinfo
+Files for timezone information (optional).
+.TP
+.I /usr/src
+Source files for different parts of the system, included with some packages
+for reference purposes.
+Don't work here with your own projects, as files
+below /usr should be read-only except when installing software (optional).
+.TP
+.I /usr/src/linux
+This was the traditional place for the kernel source.
+Some distributions put here the source for the default kernel they ship.
+You should probably use another directory when building your own kernel.
+.TP
+.I /usr/tmp
+Obsolete.
+This should be a link
+to
+.IR /var/tmp .
+This link is present only for compatibility reasons and shouldn't be used.
+.TP
+.I /var
+This directory contains files which may change in size, such as spool
+and log files.
+.TP
+.I /var/account
+Process accounting logs (optional).
+.TP
+.I /var/adm
+This directory is superseded by
+.I /var/log
+and should be a symbolic link to
+.IR /var/log .
+.TP
+.I /var/backups
+Reserved for historical reasons.
+.TP
+.I /var/cache
+Data cached for programs.
+.TP
+.I /var/cache/fonts
+Locally generated fonts (optional).
+.TP
+.I /var/cache/man
+Locally formatted man pages (optional).
+.TP
+.I /var/cache/www
+WWW proxy or cache data (optional).
+.TP
+.I /var/cache/<package>
+Package specific cache data (optional).
+.TP
+.IR /var/catman/cat[1\-9] " or " /var/cache/man/cat[1\-9]
+These directories contain preformatted manual pages according to their
+man page section.
+(The use of preformatted manual pages is deprecated.)
+.TP
+.I /var/crash
+System crash dumps (optional).
+.TP
+.I /var/cron
+Reserved for historical reasons.
+.TP
+.I /var/games
+Variable game data (optional).
+.TP
+.I /var/lib
+Variable state information for programs.
+.TP
+.I /var/lib/color
+Variable files containing color management information (optional).
+.TP
+.I /var/lib/hwclock
+State directory for hwclock (optional).
+.TP
+.I /var/lib/misc
+Miscellaneous state data.
+.TP
+.I /var/lib/xdm
+X display manager variable data (optional).
+.TP
+.I /var/lib/<editor>
+Editor backup files and state (optional).
+.TP
+.I /var/lib/<name>
+These directories must be used for all distribution packaging support.
+.TP
+.I /var/lib/<package>
+State data for packages and subsystems (optional).
+.TP
+.I /var/lib/<pkgtool>
+Packaging support files (optional).
+.TP
+.I /var/local
+Variable data for
+.IR /usr/local .
+.TP
+.I /var/lock
+Lock files are placed in this directory.
+The naming convention for
+device lock files is
+.I LCK..<device>
+where
+.I <device>
+is the device's name in the filesystem.
+The format used is that of HDB UUCP lock files, that is, lock files
+contain a PID as a 10-byte ASCII decimal number, followed by a newline
+character.
+.TP
+.I /var/log
+Miscellaneous log files.
+.TP
+.I /var/opt
+Variable data for
+.IR /opt .
+.TP
+.I /var/mail
+Users' mailboxes.
+Replaces
+.IR /var/spool/mail .
+.TP
+.I /var/msgs
+Reserved for historical reasons.
+.TP
+.I /var/preserve
+Reserved for historical reasons.
+.TP
+.I /var/run
+Run-time variable files, like files holding process identifiers (PIDs)
+and logged user information
+.IR (utmp) .
+Files in this directory are usually cleared when the system boots.
+.TP
+.I /var/spool
+Spooled (or queued) files for various programs.
+.TP
+.I /var/spool/at
+Spooled jobs for
+.BR at (1).
+.TP
+.I /var/spool/cron
+Spooled jobs for
+.BR cron (8).
+.TP
+.I /var/spool/lpd
+Spooled files for printing (optional).
+.TP
+.I /var/spool/lpd/printer
+Spools for a specific printer (optional).
+.TP
+.I /var/spool/mail
+Replaced by
+.IR /var/mail .
+.TP
+.I /var/spool/mqueue
+Queued outgoing mail (optional).
+.TP
+.I /var/spool/news
+Spool directory for news (optional).
+.TP
+.I /var/spool/rwho
+Spooled files for
+.BR rwhod (8)
+(optional).
+.TP
+.I /var/spool/smail
+Spooled files for the
+.BR smail (1)
+mail delivery program.
+.TP
+.I /var/spool/uucp
+Spooled files for
+.BR uucp (1)
+(optional).
+.TP
+.I /var/tmp
+Like
+.IR /tmp ,
+this directory holds temporary files stored for an unspecified duration.
+.TP
+.I /var/yp
+Database files for NIS,
+formerly known as the Sun Yellow Pages (YP).
+.SH STANDARDS
+.UR https://refspecs.linuxfoundation.org/fhs.shtml
+The Filesystem Hierarchy Standard (FHS), Version 3.0
+.UE ,
+published March 19, 2015
+.SH BUGS
+This list is not exhaustive;
+different distributions and systems may be configured differently.
+.SH SEE ALSO
+.BR find (1),
+.BR ln (1),
+.BR proc (5),
+.BR file\-hierarchy (7),
+.BR mount (8)
+.PP
+The Filesystem Hierarchy Standard
diff --git a/man7/hostname.7 b/man7/hostname.7
new file mode 100644
index 0000000..60940ba
--- /dev/null
+++ b/man7/hostname.7
@@ -0,0 +1,97 @@
+.\" Copyright (c) 1987, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" SPDX-License-Identifier: BSD-4-Clause-UC
+.\"
+.\" @(#)hostname.7 8.2 (Berkeley) 12/30/93
+.\" $FreeBSD: src/share/man/man7/hostname.7,v 1.7 2004/07/03 18:29:23 ru Exp $
+.\"
+.\" 2008-06-11, mtk, Taken from FreeBSD 6.2 and modified for Linux.
+.\"
+.TH hostname 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+hostname \- hostname resolution description
+.SH DESCRIPTION
+Hostnames are domains, where a domain is a hierarchical, dot-separated
+list of subdomains; for example, the machine "monet", in the "example"
+subdomain of the "com" domain would be represented as "monet.example.com".
+.PP
+Each element of the hostname must be from 1 to 63 characters long and the
+entire hostname, including the dots, can be at most 253 characters long.
+Valid characters for hostnames are
+.BR ASCII (7)
+letters from
+.I a
+to
+.IR z ,
+the digits from
+.I 0
+to
+.IR 9 ,
+and the hyphen (\-).
+A hostname may not start with a hyphen.
+.PP
+Hostnames are often used with network client and server programs,
+which must generally translate the name to an address for use.
+(This task is generally performed by either
+.BR getaddrinfo (3)
+or the obsolete
+.BR gethostbyname (3).)
+.PP
+Hostnames are resolved by the NSS framework in glibc according
+to the
+.B hosts
+configuration in
+.BR nsswitch.conf (5).
+The DNS-based name resolver
+(in the
+.B dns
+NSS service module) resolves them in the following fashion.
+.PP
+If the name consists of a single component, that is, contains no dot,
+and if the environment variable
+.B HOSTALIASES
+is set to the name of a file,
+that file is searched for any string matching the input hostname.
+The file should consist of lines made up of two white-space separated strings,
+the first of which is the hostname alias,
+and the second of which is the complete hostname
+to be substituted for that alias.
+If a case-insensitive match is found between the hostname to be resolved
+and the first field of a line in the file, the substituted name is looked
+up with no further processing.
+.PP
+If the input name ends with a trailing dot,
+the trailing dot is removed,
+and the remaining name is looked up with no further processing.
+.PP
+If the input name does not end with a trailing dot, it is looked up
+by searching through a list of domains until a match is found.
+The default search list includes first the local domain,
+then its parent domains with at least 2 name components (longest first).
+For example,
+in the domain cs.example.com, the name lithium.cchem will be checked first
+as lithium.cchem.cs.example.com and then as lithium.cchem.example.com.
+lithium.cchem.com will not be tried, as there is only one component
+remaining from the local domain.
+The search path can be changed from the default
+by a system-wide configuration file (see
+.BR resolver (5)).
+.SH SEE ALSO
+.BR getaddrinfo (3),
+.BR gethostbyname (3),
+.BR nsswitch.conf (5),
+.BR resolver (5),
+.BR mailaddr (7),
+.BR named (8)
+.PP
+.UR http://www.ietf.org\:/rfc\:/rfc1123.txt
+IETF RFC\ 1123
+.UE
+.PP
+.UR http://www.ietf.org\:/rfc\:/rfc1178.txt
+IETF RFC\ 1178
+.UE
+.\" .SH HISTORY
+.\" Hostname appeared in
+.\" 4.2BSD.
diff --git a/man7/icmp.7 b/man7/icmp.7
new file mode 100644
index 0000000..cd54614
--- /dev/null
+++ b/man7/icmp.7
@@ -0,0 +1,196 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: icmp.7,v 1.6 2000/08/14 08:03:45 ak Exp $
+.\"
+.TH icmp 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+icmp \- Linux IPv4 ICMP kernel module
+.SH DESCRIPTION
+This kernel protocol module implements the Internet Control
+Message Protocol defined in RFC\ 792.
+It is used to signal error conditions and for diagnosis.
+The user doesn't interact directly with this module;
+instead it communicates with the other protocols in the kernel
+and these pass the ICMP errors to the application layers.
+The kernel ICMP module also answers ICMP requests.
+.PP
+A user protocol may receive ICMP packets for all local sockets by opening
+a raw socket with the protocol
+.BR IPPROTO_ICMP .
+See
+.BR raw (7)
+for more information.
+The types of ICMP packets passed to the socket can be filtered using the
+.B ICMP_FILTER
+socket option.
+ICMP packets are always processed by the kernel too, even
+when passed to a user socket.
+.PP
+Linux limits the rate of ICMP error packets to each destination.
+.B ICMP_REDIRECT
+and
+.B ICMP_DEST_UNREACH
+are also limited by the destination route of the incoming packets.
+.SS /proc interfaces
+ICMP supports a set of
+.I /proc
+interfaces to configure some global IP parameters.
+The parameters can be accessed by reading or writing files in the directory
+.IR /proc/sys/net/ipv4/ .
+Most of these parameters are rate limitations for specific ICMP types.
+Linux 2.2 uses a token bucket filter to limit ICMPs.
+.\" FIXME . better description needed
+The value is the timeout in jiffies until the token bucket filter is
+cleared after a burst.
+A jiffy is a system-dependent unit, usually 10ms on i386 and
+about 1ms on alpha and ia64.
+.TP
+.IR icmp_destunreach_rate " (Linux 2.2 to Linux 2.4.9)"
+.\" Precisely: from Linux 2.1.102
+Maximum rate to send ICMP Destination Unreachable packets.
+This limits the rate at which packets are sent to any individual
+route or destination.
+The limit does not affect sending of
+.B ICMP_FRAG_NEEDED
+packets needed for path MTU discovery.
+.TP
+.IR icmp_echo_ignore_all " (since Linux 2.2)"
+.\" Precisely: 2.1.68
+If this value is nonzero, Linux will ignore all
+.B ICMP_ECHO
+requests.
+.TP
+.IR icmp_echo_ignore_broadcasts " (since Linux 2.2)"
+.\" Precisely: from Linux 2.1.68
+If this value is nonzero, Linux will ignore all
+.B ICMP_ECHO
+packets sent to broadcast addresses.
+.TP
+.IR icmp_echoreply_rate " (Linux 2.2 to Linux 2.4.9)"
+.\" Precisely: from Linux 2.1.102
+Maximum rate for sending
+.B ICMP_ECHOREPLY
+packets in response to
+.B ICMP_ECHOREQUEST
+packets.
+.TP
+.IR icmp_errors_use_inbound_ifaddr " (Boolean; default: disabled; since Linux 2.6.12)"
+.\" The following taken from Linux 2.6.28-rc4 Documentation/networking/ip-sysctl.txt
+If disabled, ICMP error messages are sent with the primary address of
+the exiting interface.
+.IP
+If enabled, the message will be sent with the primary address of
+the interface that received the packet that caused the ICMP error.
+This is the behavior that many network administrators will expect from
+a router.
+And it can make debugging complicated network layouts much easier.
+.IP
+Note that if no primary address exists for the interface selected,
+then the primary address of the first non-loopback interface that
+has one will be used regardless of this setting.
+.TP
+.IR icmp_ignore_bogus_error_responses " (Boolean; default: disabled; since Linux 2.2)"
+.\" precisely: since Linux 2.1.32
+.\" The following taken from Linux 2.6.28-rc4 Documentation/networking/ip-sysctl.txt
+Some routers violate RFC1122 by sending bogus responses to broadcast frames.
+Such violations are normally logged via a kernel warning.
+If this parameter is enabled, the kernel will not give such warnings,
+which will avoid log file clutter.
+.TP
+.IR icmp_paramprob_rate " (Linux 2.2 to Linux 2.4.9)"
+.\" Precisely: from Linux 2.1.102
+Maximum rate for sending
+.B ICMP_PARAMETERPROB
+packets.
+These packets are sent when a packet arrives with an invalid IP header.
+.TP
+.IR icmp_ratelimit " (integer; default: 1000; since Linux 2.4.10)"
+.\" The following taken from Linux 2.6.28-rc4 Documentation/networking/ip-sysctl.txt
+Limit the maximum rates for sending ICMP packets whose type matches
+.I icmp_ratemask
+(see below) to specific targets.
+0 to disable any limiting,
+otherwise the minimum space between responses in milliseconds.
+.TP
+.IR icmp_ratemask " (integer; default: see below; since Linux 2.4.10)"
+.\" The following taken from Linux 2.6.28-rc4 Documentation/networking/ip-sysctl.txt
+Mask made of ICMP types for which rates are being limited.
+.IP
+Significant bits: IHGFEDCBA9876543210
+.br
+Default mask: 0000001100000011000 (0x1818)
+.IP
+Bit definitions (see the Linux kernel source file
+.IR include/linux/icmp.h ):
+.RS 12
+.TS
+l l.
+0 Echo Reply
+3 Destination Unreachable *
+4 Source Quench *
+5 Redirect
+8 Echo Request
+B Time Exceeded *
+C Parameter Problem *
+D Timestamp Request
+E Timestamp Reply
+F Info Request
+G Info Reply
+H Address Mask Request
+I Address Mask Reply
+.TE
+.RE
+.IP
+The bits marked with an asterisk are rate limited by default
+(see the default mask above).
+.TP
+.IR icmp_timeexceed_rate " (Linux 2.2 to Linux 2.4.9)"
+Maximum rate for sending
+.B ICMP_TIME_EXCEEDED
+packets.
+These packets are
+sent to prevent loops when a packet has crossed too many hops.
+.TP
+.IR ping_group_range " (two integers; default: see below; since Linux 2.6.39)"
+Range of the group IDs (minimum and maximum group IDs, inclusive)
+that are allowed to create ICMP Echo sockets.
+The default is "1 0", which
+means no group is allowed to create ICMP Echo sockets.
+.SH VERSIONS
+Support for the
+.B ICMP_ADDRESS
+request was removed in Linux 2.2.
+.PP
+Support for
+.B ICMP_SOURCE_QUENCH
+was removed in Linux 2.2.
+.SH NOTES
+As many other implementations don't support
+.B IPPROTO_ICMP
+raw sockets, this feature
+should not be relied on in portable programs.
+.\" not really true ATM
+.\" .PP
+.\" Linux ICMP should be compliant to RFC 1122.
+.PP
+.B ICMP_REDIRECT
+packets are not sent when Linux is not acting as a router.
+They are also accepted only from the old gateway defined in the
+routing table and the redirect routes are expired after some time.
+.PP
+The 64-bit timestamp returned by
+.B ICMP_TIMESTAMP
+is in milliseconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
+.PP
+Linux ICMP internally uses a raw socket to send ICMPs.
+This raw socket may appear in
+.BR netstat (8)
+output with a zero inode.
+.SH SEE ALSO
+.BR ip (7),
+.BR rdisc (8)
+.PP
+RFC\ 792 for a description of the ICMP protocol.
diff --git a/man7/inode.7 b/man7/inode.7
new file mode 100644
index 0000000..3cbdeee
--- /dev/null
+++ b/man7/inode.7
@@ -0,0 +1,481 @@
+'\" t
+.\" Copyright (c) 2017 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH inode 7 2023-07-30 "Linux man-pages 6.05.01"
+.SH NAME
+inode \- file inode information
+.SH DESCRIPTION
+Each file has an inode containing metadata about the file.
+An application can retrieve this metadata using
+.BR stat (2)
+(or related calls), which returns a
+.I stat
+structure, or
+.BR statx (2),
+which returns a
+.I statx
+structure.
+.PP
+The following is a list of the information typically found in,
+or associated with, the file inode,
+with the names of the corresponding structure fields returned by
+.BR stat (2)
+and
+.BR statx (2):
+.TP
+Device where inode resides
+\fIstat.st_dev\fP; \fIstatx.stx_dev_minor\fP and \fIstatx.stx_dev_major\fP
+.IP
+Each inode (as well as the associated file) resides in a filesystem
+that is hosted on a device.
+That device is identified by the combination of its major ID
+(which identifies the general class of device)
+and minor ID (which identifies a specific instance in the general class).
+.TP
+Inode number
+\fIstat.st_ino\fP; \fIstatx.stx_ino\fP
+.IP
+Each file in a filesystem has a unique inode number.
+Inode numbers are guaranteed to be unique only within a filesystem
+(i.e., the same inode numbers may be used by different filesystems,
+which is the reason that hard links may not cross filesystem boundaries).
+This field contains the file's inode number.
+.TP
+File type and mode
+\fIstat.st_mode\fP; \fIstatx.stx_mode\fP
+.IP
+See the discussion of file type and mode, below.
+.TP
+Link count
+\fIstat.st_nlink\fP; \fIstatx.stx_nlink\fP
+.IP
+This field contains the number of hard links to the file.
+Additional links to an existing file are created using
+.BR link (2).
+.TP
+User ID
+\fIstat.st_uid\fP; \fIstatx.stx_uid\fP
+.IP
+This field records the user ID of the owner of the file.
+For newly created files,
+the file user ID is the effective user ID of the creating process.
+The user ID of a file can be changed using
+.BR chown (2).
+.TP
+Group ID
+\fIstat.st_gid\fP; \fIstatx.stx_gid\fP
+.IP
+The inode records the ID of the group owner of the file.
+For newly created files,
+the file group ID is either the group ID of the parent directory or
+the effective group ID of the creating process,
+depending on whether or not the set-group-ID bit
+is set on the parent directory (see below).
+The group ID of a file can be changed using
+.BR chown (2).
+.TP
+Device represented by this inode
+\fIstat.st_rdev\fP; \fIstatx.stx_rdev_minor\fP and \fIstatx.stx_rdev_major\fP
+.IP
+If this file (inode) represents a device,
+then the inode records the major and minor ID of that device.
+.TP
+File size
+\fIstat.st_size\fP; \fIstatx.stx_size\fP
+.IP
+This field gives the size of the file (if it is a regular
+file or a symbolic link) in bytes.
+The size of a symbolic link is the length of the pathname
+it contains, without a terminating null byte.
+.TP
+Preferred block size for I/O
+\fIstat.st_blksize\fP; \fIstatx.stx_blksize\fP
+.IP
+This field gives the "preferred" blocksize for efficient filesystem I/O.
+(Writing to a file in smaller chunks may cause
+an inefficient read-modify-rewrite.)
+.TP
+Number of blocks allocated to the file
+\fIstat.st_blocks\fP; \fIstatx.stx_blocks\fP
+.IP
+This field indicates the number of blocks allocated to the file,
+in 512-byte units.
+(This may be smaller than
+.IR st_size /512
+when the file has holes.)
+.IP
+The POSIX.1 standard notes
+.\" Rationale for sys/stat.h in POSIX.1-2008
+that the unit for the
+.I st_blocks
+member of the
+.I stat
+structure is not defined by the standard.
+On many implementations it is 512 bytes;
+on a few systems, a different unit is used, such as 1024.
+Furthermore, the unit may differ on a per-filesystem basis.
+.TP
+Last access timestamp (atime)
+\fIstat.st_atime\fP; \fIstatx.stx_atime\fP
+.IP
+This is the file's last access timestamp.
+It is changed by file accesses, for example, by
+.BR execve (2),
+.BR mknod (2),
+.BR pipe (2),
+.BR utime (2),
+and
+.BR read (2)
+(of more than zero bytes).
+Other interfaces, such as
+.BR mmap (2),
+may or may not update the atime timestamp.
+.IP
+Some filesystem types allow mounting in such a way that file
+and/or directory accesses do not cause an update of the atime timestamp.
+(See
+.IR noatime ,
+.IR nodiratime ,
+and
+.I relatime
+in
+.BR mount (8),
+and related information in
+.BR mount (2).)
+In addition, the atime timestamp
+is not updated if a file is opened with the
+.B O_NOATIME
+flag; see
+.BR open (2).
+.TP
+File creation (birth) timestamp (btime)
+(not returned in the \fIstat\fP structure); \fIstatx.stx_btime\fP
+.IP
+The file's creation timestamp.
+This is set on file creation and not changed subsequently.
+.IP
+The btime timestamp was not historically present on UNIX systems
+and is not currently supported by most Linux filesystems.
+.\" FIXME Is it supported on ext4 and XFS?
+.TP
+Last modification timestamp (mtime)
+\fIstat.st_mtime\fP; \fIstatx.stx_mtime\fP
+.IP
+This is the file's last modification timestamp.
+It is changed by file modifications, for example, by
+.BR mknod (2),
+.BR truncate (2),
+.BR utime (2),
+and
+.BR write (2)
+(of more than zero bytes).
+Moreover, the mtime timestamp
+of a directory is changed by the creation or deletion of files
+in that directory.
+The mtime timestamp is
+.I not
+changed for changes in owner, group, hard link count, or mode.
+.TP
+Last status change timestamp (ctime)
+\fIstat.st_ctime\fP; \fIstatx.stx_ctime\fP
+.IP
+This is the file's last status change timestamp.
+It is changed by writing or by setting inode information
+(i.e., owner, group, link count, mode, etc.).
+.PP
+The timestamp fields report time measured with a zero point at the
+.IR Epoch ,
+1970-01-01 00:00:00 +0000, UTC (see
+.BR time (7)).
+.PP
+Nanosecond timestamps are supported on XFS, JFS, Btrfs, and
+ext4 (since Linux 2.6.23).
+.\" commit ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80
+Nanosecond timestamps are not supported in ext2, ext3, and Reiserfs.
+In order to return timestamps with nanosecond precision,
+the timestamp fields in the
+.I stat
+and
+.I statx
+structures are defined as structures that include a nanosecond component.
+See
+.BR stat (2)
+and
+.BR statx (2)
+for details.
+On filesystems that do not support subsecond timestamps,
+the nanosecond fields in the
+.I stat
+and
+.I statx
+structures are returned with the value 0.
+.\"
+.SS The file type and mode
+The
+.I stat.st_mode
+field (for
+.BR statx (2),
+the
+.I statx.stx_mode
+field) contains the file type and mode.
+.PP
+POSIX refers to the
+.I stat.st_mode
+bits corresponding to the mask
+.B S_IFMT
+(see below) as the
+.IR "file type" ,
+the 12 bits corresponding to the mask 07777 as the
+.I file mode bits
+and the least significant 9 bits (0777) as the
+.IR "file permission bits" .
+.PP
+The following mask values are defined for the file type:
+.in +4n
+.TS
+lB l l.
+S_IFMT 0170000 bit mask for the file type bit field
+
+S_IFSOCK 0140000 socket
+S_IFLNK 0120000 symbolic link
+S_IFREG 0100000 regular file
+S_IFBLK 0060000 block device
+S_IFDIR 0040000 directory
+S_IFCHR 0020000 character device
+S_IFIFO 0010000 FIFO
+.TE
+.in
+.PP
+Thus, to test for a regular file (for example), one could write:
+.PP
+.in +4n
+.EX
+stat(pathname, &sb);
+if ((sb.st_mode & S_IFMT) == S_IFREG) {
+ /* Handle regular file */
+}
+.EE
+.in
+.PP
+Because tests of the above form are common, additional
+macros are defined by POSIX to allow the test of the file type in
+.I st_mode
+to be written more concisely:
+.RS 4
+.TP 1.2i
+.BR S_ISREG (m)
+is it a regular file?
+.TP
+.BR S_ISDIR (m)
+directory?
+.TP
+.BR S_ISCHR (m)
+character device?
+.TP
+.BR S_ISBLK (m)
+block device?
+.TP
+.BR S_ISFIFO (m)
+FIFO (named pipe)?
+.TP
+.BR S_ISLNK (m)
+symbolic link? (Not in POSIX.1-1996.)
+.TP
+.BR S_ISSOCK (m)
+socket? (Not in POSIX.1-1996.)
+.RE
+.PP
+The preceding code snippet could thus be rewritten as:
+.PP
+.in +4n
+.EX
+stat(pathname, &sb);
+if (S_ISREG(sb.st_mode)) {
+ /* Handle regular file */
+}
+.EE
+.in
+.PP
+The definitions of most of the above file type test macros
+are provided if any of the following feature test macros is defined:
+.B _BSD_SOURCE
+(in glibc 2.19 and earlier),
+.B _SVID_SOURCE
+(in glibc 2.19 and earlier),
+or
+.B _DEFAULT_SOURCE
+(in glibc 2.20 and later).
+In addition, definitions of all of the above macros except
+.B S_IFSOCK
+and
+.BR S_ISSOCK ()
+are provided if
+.B _XOPEN_SOURCE
+is defined.
+.PP
+The definition of
+.B S_IFSOCK
+can also be exposed either by defining
+.B _XOPEN_SOURCE
+with a value of 500 or greater or (since glibc 2.24) by defining both
+.B _XOPEN_SOURCE
+and
+.BR _XOPEN_SOURCE_EXTENDED .
+.PP
+The definition of
+.BR S_ISSOCK ()
+is exposed if any of the following feature test macros is defined:
+.B _BSD_SOURCE
+(in glibc 2.19 and earlier),
+.B _DEFAULT_SOURCE
+(in glibc 2.20 and later),
+.B _XOPEN_SOURCE
+with a value of 500 or greater,
+.B _POSIX_C_SOURCE
+with a value of 200112L or greater, or (since glibc 2.24) by defining both
+.B _XOPEN_SOURCE
+and
+.BR _XOPEN_SOURCE_EXTENDED .
+.PP
+The following mask values are defined for
+the file mode component of the
+.I st_mode
+field:
+.in +4n
+.TS
+lB l lx.
+S_ISUID 04000 T{
+set-user-ID bit (see \fBexecve\fP(2))
+T}
+S_ISGID 02000 T{
+set-group-ID bit (see below)
+T}
+S_ISVTX 01000 T{
+sticky bit (see below)
+T}
+
+S_IRWXU 00700 T{
+owner has read, write, and execute permission
+T}
+S_IRUSR 00400 T{
+owner has read permission
+T}
+S_IWUSR 00200 T{
+owner has write permission
+T}
+S_IXUSR 00100 T{
+owner has execute permission
+T}
+
+S_IRWXG 00070 T{
+group has read, write, and execute permission
+T}
+S_IRGRP 00040 T{
+group has read permission
+T}
+S_IWGRP 00020 T{
+group has write permission
+T}
+S_IXGRP 00010 T{
+group has execute permission
+T}
+
+S_IRWXO 00007 T{
+others (not in group) have read, write, and execute permission
+T}
+S_IROTH 00004 T{
+others have read permission
+T}
+S_IWOTH 00002 T{
+others have write permission
+T}
+S_IXOTH 00001 T{
+others have execute permission
+T}
+.TE
+.in
+.PP
+The set-group-ID bit
+.RB ( S_ISGID )
+has several special uses.
+For a directory, it indicates that BSD semantics are to be used
+for that directory: files created there inherit their group ID from
+the directory, not from the effective group ID of the creating process,
+and directories created there will also get the
+.B S_ISGID
+bit set.
+For an executable file, the set-group-ID bit causes the effective group ID
+of a process that executes the file to change as described in
+.BR execve (2).
+For a file that does not have the group execution bit
+.RB ( S_IXGRP )
+set,
+the set-group-ID bit indicates mandatory file/record locking.
+.PP
+The sticky bit
+.RB ( S_ISVTX )
+on a directory means that a file
+in that directory can be renamed or deleted only by the owner
+of the file, by the owner of the directory, and by a privileged
+process.
+.SH STANDARDS
+POSIX.1-2008.
+.SH HISTORY
+POSIX.1-2001.
+.PP
+POSIX.1-1990 did not describe the
+.BR S_IFMT ,
+.BR S_IFSOCK ,
+.BR S_IFLNK ,
+.BR S_IFREG ,
+.BR S_IFBLK ,
+.BR S_IFDIR ,
+.BR S_IFCHR ,
+.BR S_IFIFO ,
+and
+.B S_ISVTX
+constants, but instead specified the use of
+the macros
+.BR S_ISDIR ()
+and so on.
+.PP
+The
+.BR S_ISLNK ()
+and
+.BR S_ISSOCK ()
+macros were not in
+POSIX.1-1996;
+the former is from SVID 4, the latter from SUSv2.
+.PP
+UNIX\ V7 (and later systems) had
+.BR S_IREAD ,
+.BR S_IWRITE ,
+and
+.BR S_IEXEC ,
+where POSIX
+prescribes the synonyms
+.BR S_IRUSR ,
+.BR S_IWUSR ,
+and
+.BR S_IXUSR .
+.SH NOTES
+For pseudofiles that are autogenerated by the kernel, the file size
+(\fIstat.st_size\fP; \fIstatx.stx_size\fP)
+reported by the kernel is not accurate.
+For example, the value 0 is returned for many files under the
+.I /proc
+directory,
+while various files under
+.I /sys
+report a size of 4096 bytes, even though the file content is smaller.
+For such files, one should simply try to read as many bytes as possible
+(and append \[aq]\e0\[aq] to the returned buffer
+if it is to be interpreted as a string).
+.SH SEE ALSO
+.BR stat (1),
+.BR stat (2),
+.BR statx (2),
+.BR symlink (7)
diff --git a/man7/inotify.7 b/man7/inotify.7
new file mode 100644
index 0000000..73a6ab0
--- /dev/null
+++ b/man7/inotify.7
@@ -0,0 +1,1100 @@
+.\" Copyright (C) 2006, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Copyright (C) 2014 Heinrich Schuchardt <xypron.glpk@gmx.de>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH inotify 7 2023-07-08 "Linux man-pages 6.05.01"
+.SH NAME
+inotify \- monitoring filesystem events
+.SH DESCRIPTION
+The
+.I inotify
+API provides a mechanism for monitoring filesystem events.
+Inotify can be used to monitor individual files,
+or to monitor directories.
+When a directory is monitored, inotify will return events
+for the directory itself, and for files inside the directory.
+.PP
+The following system calls are used with this API:
+.IP \[bu] 3
+.BR inotify_init (2)
+creates an inotify instance and returns a file descriptor
+referring to the inotify instance.
+The more recent
+.BR inotify_init1 (2)
+is like
+.BR inotify_init (2),
+but has a
+.I flags
+argument that provides access to some extra functionality.
+.IP \[bu]
+.BR inotify_add_watch (2)
+manipulates the "watch list" associated with an inotify instance.
+Each item ("watch") in the watch list specifies the pathname of
+a file or directory,
+along with some set of events that the kernel should monitor for the
+file referred to by that pathname.
+.BR inotify_add_watch (2)
+either creates a new watch item, or modifies an existing watch.
+Each watch has a unique "watch descriptor", an integer
+returned by
+.BR inotify_add_watch (2)
+when the watch is created.
+.IP \[bu]
+When events occur for monitored files and directories,
+those events are made available to the application as structured data that
+can be read from the inotify file descriptor using
+.BR read (2)
+(see below).
+.IP \[bu]
+.BR inotify_rm_watch (2)
+removes an item from an inotify watch list.
+.IP \[bu]
+When all file descriptors referring to an inotify
+instance have been closed (using
+.BR close (2)),
+the underlying object and its resources are
+freed for reuse by the kernel;
+all associated watches are automatically freed.
+.PP
+With careful programming,
+an application can use inotify to efficiently monitor and cache
+the state of a set of filesystem objects.
+However, robust applications should allow for the fact that bugs
+in the monitoring logic or races of the kind described below
+may leave the cache inconsistent with the filesystem state.
+It is probably wise to do some consistency checking,
+and rebuild the cache when inconsistencies are detected.
+.SS Reading events from an inotify file descriptor
+To determine what events have occurred, an application
+.BR read (2)s
+from the inotify file descriptor.
+If no events have so far occurred, then,
+assuming a blocking file descriptor,
+.BR read (2)
+will block until at least one event occurs
+(unless interrupted by a signal,
+in which case the call fails with the error
+.BR EINTR ;
+see
+.BR signal (7)).
+.PP
+Each successful
+.BR read (2)
+returns a buffer containing one or more of the following structures:
+.PP
+.in +4n
+.EX
+struct inotify_event {
+ int wd; /* Watch descriptor */
+.\" FIXME . The type of the 'wd' field should probably be "int32_t".
+.\" I submitted a patch to fix this. See the LKML thread
+.\" "[patch] Fix type errors in inotify interfaces", 18 Nov 2008
+.\" glibc bug filed: https://www.sourceware.org/bugzilla/show_bug.cgi?id=7040
+ uint32_t mask; /* Mask describing event */
+ uint32_t cookie; /* Unique cookie associating related
+ events (for rename(2)) */
+ uint32_t len; /* Size of \fIname\fP field */
+ char name[]; /* Optional null\-terminated name */
+};
+.EE
+.in
+.PP
+.I wd
+identifies the watch for which this event occurs.
+It is one of the watch descriptors returned by a previous call to
+.BR inotify_add_watch (2).
+.PP
+.I mask
+contains bits that describe the event that occurred (see below).
+.PP
+.I cookie
+is a unique integer that connects related events.
+Currently, this is used only for rename events, and
+allows the resulting pair of
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+events to be connected by the application.
+For all other event types,
+.I cookie
+is set to 0.
+.PP
+The
+.I name
+field is present only when an event is returned
+for a file inside a watched directory;
+it identifies the filename within the watched directory.
+This filename is null-terminated,
+and may include further null bytes (\[aq]\e0\[aq])
+to align subsequent reads to a suitable address boundary.
+.PP
+The
+.I len
+field counts all of the bytes in
+.IR name ,
+including the null bytes;
+the length of each
+.I inotify_event
+structure is thus
+.IR "sizeof(struct inotify_event)+len" .
+.PP
+The behavior when the buffer given to
+.BR read (2)
+is too small to return information about the next event depends
+on the kernel version: before Linux 2.6.21,
+.BR read (2)
+returns 0; since Linux 2.6.21,
+.BR read (2)
+fails with the error
+.BR EINVAL .
+Specifying a buffer of size
+.PP
+.in +4n
+.EX
+sizeof(struct inotify_event) + NAME_MAX + 1
+.EE
+.in
+.PP
+will be sufficient to read at least one event.
+.SS inotify events
+The
+.BR inotify_add_watch (2)
+.I mask
+argument and the
+.I mask
+field of the
+.I inotify_event
+structure returned when
+.BR read (2)ing
+an inotify file descriptor are both bit masks identifying
+inotify events.
+The following bits can be specified in
+.I mask
+when calling
+.BR inotify_add_watch (2)
+and may be returned in the
+.I mask
+field returned by
+.BR read (2):
+.RS 4
+.TP
+.BR IN_ACCESS " (+)"
+File was accessed (e.g.,
+.BR read (2),
+.BR execve (2)).
+.TP
+.BR IN_ATTRIB " (*)"
+Metadata changed\[em]for example, permissions (e.g.,
+.BR chmod (2)),
+timestamps (e.g.,
+.BR utimensat (2)),
+extended attributes
+.RB ( setxattr (2)),
+link count (since Linux 2.6.25; e.g.,
+.\" FIXME .
+.\" Events do not occur for link count changes on a file inside a monitored
+.\" directory. This differs from other metadata changes for files inside
+.\" a monitored directory.
+for the target of
+.BR link (2)
+and for
+.BR unlink (2)),
+and user/group ID (e.g.,
+.BR chown (2)).
+.TP
+.BR IN_CLOSE_WRITE " (+)"
+File opened for writing was closed.
+.TP
+.BR IN_CLOSE_NOWRITE " (*)"
+File or directory not opened for writing was closed.
+.TP
+.BR IN_CREATE " (+)"
+File/directory created in watched directory (e.g.,
+.BR open (2)
+.BR O_CREAT ,
+.BR mkdir (2),
+.BR link (2),
+.BR symlink (2),
+.BR bind (2)
+on a UNIX domain socket).
+.TP
+.BR IN_DELETE " (+)"
+File/directory deleted from watched directory.
+.TP
+.B IN_DELETE_SELF
+Watched file/directory was itself deleted.
+(This event also occurs if an object is moved to another filesystem,
+since
+.BR mv (1)
+in effect copies the file to the other filesystem and
+then deletes it from the original filesystem.)
+In addition, an
+.B IN_IGNORED
+event will subsequently be generated for the watch descriptor.
+.TP
+.BR IN_MODIFY " (+)"
+File was modified (e.g.,
+.BR write (2),
+.BR truncate (2)).
+.TP
+.B IN_MOVE_SELF
+Watched file/directory was itself moved.
+.TP
+.BR IN_MOVED_FROM " (+)"
+Generated for the directory containing the old filename
+when a file is renamed.
+.TP
+.BR IN_MOVED_TO " (+)"
+Generated for the directory containing the new filename
+when a file is renamed.
+.TP
+.BR IN_OPEN " (*)"
+File or directory was opened.
+.RE
+.PP
+Inotify monitoring is inode-based: when monitoring a file
+(but not when monitoring the directory containing a file),
+an event can be generated for activity on any link to the file
+(in the same or a different directory).
+.PP
+When monitoring a directory:
+.IP \[bu] 3
+the events marked above with an asterisk (*) can occur both
+for the directory itself and for objects inside the directory; and
+.IP \[bu]
+the events marked with a plus sign (+) occur only for objects
+inside the directory (not for the directory itself).
+.PP
+.IR Note :
+when monitoring a directory,
+events are not generated for the files inside the directory
+when the events are performed via a pathname (i.e., a link)
+that lies outside the monitored directory.
+.PP
+When events are generated for objects inside a watched directory, the
+.I name
+field in the returned
+.I inotify_event
+structure identifies the name of the file within the directory.
+.PP
+The
+.B IN_ALL_EVENTS
+macro is defined as a bit mask of all of the above events.
+This macro can be used as the
+.I mask
+argument when calling
+.BR inotify_add_watch (2).
+.PP
+Two additional convenience macros are defined:
+.RS 4
+.TP
+.B IN_MOVE
+Equates to
+.BR "IN_MOVED_FROM | IN_MOVED_TO" .
+.TP
+.B IN_CLOSE
+Equates to
+.BR "IN_CLOSE_WRITE | IN_CLOSE_NOWRITE" .
+.RE
+.PP
+The following further bits can be specified in
+.I mask
+when calling
+.BR inotify_add_watch (2):
+.RS 4
+.TP
+.BR IN_DONT_FOLLOW " (since Linux 2.6.15)"
+Don't dereference
+.I pathname
+if it is a symbolic link.
+.TP
+.BR IN_EXCL_UNLINK " (since Linux 2.6.36)"
+.\" commit 8c1934c8d70b22ca8333b216aec6c7d09fdbd6a6
+By default, when watching events on the children of a directory,
+events are generated for children even after they have been unlinked
+from the directory.
+This can result in large numbers of uninteresting events for
+some applications (e.g., if watching
+.IR /tmp ,
+in which many applications create temporary files whose
+names are immediately unlinked).
+Specifying
+.B IN_EXCL_UNLINK
+changes the default behavior,
+so that events are not generated for children after
+they have been unlinked from the watched directory.
+.TP
+.B IN_MASK_ADD
+If a watch instance already exists for the filesystem object corresponding to
+.IR pathname ,
+add (OR) the events in
+.I mask
+to the watch mask (instead of replacing the mask);
+the error
+.B EINVAL
+results if
+.B IN_MASK_CREATE
+is also specified.
+.TP
+.B IN_ONESHOT
+Monitor the filesystem object corresponding to
+.I pathname
+for one event, then remove from
+watch list.
+.TP
+.BR IN_ONLYDIR " (since Linux 2.6.15)"
+Watch
+.I pathname
+only if it is a directory;
+the error
+.B ENOTDIR
+results if
+.I pathname
+is not a directory.
+Using this flag provides an application with a race-free way of
+ensuring that the monitored object is a directory.
+.TP
+.BR IN_MASK_CREATE " (since Linux 4.18)"
+Watch
+.I pathname
+only if it does not already have a watch associated with it;
+the error
+.B EEXIST
+results if
+.I pathname
+is already being watched.
+.IP
+Using this flag provides an application with a way of ensuring
+that new watches do not modify existing ones.
+This is useful because multiple paths may refer to the same inode,
+and multiple calls to
+.BR inotify_add_watch (2)
+without this flag may clobber existing watch masks.
+.RE
+.PP
+The following bits may be set in the
+.I mask
+field returned by
+.BR read (2):
+.RS 4
+.TP
+.B IN_IGNORED
+Watch was removed explicitly
+.RB ( inotify_rm_watch (2))
+or automatically (file was deleted, or filesystem was unmounted).
+See also BUGS.
+.TP
+.B IN_ISDIR
+Subject of this event is a directory.
+.TP
+.B IN_Q_OVERFLOW
+Event queue overflowed
+.RI ( wd
+is \-1 for this event).
+.TP
+.B IN_UNMOUNT
+Filesystem containing watched object was unmounted.
+In addition, an
+.B IN_IGNORED
+event will subsequently be generated for the watch descriptor.
+.RE
+.SS Examples
+Suppose an application is watching the directory
+.I dir
+and the file
+.I dir/myfile
+for all events.
+The examples below show some events that will be generated
+for these two objects.
+.RS 4
+.TP
+fd = open("dir/myfile", O_RDWR);
+Generates
+.B IN_OPEN
+events for both
+.I dir
+and
+.IR dir/myfile .
+.TP
+read(fd, buf, count);
+Generates
+.B IN_ACCESS
+events for both
+.I dir
+and
+.IR dir/myfile .
+.TP
+write(fd, buf, count);
+Generates
+.B IN_MODIFY
+events for both
+.I dir
+and
+.IR dir/myfile .
+.TP
+fchmod(fd, mode);
+Generates
+.B IN_ATTRIB
+events for both
+.I dir
+and
+.IR dir/myfile .
+.TP
+close(fd);
+Generates
+.B IN_CLOSE_WRITE
+events for both
+.I dir
+and
+.IR dir/myfile .
+.RE
+.PP
+Suppose an application is watching the directories
+.I dir1
+and
+.IR dir2 ,
+and the file
+.IR dir1/myfile .
+The following examples show some events that may be generated.
+.RS 4
+.TP
+link("dir1/myfile", "dir2/new");
+Generates an
+.B IN_ATTRIB
+event for
+.I myfile
+and an
+.B IN_CREATE
+event for
+.IR dir2 .
+.TP
+rename("dir1/myfile", "dir2/myfile");
+Generates an
+.B IN_MOVED_FROM
+event for
+.IR dir1 ,
+an
+.B IN_MOVED_TO
+event for
+.IR dir2 ,
+and an
+.B IN_MOVE_SELF
+event for
+.IR myfile .
+The
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+events will have the same
+.I cookie
+value.
+.RE
+.PP
+Suppose that
+.I dir1/xx
+and
+.I dir2/yy
+are (the only) links to the same file, and an application is watching
+.IR dir1 ,
+.IR dir2 ,
+.IR dir1/xx ,
+and
+.IR dir2/yy .
+Executing the following calls in the order given below will generate
+the following events:
+.RS 4
+.TP
+unlink("dir2/yy");
+Generates an
+.B IN_ATTRIB
+event for
+.I xx
+(because its link count changes)
+and an
+.B IN_DELETE
+event for
+.IR dir2 .
+.TP
+unlink("dir1/xx");
+Generates
+.BR IN_ATTRIB ,
+.BR IN_DELETE_SELF ,
+and
+.B IN_IGNORED
+events for
+.IR xx ,
+and an
+.B IN_DELETE
+event for
+.IR dir1 .
+.RE
+.PP
+Suppose an application is watching the directory
+.I dir
+and (the empty) directory
+.IR dir/subdir .
+The following examples show some events that may be generated.
+.RS 4
+.TP
+mkdir("dir/new", mode);
+Generates an
+.B "IN_CREATE | IN_ISDIR"
+event for
+.IR dir .
+.TP
+rmdir("dir/subdir");
+Generates
+.B IN_DELETE_SELF
+and
+.B IN_IGNORED
+events for
+.IR subdir ,
+and an
+.B "IN_DELETE | IN_ISDIR"
+event for
+.IR dir .
+.RE
+.SS /proc interfaces
+The following interfaces can be used to limit the amount of
+kernel memory consumed by inotify:
+.TP
+.I /proc/sys/fs/inotify/max_queued_events
+The value in this file is used when an application calls
+.BR inotify_init (2)
+to set an upper limit on the number of events that can be
+queued to the corresponding inotify instance.
+Events in excess of this limit are dropped, but an
+.B IN_Q_OVERFLOW
+event is always generated.
+.TP
+.I /proc/sys/fs/inotify/max_user_instances
+This specifies an upper limit on the number of inotify instances
+that can be created per real user ID.
+.TP
+.I /proc/sys/fs/inotify/max_user_watches
+This specifies an upper limit on the number of watches
+that can be created per real user ID.
+.SH STANDARDS
+Linux.
+.SH HISTORY
+Inotify was merged into Linux 2.6.13.
+The required library interfaces were added in glibc 2.4.
+.RB ( IN_DONT_FOLLOW ,
+.BR IN_MASK_ADD ,
+and
+.B IN_ONLYDIR
+were added in glibc 2.5.)
+.SH NOTES
+Inotify file descriptors can be monitored using
+.BR select (2),
+.BR poll (2),
+and
+.BR epoll (7).
+When an event is available, the file descriptor is indicated as readable.
+.PP
+Since Linux 2.6.25,
+signal-driven I/O notification is available for inotify file descriptors;
+see the discussion of
+.B F_SETFL
+(for setting the
+.B O_ASYNC
+flag),
+.BR F_SETOWN ,
+and
+.B F_SETSIG
+in
+.BR fcntl (2).
+The
+.I siginfo_t
+structure (described in
+.BR sigaction (2))
+that is passed to the signal handler has the following fields set:
+.I si_fd
+is set to the inotify file descriptor number;
+.I si_signo
+is set to the signal number;
+.I si_code
+is set to
+.BR POLL_IN ;
+and
+.B POLLIN
+is set in
+.IR si_band .
+.PP
+If successive output inotify events produced on the
+inotify file descriptor are identical (same
+.IR wd ,
+.IR mask ,
+.IR cookie ,
+and
+.IR name ),
+then they are coalesced into a single event if the
+older event has not yet been read (but see BUGS).
+This reduces the amount of kernel memory required for the event queue,
+but also means that an application can't use inotify to reliably count
+file events.
+.PP
+The events returned by reading from an inotify file descriptor
+form an ordered queue.
+Thus, for example, it is guaranteed that when renaming from
+one directory to another, events will be produced in the
+correct order on the inotify file descriptor.
+.PP
+The set of watch descriptors that is being monitored via
+an inotify file descriptor can be viewed via the entry for
+the inotify file descriptor in the process's
+.IR /proc/ pid /fdinfo
+directory.
+See
+.BR proc (5)
+for further details.
+The
+.B FIONREAD
+.BR ioctl (2)
+returns the number of bytes available to read from an
+inotify file descriptor.
+.SS Limitations and caveats
+The inotify API provides no information about the user or process that
+triggered the inotify event.
+In particular, there is no easy
+way for a process that is monitoring events via inotify
+to distinguish events that it triggers
+itself from those that are triggered by other processes.
+.PP
+Inotify reports only events that a user-space program triggers through
+the filesystem API.
+As a result, it does not catch remote events that occur
+on network filesystems.
+(Applications must fall back to polling the filesystem
+to catch such events.)
+Furthermore, various pseudo-filesystems such as
+.IR /proc ,
+.IR /sys ,
+and
+.I /dev/pts
+are not monitorable with inotify.
+.PP
+The inotify API does not report file accesses and modifications that
+may occur because of
+.BR mmap (2),
+.BR msync (2),
+and
+.BR munmap (2).
+.PP
+The inotify API identifies affected files by filename.
+However, by the time an application processes an inotify event,
+the filename may already have been deleted or renamed.
+.PP
+The inotify API identifies events via watch descriptors.
+It is the application's responsibility to cache a mapping
+(if one is needed) between watch descriptors and pathnames.
+Be aware that directory renamings may affect multiple cached pathnames.
+.PP
+Inotify monitoring of directories is not recursive:
+to monitor subdirectories under a directory,
+additional watches must be created.
+This can take a significant amount of time for large directory trees.
+.PP
+If monitoring an entire directory subtree,
+and a new subdirectory is created in that tree or an existing directory
+is renamed into that tree,
+be aware that by the time you create a watch for the new subdirectory,
+new files (and subdirectories) may already exist inside the subdirectory.
+Therefore, you may want to scan the contents of the subdirectory
+immediately after adding the watch (and, if desired,
+recursively add watches for any subdirectories that it contains).
+.PP
+Note that the event queue can overflow.
+In this case, events are lost.
+Robust applications should handle the possibility of
+lost events gracefully.
+For example, it may be necessary to rebuild part or all of
+the application cache.
+(One simple, but possibly expensive,
+approach is to close the inotify file descriptor, empty the cache,
+create a new inotify file descriptor,
+and then re-create watches and cache entries
+for the objects to be monitored.)
+.PP
+If a filesystem is mounted on top of a monitored directory,
+no event is generated, and no events are generated
+for objects immediately under the new mount point.
+If the filesystem is subsequently unmounted,
+events will subsequently be generated for the directory and
+the objects it contains.
+.\"
+.SS Dealing with rename() events
+As noted above, the
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+event pair that is generated by
+.BR rename (2)
+can be matched up via their shared cookie value.
+However, the task of matching has some challenges.
+.PP
+These two events are usually consecutive in the event stream available
+when reading from the inotify file descriptor.
+However, this is not guaranteed.
+If multiple processes are triggering events for monitored objects,
+then (on rare occasions) an arbitrary number of
+other events may appear between the
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+events.
+Furthermore, it is not guaranteed that the event pair is atomically
+inserted into the queue: there may be a brief interval where the
+.B IN_MOVED_FROM
+has appeared, but the
+.B IN_MOVED_TO
+has not.
+.PP
+Matching up the
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+event pair generated by
+.BR rename (2)
+is thus inherently racy.
+(Don't forget that if an object is renamed outside of a monitored directory,
+there may not even be an
+.B IN_MOVED_TO
+event.)
+Heuristic approaches (e.g., assume the events are always consecutive)
+can be used to ensure a match in most cases,
+but will inevitably miss some cases,
+causing the application to perceive the
+.B IN_MOVED_FROM
+and
+.B IN_MOVED_TO
+events as being unrelated.
+If watch descriptors are destroyed and re-created as a result,
+then those watch descriptors will be inconsistent with
+the watch descriptors in any pending events.
+(Re-creating the inotify file descriptor and rebuilding the cache may
+be useful to deal with this scenario.)
+.PP
+Applications should also allow for the possibility that the
+.B IN_MOVED_FROM
+event was the last event that could fit in the buffer
+returned by the current call to
+.BR read (2),
+and the accompanying
+.B IN_MOVED_TO
+event might be fetched only on the next
+.BR read (2),
+which should be done with a (small) timeout to allow for the fact that
+insertion of the
+.BR IN_MOVED_FROM + IN_MOVED_TO
+event pair is not atomic,
+and also the possibility that there may not be any
+.B IN_MOVED_TO
+event.
+.SH BUGS
+Before Linux 3.19,
+.BR fallocate (2)
+did not create any inotify events.
+Since Linux 3.19,
+.\" commit 820c12d5d6c0890bc93dd63893924a13041fdc35
+calls to
+.BR fallocate (2)
+generate
+.B IN_MODIFY
+events.
+.PP
+.\" FIXME . kernel commit 611da04f7a31b2208e838be55a42c7a1310ae321
+.\" implies that unmount events were buggy since Linux 2.6.11 to Linux 2.6.36
+.\"
+Before Linux 2.6.16, the
+.B IN_ONESHOT
+.I mask
+flag does not work.
+.PP
+As originally designed and implemented, the
+.B IN_ONESHOT
+flag did not cause an
+.B IN_IGNORED
+event to be generated when the watch was dropped after one event.
+However, as an unintended effect of other changes,
+since Linux 2.6.36, an
+.B IN_IGNORED
+event is generated in this case.
+.PP
+Before Linux 2.6.25,
+.\" commit 1c17d18e3775485bf1e0ce79575eb637a94494a2
+the kernel code that was intended to coalesce successive identical events
+(i.e., the two most recent events could potentially be coalesced
+if the older had not yet been read)
+instead checked if the most recent event could be coalesced with the
+.I oldest
+unread event.
+.PP
+When a watch descriptor is removed by calling
+.BR inotify_rm_watch (2)
+(or because a watched file is deleted or the filesystem
+that contains it is unmounted),
+any pending unread events for that watch descriptor remain available to read.
+As watch descriptors are subsequently allocated with
+.BR inotify_add_watch (2),
+the kernel cycles through the range of possible watch descriptors (1 to
+.BR INT_MAX )
+incrementally.
+When allocating a free watch descriptor, no check is made to see whether that
+watch descriptor number has any pending unread events in the inotify queue.
+Thus, it can happen that a watch descriptor is reallocated even
+when pending unread events exist for a previous incarnation of
+that watch descriptor number, with the result that the application
+might then read those events and interpret them as belonging to
+the file associated with the newly recycled watch descriptor.
+In practice, the likelihood of hitting this bug may be extremely low,
+since it requires that an application cycle through
+.B INT_MAX
+watch descriptors,
+release a watch descriptor while leaving unread events for that
+watch descriptor in the queue,
+and then recycle that watch descriptor.
+For this reason, and because there have been no reports
+of the bug occurring in real-world applications,
+as of Linux 3.15,
+.\" FIXME . https://bugzilla.kernel.org/show_bug.cgi?id=77111
+no kernel changes have yet been made to eliminate this possible bug.
+.SH EXAMPLES
+The following program demonstrates the usage of the inotify API.
+It marks the directories passed as command-line arguments
+and waits for events of type
+.BR IN_OPEN ,
+.BR IN_CLOSE_NOWRITE ,
+and
+.BR IN_CLOSE_WRITE .
+.PP
+The following output was recorded while editing the file
+.I /home/user/temp/foo
+and listing directory
+.IR /tmp .
+Before the file and the directory were opened,
+.B IN_OPEN
+events occurred.
+After the file was closed, an
+.B IN_CLOSE_WRITE
+event occurred.
+After the directory was closed, an
+.B IN_CLOSE_NOWRITE
+event occurred.
+Execution of the program ended when the user pressed the ENTER key.
+.SS Example output
+.in +4n
+.EX
+$ \fB./a.out /tmp /home/user/temp\fP
+Press ENTER key to terminate.
+Listening for events.
+IN_OPEN: /home/user/temp/foo [file]
+IN_CLOSE_WRITE: /home/user/temp/foo [file]
+IN_OPEN: /tmp/ [directory]
+IN_CLOSE_NOWRITE: /tmp/ [directory]
+\&
+Listening for events stopped.
+.EE
+.in
+.SS Program source
+\&
+.EX
+#include <errno.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+#include <string.h>
+\&
+/* Read all available inotify events from the file descriptor \[aq]fd\[aq].
+ wd is the table of watch descriptors for the directories in argv.
+ argc is the length of wd and argv.
+ argv is the list of watched directories.
+ Entry 0 of wd and argv is unused. */
+\&
+static void
+handle_events(int fd, int *wd, int argc, char* argv[])
+{
+ /* Some systems cannot read integer variables if they are not
+ properly aligned. On other systems, incorrect alignment may
+ decrease performance. Hence, the buffer used for reading from
+ the inotify file descriptor should have the same alignment as
+ struct inotify_event. */
+\&
+ char buf[4096]
+ __attribute__ ((aligned(__alignof__(struct inotify_event))));
+ const struct inotify_event *event;
+ ssize_t len;
+\&
+ /* Loop while events can be read from inotify file descriptor. */
+\&
+ for (;;) {
+\&
+ /* Read some events. */
+\&
+ len = read(fd, buf, sizeof(buf));
+ if (len == \-1 && errno != EAGAIN) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* If the nonblocking read() found no events to read, then
+ it returns \-1 with errno set to EAGAIN. In that case,
+ we exit the loop. */
+\&
+ if (len <= 0)
+ break;
+\&
+ /* Loop over all events in the buffer. */
+\&
+ for (char *ptr = buf; ptr < buf + len;
+ ptr += sizeof(struct inotify_event) + event\->len) {
+\&
+ event = (const struct inotify_event *) ptr;
+\&
+ /* Print event type. */
+\&
+ if (event\->mask & IN_OPEN)
+ printf("IN_OPEN: ");
+ if (event\->mask & IN_CLOSE_NOWRITE)
+ printf("IN_CLOSE_NOWRITE: ");
+ if (event\->mask & IN_CLOSE_WRITE)
+ printf("IN_CLOSE_WRITE: ");
+\&
+ /* Print the name of the watched directory. */
+\&
+ for (size_t i = 1; i < argc; ++i) {
+ if (wd[i] == event\->wd) {
+ printf("%s/", argv[i]);
+ break;
+ }
+ }
+\&
+ /* Print the name of the file. */
+\&
+ if (event\->len)
+ printf("%s", event\->name);
+\&
+ /* Print type of filesystem object. */
+\&
+ if (event\->mask & IN_ISDIR)
+ printf(" [directory]\en");
+ else
+ printf(" [file]\en");
+ }
+ }
+}
+\&
+int
+main(int argc, char* argv[])
+{
+ char buf;
+ int fd, i, poll_num;
+ int *wd;
+ nfds_t nfds;
+ struct pollfd fds[2];
+\&
+ if (argc < 2) {
+ printf("Usage: %s PATH [PATH ...]\en", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+\&
+ printf("Press ENTER key to terminate.\en");
+\&
+ /* Create the file descriptor for accessing the inotify API. */
+\&
+ fd = inotify_init1(IN_NONBLOCK);
+ if (fd == \-1) {
+ perror("inotify_init1");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Allocate memory for watch descriptors. */
+\&
+ wd = calloc(argc, sizeof(int));
+ if (wd == NULL) {
+ perror("calloc");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Mark directories for events
+ \- file was opened
+ \- file was closed */
+\&
+ for (i = 1; i < argc; i++) {
+ wd[i] = inotify_add_watch(fd, argv[i],
+ IN_OPEN | IN_CLOSE);
+ if (wd[i] == \-1) {
+ fprintf(stderr, "Cannot watch \[aq]%s\[aq]: %s\en",
+ argv[i], strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+\&
+ /* Prepare for polling. */
+\&
+ nfds = 2;
+\&
+ fds[0].fd = STDIN_FILENO; /* Console input */
+ fds[0].events = POLLIN;
+\&
+ fds[1].fd = fd; /* Inotify input */
+ fds[1].events = POLLIN;
+\&
+ /* Wait for events and/or terminal input. */
+\&
+ printf("Listening for events.\en");
+ while (1) {
+ poll_num = poll(fds, nfds, \-1);
+ if (poll_num == \-1) {
+ if (errno == EINTR)
+ continue;
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+\&
+ if (poll_num > 0) {
+\&
+ if (fds[0].revents & POLLIN) {
+\&
+ /* Console input is available. Empty stdin and quit. */
+\&
+ while (read(STDIN_FILENO, &buf, 1) > 0 && buf != \[aq]\en\[aq])
+ continue;
+ break;
+ }
+\&
+ if (fds[1].revents & POLLIN) {
+\&
+ /* Inotify events are available. */
+\&
+ handle_events(fd, wd, argc, argv);
+ }
+ }
+ }
+\&
+ printf("Listening for events stopped.\en");
+\&
+ /* Close inotify file descriptor. */
+\&
+ close(fd);
+\&
+ free(wd);
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR inotifywait (1),
+.BR inotifywatch (1),
+.BR inotify_add_watch (2),
+.BR inotify_init (2),
+.BR inotify_init1 (2),
+.BR inotify_rm_watch (2),
+.BR read (2),
+.BR stat (2),
+.BR fanotify (7)
+.PP
+.I Documentation/filesystems/inotify.txt
+in the Linux kernel source tree
diff --git a/man7/intro.7 b/man7/intro.7
new file mode 100644
index 0000000..e12ff9d
--- /dev/null
+++ b/man7/intro.7
@@ -0,0 +1,23 @@
+.\" Copyright (c) 1993 Michael Haardt
+.\" (michael@moria.de), Fri Apr 2 11:32:09 MET DST
+.\" 1993
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Modified by Thomas Koenig (ig25@rz.uni-karlsruhe.de) 24 Apr 1993
+.\" Modified Sat Jul 24 17:28:08 1993 by Rik Faith (faith@cs.unc.edu)
+.TH intro 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+intro \- introduction to overview and miscellany section
+.SH DESCRIPTION
+Section 7 of the manual provides overviews on various topics, and
+describes conventions and protocols,
+character set standards, the standard filesystem layout,
+and miscellaneous other things.
+.SH NOTES
+.SS Authors and copyright conditions
+Look at the header of the manual page source for the author(s) and copyright
+conditions.
+Note that these can be different from page to page!
+.SH SEE ALSO
+.BR standards (7)
diff --git a/man7/ip.7 b/man7/ip.7
new file mode 100644
index 0000000..d96afc7
--- /dev/null
+++ b/man7/ip.7
@@ -0,0 +1,1524 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: ip.7,v 1.19 2000/12/20 18:10:31 ak Exp $
+.\"
+.\" FIXME The following socket options are yet to be documented
+.\"
+.\" IP_XFRM_POLICY (2.5.48)
+.\" Needs CAP_NET_ADMIN
+.\"
+.\" IP_IPSEC_POLICY (2.5.47)
+.\" Needs CAP_NET_ADMIN
+.\"
+.\" IP_MINTTL (2.6.34)
+.\" commit d218d11133d888f9745802146a50255a4781d37a
+.\" Author: Stephen Hemminger <shemminger@vyatta.com>
+.\"
+.\" MCAST_JOIN_GROUP (2.4.22 / 2.6)
+.\"
+.\" MCAST_BLOCK_SOURCE (2.4.22 / 2.6)
+.\"
+.\" MCAST_UNBLOCK_SOURCE (2.4.22 / 2.6)
+.\"
+.\" MCAST_LEAVE_GROUP (2.4.22 / 2.6)
+.\"
+.\" MCAST_JOIN_SOURCE_GROUP (2.4.22 / 2.6)
+.\"
+.\" MCAST_LEAVE_SOURCE_GROUP (2.4.22 / 2.6)
+.\"
+.\" MCAST_MSFILTER (2.4.22 / 2.6)
+.\"
+.\" IP_UNICAST_IF (3.4)
+.\" commit 76e21053b5bf33a07c76f99d27a74238310e3c71
+.\" Author: Erich E. Hoover <ehoover@mines.edu>
+.\"
+.TH ip 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+ip \- Linux IPv4 protocol implementation
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.\" .B #include <net/netinet.h> -- does not exist anymore
+.\" .B #include <linux/errqueue.h> -- never include <linux/foo.h>
+.B #include <netinet/in.h>
+.B #include <netinet/ip.h> \fR/* superset of previous */
+.PP
+.IB tcp_socket " = socket(AF_INET, SOCK_STREAM, 0);"
+.IB udp_socket " = socket(AF_INET, SOCK_DGRAM, 0);"
+.IB raw_socket " = socket(AF_INET, SOCK_RAW, " protocol ");"
+.fi
+.SH DESCRIPTION
+Linux implements the Internet Protocol, version 4,
+described in RFC\ 791 and RFC\ 1122.
+.B ip
+contains a level 2 multicasting implementation conforming to RFC\ 1112.
+It also contains an IP router including a packet filter.
+.PP
+The programming interface is BSD-sockets compatible.
+For more information on sockets, see
+.BR socket (7).
+.PP
+An IP socket is created using
+.BR socket (2):
+.PP
+.in +4n
+.EX
+socket(AF_INET, socket_type, protocol);
+.EE
+.in
+.PP
+Valid socket types include
+.B SOCK_STREAM
+to open a stream socket,
+.B SOCK_DGRAM
+to open a datagram socket, and
+.B SOCK_RAW
+to open a
+.BR raw (7)
+socket to access the IP protocol directly.
+.PP
+.I protocol
+is the IP protocol in the IP header to be received or sent.
+Valid values for
+.I protocol
+include:
+.IP \[bu] 3
+0 and
+.B IPPROTO_TCP
+for
+.BR tcp (7)
+stream sockets;
+.IP \[bu]
+0 and
+.B IPPROTO_UDP
+for
+.BR udp (7)
+datagram sockets;
+.IP \[bu]
+.B IPPROTO_SCTP
+for
+.BR sctp (7)
+stream sockets; and
+.IP \[bu]
+.B IPPROTO_UDPLITE
+for
+.BR udplite (7)
+datagram sockets.
+.PP
+For
+.B SOCK_RAW
+you may specify a valid IANA IP protocol defined in
+RFC\ 1700 assigned numbers.
+.PP
+When a process wants to receive new incoming packets or connections, it
+should bind a socket to a local interface address using
+.BR bind (2).
+In this case, only one IP socket may be bound to any given local
+(address, port) pair.
+When
+.B INADDR_ANY
+is specified in the bind call, the socket will be bound to
+.I all
+local interfaces.
+When
+.BR listen (2)
+is called on an unbound socket, the socket is automatically bound
+to a random free port with the local address set to
+.BR INADDR_ANY .
+When
+.BR connect (2)
+is called on an unbound socket, the socket is automatically bound
+to a random free port or to a usable shared port with the local address
+set to
+.BR INADDR_ANY .
+.PP
+A TCP local socket address that has been bound is unavailable for
+some time after closing, unless the
+.B SO_REUSEADDR
+flag has been set.
+Care should be taken when using this flag as it makes TCP less reliable.
+.SS Address format
+An IP socket address is defined as a combination of an IP interface
+address and a 16-bit port number.
+The basic IP protocol does not supply port numbers; they
+are implemented by higher level protocols like
+.BR udp (7)
+and
+.BR tcp (7).
+On raw sockets
+.I sin_port
+is set to the IP protocol.
+.PP
+.in +4n
+.EX
+struct sockaddr_in {
+ sa_family_t sin_family; /* address family: AF_INET */
+ in_port_t sin_port; /* port in network byte order */
+ struct in_addr sin_addr; /* internet address */
+};
+\&
+/* Internet address */
+struct in_addr {
+ uint32_t s_addr; /* address in network byte order */
+};
+.EE
+.in
+.PP
+.I sin_family
+is always set to
+.BR AF_INET .
+This is required; in Linux 2.2 most networking functions return
+.B EINVAL
+when this setting is missing.
+.I sin_port
+contains the port in network byte order.
+The port numbers below 1024 are called
+.I privileged ports
+(or sometimes:
+.IR "reserved ports" ).
+Only a privileged process
+(on Linux: a process that has the
+.B CAP_NET_BIND_SERVICE
+capability in the user namespace governing its network namespace) may
+.BR bind (2)
+to these ports.
+Note that the raw IPv4 protocol as such has no concept of
+ports; they are implemented only by higher-level protocols like
+.BR tcp (7)
+and
+.BR udp (7).
+.PP
+.I sin_addr
+is the IP host address.
+The
+.I s_addr
+member of
+.I struct in_addr
+contains the host interface address in network byte order.
+.I in_addr
+should be assigned one of the
+.B INADDR_*
+values
+(e.g.,
+.BR INADDR_LOOPBACK )
+using
+.BR htonl (3)
+or set using the
+.BR inet_aton (3),
+.BR inet_addr (3),
+.BR inet_makeaddr (3)
+library functions or directly with the name resolver (see
+.BR gethostbyname (3)).
+.PP
+IPv4 addresses are divided into unicast, broadcast,
+and multicast addresses.
+Unicast addresses specify a single interface of a host,
+broadcast addresses specify all hosts on a network, and multicast
+addresses address all hosts in a multicast group.
+Datagrams to broadcast addresses can be sent or received only when the
+.B SO_BROADCAST
+socket flag is set.
+In the current implementation, connection-oriented sockets are allowed
+to use only unicast addresses.
+.\" Leave a loophole for XTP @)
+.PP
+Note that the address and the port are always stored in
+network byte order.
+In particular, this means that you need to call
+.BR htons (3)
+on the number that is assigned to a port.
+All address/port manipulation
+functions in the standard library work in network byte order.
+.SS Special and reserved addresses
+There are several special addresses:
+.TP
+.BR INADDR_LOOPBACK " (127.0.0.1)"
+always refers to the local host via the loopback device;
+.TP
+.BR INADDR_ANY " (0.0.0.0)"
+means any address for socket binding;
+.TP
+.BR INADDR_BROADCAST " (255.255.255.255)"
+has the same effect on
+.BR bind (2)
+as
+.B INADDR_ANY
+for historical reasons.
+A packet addressed to
+.B INADDR_BROADCAST
+through a socket which has
+.B SO_BROADCAST
+set will be broadcast to all hosts on the local network segment,
+as long as the link is broadcast-capable.
+.TP
+Highest-numbered address
+.TQ
+Lowest-numbered address
+On any locally-attached non-point-to-point IP subnet
+with a link type that supports broadcasts,
+the highest-numbered address
+(e.g., the .255 address on a subnet with netmask 255.255.255.0)
+is designated as a broadcast address.
+It cannot usefully be assigned to an individual interface,
+and can only be addressed with a socket on which the
+.B SO_BROADCAST
+option has been set.
+Internet standards have historically
+also reserved the lowest-numbered address
+(e.g., the .0 address on a subnet with netmask 255.255.255.0)
+for broadcast, though they call it "obsolete" for this purpose.
+(Some sources also refer to this as the "network address.")
+Since Linux 5.14,
+.\" commit 58fee5fc83658aaacf60246aeab738946a9ba516
+it is treated as an ordinary unicast address
+and can be assigned to an interface.
+.PP
+Internet standards have traditionally also reserved various addresses
+for particular uses, though Linux no longer treats
+some of these specially.
+.TP
+[0.0.0.1, 0.255.255.255]
+.TQ
+[240.0.0.0, 255.255.255.254]
+Addresses in these ranges (0/8 and 240/4) are reserved globally.
+Since Linux 5.3
+.\" commit 96125bf9985a75db00496dd2bc9249b777d2b19b
+and Linux 2.6.25,
+.\" commit 1e637c74b0f84eaca02b914c0b8c6f67276e9697
+respectively,
+the 0/8 and 240/4 addresses, other than
+.B INADDR_ANY
+and
+.BR INADDR_BROADCAST ,
+are treated as ordinary unicast addresses.
+Systems that follow the traditional behaviors may not
+interoperate with these historically reserved addresses.
+.TP
+[127.0.0.1, 127.255.255.254]
+Addresses in this range (127/8) are treated as loopback addresses
+akin to the standardized local loopback address
+.B INADDR_LOOPBACK
+(127.0.0.1);
+.TP
+[224.0.0.0, 239.255.255.255]
+Addresses in this range (224/4) are dedicated to multicast use.
+.SS Socket options
+IP supports some protocol-specific socket options that can be set with
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2).
+The socket option level for IP is
+.BR IPPROTO_IP .
+.\" or SOL_IP on Linux
+A boolean integer flag is zero when it is false, otherwise true.
+.PP
+When an invalid socket option is specified,
+.BR getsockopt (2)
+and
+.BR setsockopt (2)
+fail with the error
+.BR ENOPROTOOPT .
+.TP
+.BR IP_ADD_MEMBERSHIP " (since Linux 1.2)"
+Join a multicast group.
+Argument is an
+.I ip_mreqn
+structure.
+.IP
+.in +4n
+.EX
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast group
+ address */
+ struct in_addr imr_address; /* IP address of local
+ interface */
+ int imr_ifindex; /* interface index */
+};
+.EE
+.in
+.IP
+.I imr_multiaddr
+contains the address of the multicast group the application
+wants to join or leave.
+It must be a valid multicast address
+.\" (i.e., within the 224.0.0.0-239.255.255.255 range)
+(or
+.BR setsockopt (2)
+fails with the error
+.BR EINVAL ).
+.I imr_address
+is the address of the local interface with which the system
+should join the multicast group; if it is equal to
+.BR INADDR_ANY ,
+an appropriate interface is chosen by the system.
+.I imr_ifindex
+is the interface index of the interface that should join/leave the
+.I imr_multiaddr
+group, or 0 to indicate any interface.
+.IP
+The
+.I ip_mreqn
+structure is available only since Linux 2.2.
+For compatibility, the old
+.I ip_mreq
+structure (present since Linux 1.2) is still supported;
+it differs from
+.I ip_mreqn
+only by not including the
+.I imr_ifindex
+field.
+(The kernel determines which structure is being passed based
+on the size passed in
+.IR optlen .)
+.IP
+.B IP_ADD_MEMBERSHIP
+is valid only for
+.BR setsockopt (2).
+.\"
+.TP
+.BR IP_ADD_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / Linux 2.5.68)"
+Join a multicast group and allow receiving data only
+from a specified source.
+Argument is an
+.I ip_mreq_source
+structure.
+.IP
+.in +4n
+.EX
+struct ip_mreq_source {
+ struct in_addr imr_multiaddr; /* IP multicast group
+ address */
+ struct in_addr imr_interface; /* IP address of local
+ interface */
+ struct in_addr imr_sourceaddr; /* IP address of
+ multicast source */
+};
+.EE
+.in
+.IP
+The
+.I ip_mreq_source
+structure is similar to
+.I ip_mreqn
+described under
+.BR IP_ADD_MEMBERSHIP .
+The
+.I imr_multiaddr
+field contains the address of the multicast group the application
+wants to join or leave.
+The
+.I imr_interface
+field is the address of the local interface with which
+the system should join the multicast group.
+Finally, the
+.I imr_sourceaddr
+field contains the address of the source the
+application wants to receive data from.
+.IP
+This option can be used multiple times to allow
+receiving data from more than one source.
+.TP
+.BR IP_BIND_ADDRESS_NO_PORT " (since Linux 4.2)"
+.\" commit 90c337da1524863838658078ec34241f45d8394d
+Inform the kernel to not reserve an ephemeral port when using
+.BR bind (2)
+with a port number of 0.
+The port will later be automatically chosen at
+.BR connect (2)
+time,
+in a way that allows sharing a source port as long as the 4-tuple is unique.
+.TP
+.BR IP_BLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
+Stop receiving multicast data from a specific source in a given group.
+This is valid only after the application has subscribed
+to the multicast group using either
+.B IP_ADD_MEMBERSHIP
+or
+.BR IP_ADD_SOURCE_MEMBERSHIP .
+.IP
+Argument is an
+.I ip_mreq_source
+structure as described under
+.BR IP_ADD_SOURCE_MEMBERSHIP .
+.TP
+.BR IP_DROP_MEMBERSHIP " (since Linux 1.2)"
+Leave a multicast group.
+Argument is an
+.I ip_mreqn
+or
+.I ip_mreq
+structure similar to
+.BR IP_ADD_MEMBERSHIP .
+.TP
+.BR IP_DROP_SOURCE_MEMBERSHIP " (since Linux 2.4.22 / 2.5.68)"
+Leave a source-specific group\[em]that is, stop receiving data from
+a given multicast group that come from a given source.
+If the application has subscribed to multiple sources within
+the same group, data from the remaining sources will still be delivered.
+To stop receiving data from all sources at once, use
+.BR IP_DROP_MEMBERSHIP .
+.IP
+Argument is an
+.I ip_mreq_source
+structure as described under
+.BR IP_ADD_SOURCE_MEMBERSHIP .
+.TP
+.BR IP_FREEBIND " (since Linux 2.4)"
+.\" Precisely: since Linux 2.4.0-test10
+If enabled, this boolean option allows binding to an IP address
+that is nonlocal or does not (yet) exist.
+This permits listening on a socket,
+without requiring the underlying network interface or the
+specified dynamic IP address to be up at the time that
+the application is trying to bind to it.
+This option is the per-socket equivalent of the
+.I ip_nonlocal_bind
+.I /proc
+interface described below.
+.TP
+.BR IP_HDRINCL " (since Linux 2.0)"
+If enabled,
+the user supplies an IP header in front of the user data.
+Valid only for
+.B SOCK_RAW
+sockets; see
+.BR raw (7)
+for more information.
+When this flag is enabled, the values set by
+.BR IP_OPTIONS ,
+.BR IP_TTL ,
+and
+.B IP_TOS
+are ignored.
+.TP
+.BR IP_LOCAL_PORT_RANGE " (since Linux 6.3)"
+Set or get the per-socket default local port range.
+This option can be used to clamp down the global local port range,
+defined by the
+.I ip_local_port_range
+.I /proc
+interface described below, for a given socket.
+.IP
+The option takes a
+.I uint32_t
+value with
+the high 16 bits set to the upper range bound,
+and the low 16 bits set to the lower range bound.
+Range bounds are inclusive.
+The 16-bit values should be in host byte order.
+.IP
+The lower bound has to be less than the upper bound
+when both bounds are not zero.
+Otherwise, setting the option fails with EINVAL.
+.IP
+If either bound is outside of the global local port range, or is zero,
+then that bound has no effect.
+.IP
+To reset the setting,
+pass zero as both the upper and the lower bound.
+.TP
+.BR IP_MSFILTER " (since Linux 2.4.22 / 2.5.68)"
+This option provides access to the advanced full-state filtering API.
+Argument is an
+.I ip_msfilter
+structure.
+.IP
+.in +4n
+.EX
+struct ip_msfilter {
+ struct in_addr imsf_multiaddr; /* IP multicast group
+ address */
+ struct in_addr imsf_interface; /* IP address of local
+ interface */
+ uint32_t imsf_fmode; /* Filter\-mode */
+\&
+ uint32_t imsf_numsrc; /* Number of sources in
+ the following array */
+ struct in_addr imsf_slist[1]; /* Array of source
+ addresses */
+};
+.EE
+.in
+.IP
+There are two macros,
+.B MCAST_INCLUDE
+and
+.BR MCAST_EXCLUDE ,
+which can be used to specify the filtering mode.
+Additionally, the
+.BR IP_MSFILTER_SIZE (n)
+macro exists to determine how much memory is needed to store an
+.I ip_msfilter
+structure with
+.I n
+sources in the source list.
+.IP
+For the full description of multicast source filtering
+refer to RFC 3376.
+.TP
+.BR IP_MTU " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.124
+Retrieve the current known path MTU of the current socket.
+Returns an integer.
+.IP
+.B IP_MTU
+is valid only for
+.BR getsockopt (2)
+and can be employed only when the socket has been connected.
+.TP
+.BR IP_MTU_DISCOVER " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.124
+Set or retrieve the Path MTU Discovery setting for a socket.
+When enabled, Linux will perform Path MTU Discovery
+as defined in RFC\ 1191 on
+.B SOCK_STREAM
+sockets.
+For
+.RB non- SOCK_STREAM
+sockets,
+.B IP_PMTUDISC_DO
+forces the don't-fragment flag to be set on all outgoing packets.
+It is the user's responsibility to packetize the data
+in MTU-sized chunks and to do the retransmits if necessary.
+The kernel will reject (with
+.BR EMSGSIZE )
+datagrams that are bigger than the known path MTU.
+.B IP_PMTUDISC_WANT
+will fragment a datagram if needed according to the path MTU,
+or will set the don't-fragment flag otherwise.
+.IP
+The system-wide default can be toggled between
+.B IP_PMTUDISC_WANT
+and
+.B IP_PMTUDISC_DONT
+by writing (respectively, zero and nonzero values) to the
+.I /proc/sys/net/ipv4/ip_no_pmtu_disc
+file.
+.TS
+tab(:);
+c l
+l l.
+Path MTU discovery value:Meaning
+IP_PMTUDISC_WANT:Use per-route settings.
+IP_PMTUDISC_DONT:Never do Path MTU Discovery.
+IP_PMTUDISC_DO:Always do Path MTU Discovery.
+IP_PMTUDISC_PROBE:Set DF but ignore Path MTU.
+.TE
+.sp 1
+When PMTU discovery is enabled, the kernel automatically keeps track of
+the path MTU per destination host.
+When it is connected to a specific peer with
+.BR connect (2),
+the currently known path MTU can be retrieved conveniently using the
+.B IP_MTU
+socket option (e.g., after an
+.B EMSGSIZE
+error occurred).
+The path MTU may change over time.
+For connectionless sockets with many destinations,
+the new MTU for a given destination can also be accessed using the
+error queue (see
+.BR IP_RECVERR ).
+A new error will be queued for every incoming MTU update.
+.IP
+While MTU discovery is in progress, initial packets from datagram sockets
+may be dropped.
+Applications using UDP should be aware of this and not
+take it into account for their packet retransmit strategy.
+.IP
+To bootstrap the path MTU discovery process on unconnected sockets, it
+is possible to start with a big datagram size
+(headers up to 64 kilobytes long) and let it shrink by updates of the path MTU.
+.IP
+To get an initial estimate of the
+path MTU, connect a datagram socket to the destination address using
+.BR connect (2)
+and retrieve the MTU by calling
+.BR getsockopt (2)
+with the
+.B IP_MTU
+option.
+.IP
+It is possible to implement RFC 4821 MTU probing with
+.B SOCK_DGRAM
+or
+.B SOCK_RAW
+sockets by setting a value of
+.B IP_PMTUDISC_PROBE
+(available since Linux 2.6.22).
+This is also particularly useful for diagnostic tools such as
+.BR tracepath (8)
+that wish to deliberately send probe packets larger than
+the observed Path MTU.
+.TP
+.BR IP_MULTICAST_ALL " (since Linux 2.6.31)"
+This option can be used to modify the delivery policy of multicast messages.
+The argument is a boolean integer (defaults to 1).
+If set to 1,
+the socket will receive messages from all the groups that have been joined
+globally on the whole system.
+Otherwise, it will deliver messages only from
+the groups that have been explicitly joined (for example via the
+.B IP_ADD_MEMBERSHIP
+option) on this particular socket.
+.TP
+.BR IP_MULTICAST_IF " (since Linux 1.2)"
+Set the local device for a multicast socket.
+The argument for
+.BR setsockopt (2)
+is an
+.I ip_mreqn
+or
+.\" net: IP_MULTICAST_IF setsockopt now recognizes struct mreq
+.\" Commit: 3a084ddb4bf299a6e898a9a07c89f3917f0713f7
+(since Linux 3.5)
+.I ip_mreq
+structure similar to
+.BR IP_ADD_MEMBERSHIP ,
+or an
+.I in_addr
+structure.
+(The kernel determines which structure is being passed based
+on the size passed in
+.IR optlen .)
+For
+.BR getsockopt (2),
+the argument is an
+.I in_addr
+structure.
+.TP
+.BR IP_MULTICAST_LOOP " (since Linux 1.2)"
+Set or read a boolean integer argument that determines whether
+sent multicast packets should be looped back to the local sockets.
+.TP
+.BR IP_MULTICAST_TTL " (since Linux 1.2)"
+Set or read the time-to-live value of outgoing multicast packets for this
+socket.
+It is very important for multicast packets to set the smallest TTL possible.
+The default is 1 which means that multicast packets don't leave the local
+network unless the user program explicitly requests it.
+Argument is an integer.
+.TP
+.BR IP_NODEFRAG " (since Linux 2.6.36)"
+If enabled (argument is nonzero),
+the reassembly of outgoing packets is disabled in the netfilter layer.
+The argument is an integer.
+.IP
+This option is valid only for
+.B SOCK_RAW
+sockets.
+.TP
+.BR IP_OPTIONS " (since Linux 2.0)"
+.\" Precisely: since Linux 1.3.30
+Set or get the IP options to be sent with every packet from this socket.
+The arguments are a pointer to a memory buffer containing the options
+and the option length.
+The
+.BR setsockopt (2)
+call sets the IP options associated with a socket.
+The maximum option size for IPv4 is 40 bytes.
+See RFC\ 791 for the allowed options.
+When the initial connection request packet for a
+.B SOCK_STREAM
+socket contains IP options, the IP options will be set automatically
+to the options from the initial packet with routing headers reversed.
+Incoming packets are not allowed to change options after the connection
+is established.
+The processing of all incoming source routing options
+is disabled by default and can be enabled by using the
+.I accept_source_route
+.I /proc
+interface.
+Other options like timestamps are still handled.
+For datagram sockets, IP options can be set only by the local user.
+Calling
+.BR getsockopt (2)
+with
+.B IP_OPTIONS
+puts the current IP options used for sending into the supplied buffer.
+.TP
+.BR IP_PASSSEC " (since Linux 2.6.17)"
+.\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
+If labeled IPSEC or NetLabel is configured on the sending and receiving
+hosts, this option enables receiving of the security context of the peer
+socket in an ancillary message of type
+.B SCM_SECURITY
+retrieved using
+.BR recvmsg (2).
+This option is supported only for UDP sockets; for TCP or SCTP sockets,
+see the description of the
+.B SO_PEERSEC
+option below.
+.IP
+The value given as an argument to
+.BR setsockopt (2)
+and returned as the result of
+.BR getsockopt (2)
+is an integer boolean flag.
+.IP
+The security context returned in the
+.B SCM_SECURITY
+ancillary message
+is of the same format as the one described under the
+.B SO_PEERSEC
+option below.
+.IP
+Note: the reuse of the
+.B SCM_SECURITY
+message type for the
+.B IP_PASSSEC
+socket option was likely a mistake, since other IP control messages use
+their own numbering scheme in the IP namespace and often use the
+socket option value as the message type.
+There is no conflict currently since the IP option with the same value as
+.B SCM_SECURITY
+is
+.B IP_HDRINCL
+and this is never used for a control message type.
+.TP
+.BR IP_PKTINFO " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.68
+Pass an
+.B IP_PKTINFO
+ancillary message that contains a
+.I pktinfo
+structure that supplies some information about the incoming packet.
+This works only for datagram oriented sockets.
+The argument is a flag that tells the socket whether the
+.B IP_PKTINFO
+message should be passed or not.
+The message itself can be sent/retrieved
+only as a control message with a packet using
+.BR recvmsg (2)
+or
+.BR sendmsg (2).
+.IP
+.in +4n
+.EX
+struct in_pktinfo {
+ unsigned int ipi_ifindex; /* Interface index */
+ struct in_addr ipi_spec_dst; /* Local address */
+ struct in_addr ipi_addr; /* Header Destination
+ address */
+};
+.EE
+.in
+.IP
+.I ipi_ifindex
+is the unique index of the interface the packet was received on.
+.I ipi_spec_dst
+is the local address of the packet and
+.I ipi_addr
+is the destination address in the packet header.
+If
+.B IP_PKTINFO
+is passed to
+.BR sendmsg (2)
+and
+.\" This field is grossly misnamed
+.I ipi_spec_dst
+is not zero, then it is used as the local source address for the routing
+table lookup and for setting up IP source route options.
+When
+.I ipi_ifindex
+is not zero, the primary local address of the interface specified by the
+index overwrites
+.I ipi_spec_dst
+for the routing table lookup.
+.TP
+.BR IP_RECVERR " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.15
+Enable extended reliable error message passing.
+When enabled on a datagram socket, all
+generated errors will be queued in a per-socket error queue.
+When the user receives an error from a socket operation,
+the errors can be received by calling
+.BR recvmsg (2)
+with the
+.B MSG_ERRQUEUE
+flag set.
+The
+.I sock_extended_err
+structure describing the error will be passed in an ancillary message with
+the type
+.B IP_RECVERR
+and the level
+.BR IPPROTO_IP .
+.\" or SOL_IP on Linux
+This is useful for reliable error handling on unconnected sockets.
+The received data portion of the error queue contains the error packet.
+.IP
+The
+.B IP_RECVERR
+control message contains a
+.I sock_extended_err
+structure:
+.IP
+.in +4n
+.EX
+#define SO_EE_ORIGIN_NONE 0
+#define SO_EE_ORIGIN_LOCAL 1
+#define SO_EE_ORIGIN_ICMP 2
+#define SO_EE_ORIGIN_ICMP6 3
+\&
+struct sock_extended_err {
+ uint32_t ee_errno; /* error number */
+ uint8_t ee_origin; /* where the error originated */
+ uint8_t ee_type; /* type */
+ uint8_t ee_code; /* code */
+ uint8_t ee_pad;
+ uint32_t ee_info; /* additional information */
+ uint32_t ee_data; /* other data */
+ /* More data may follow */
+};
+\&
+struct sockaddr *SO_EE_OFFENDER(struct sock_extended_err *);
+.EE
+.in
+.IP
+.I ee_errno
+contains the
+.I errno
+number of the queued error.
+.I ee_origin
+is the origin code of where the error originated.
+The other fields are protocol-specific.
+The macro
+.B SO_EE_OFFENDER
+returns a pointer to the address of the network object
+where the error originated from given a pointer to the ancillary message.
+If this address is not known, the
+.I sa_family
+member of the
+.I sockaddr
+contains
+.B AF_UNSPEC
+and the other fields of the
+.I sockaddr
+are undefined.
+.IP
+IP uses the
+.I sock_extended_err
+structure as follows:
+.I ee_origin
+is set to
+.B SO_EE_ORIGIN_ICMP
+for errors received as an ICMP packet, or
+.B SO_EE_ORIGIN_LOCAL
+for locally generated errors.
+Unknown values should be ignored.
+.I ee_type
+and
+.I ee_code
+are set from the type and code fields of the ICMP header.
+.I ee_info
+contains the discovered MTU for
+.B EMSGSIZE
+errors.
+The message also contains the
+.I sockaddr_in
+of the node that caused the error, which can be accessed with the
+.B SO_EE_OFFENDER
+macro.
+The
+.I sin_family
+field of the
+.B SO_EE_OFFENDER
+address is
+.B AF_UNSPEC
+when the source was unknown.
+When the error originated from the network, all IP options
+.RB ( IP_OPTIONS ", " IP_TTL ,
+etc.) enabled on the socket and contained in the
+error packet are passed as control messages.
+The payload of the packet causing the error is returned as normal payload.
+.\" FIXME . Is it a good idea to document that? It is a dubious feature.
+.\" On
+.\" .B SOCK_STREAM
+.\" sockets,
+.\" .B IP_RECVERR
+.\" has slightly different semantics. Instead of
+.\" saving the errors for the next timeout, it passes all incoming
+.\" errors immediately to the user.
+.\" This might be useful for very short-lived TCP connections which
+.\" need fast error handling. Use this option with care:
+.\" it makes TCP unreliable
+.\" by not allowing it to recover properly from routing
+.\" shifts and other normal
+.\" conditions and breaks the protocol specification.
+Note that TCP has no error queue;
+.B MSG_ERRQUEUE
+is not permitted on
+.B SOCK_STREAM
+sockets.
+.B IP_RECVERR
+is valid for TCP, but all errors are returned by socket function return or
+.B SO_ERROR
+only.
+.IP
+For raw sockets,
+.B IP_RECVERR
+enables passing of all received ICMP errors to the
+application, otherwise errors are reported only on connected sockets.
+.IP
+It sets or retrieves an integer boolean flag.
+.B IP_RECVERR
+defaults to off.
+.TP
+.BR IP_RECVOPTS " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.15
+Pass all incoming IP options to the user in an
+.B IP_OPTIONS
+control message.
+The routing header and other options are already filled in
+for the local host.
+Not supported for
+.B SOCK_STREAM
+sockets.
+.TP
+.BR IP_RECVORIGDSTADDR " (since Linux 2.6.29)"
+.\" commit e8b2dfe9b4501ed0047459b2756ba26e5a940a69
+This boolean option enables the
+.B IP_ORIGDSTADDR
+ancillary message in
+.BR recvmsg (2),
+in which the kernel returns the original destination address
+of the datagram being received.
+The ancillary message contains a
+.IR "struct sockaddr_in" .
+.TP
+.BR IP_RECVTOS " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.68
+If enabled, the
+.B IP_TOS
+ancillary message is passed with incoming packets.
+It contains a byte which specifies the Type of Service/Precedence
+field of the packet header.
+Expects a boolean integer flag.
+.TP
+.BR IP_RECVTTL " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.68
+When this flag is set, pass an
+.B IP_TTL
+control message with the time-to-live
+field of the received packet as a 32-bit integer.
+Not supported for
+.B SOCK_STREAM
+sockets.
+.TP
+.BR IP_RETOPTS " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.15
+Identical to
+.BR IP_RECVOPTS ,
+but returns raw unprocessed options with timestamp and route record
+options not filled in for this hop.
+.TP
+.BR IP_ROUTER_ALERT " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.68
+Pass all to-be forwarded packets with the
+IP Router Alert option set to this socket.
+Valid only for raw sockets.
+This is useful, for instance, for user-space RSVP daemons.
+The tapped packets are not forwarded by the kernel; it is
+the user's responsibility to send them out again.
+Socket binding is ignored,
+such packets are filtered only by protocol.
+Expects an integer flag.
+.TP
+.BR IP_TOS " (since Linux 1.0)"
+Set or receive the Type-Of-Service (TOS) field that is sent
+with every IP packet originating from this socket.
+It is used to prioritize packets on the network.
+TOS is a byte.
+There are some standard TOS flags defined:
+.B IPTOS_LOWDELAY
+to minimize delays for interactive traffic,
+.B IPTOS_THROUGHPUT
+to optimize throughput,
+.B IPTOS_RELIABILITY
+to optimize for reliability,
+.B IPTOS_MINCOST
+should be used for "filler data" where slow transmission doesn't matter.
+At most one of these TOS values can be specified.
+Other bits are invalid and shall be cleared.
+Linux sends
+.B IPTOS_LOWDELAY
+datagrams first by default,
+but the exact behavior depends on the configured queueing discipline.
+.\" FIXME elaborate on this
+Some high-priority levels may require superuser privileges (the
+.B CAP_NET_ADMIN
+capability).
+.\" The priority can also be set in a protocol-independent way by the
+.\" .RB ( SOL_SOCKET ", " SO_PRIORITY )
+.\" socket option (see
+.\" .BR socket (7)).
+.TP
+.BR IP_TRANSPARENT " (since Linux 2.6.24)"
+.\" commit f5715aea4564f233767ea1d944b2637a5fd7cd2e
+.\" This patch introduces the IP_TRANSPARENT socket option: enabling that
+.\" will make the IPv4 routing omit the non-local source address check on
+.\" output. Setting IP_TRANSPARENT requires NET_ADMIN capability.
+.\" http://lwn.net/Articles/252545/
+Setting this boolean option enables transparent proxying on this socket.
+This socket option allows
+the calling application to bind to a nonlocal IP address and operate
+both as a client and a server with the foreign address as the local endpoint.
+NOTE: this requires that routing be set up in a way that
+packets going to the foreign address are routed through the TProxy box
+(i.e., the system hosting the application that employs the
+.B IP_TRANSPARENT
+socket option).
+Enabling this socket option requires superuser privileges
+(the
+.B CAP_NET_ADMIN
+capability).
+.IP
+TProxy redirection with the iptables TPROXY target also requires that
+this option be set on the redirected socket.
+.TP
+.BR IP_TTL " (since Linux 1.0)"
+Set or retrieve the current time-to-live field that is used in every packet
+sent from this socket.
+.TP
+.BR IP_UNBLOCK_SOURCE " (since Linux 2.4.22 / 2.5.68)"
+Unblock a previously blocked multicast source.
+Returns
+.B EADDRNOTAVAIL
+when the given source is not being blocked.
+.IP
+Argument is an
+.I ip_mreq_source
+structure as described under
+.BR IP_ADD_SOURCE_MEMBERSHIP .
+.TP
+.BR SO_PEERSEC " (since Linux 2.6.17)"
+If labeled IPSEC or NetLabel is configured on both the sending and
+receiving hosts, this read-only socket option returns the security
+context of the peer socket connected to this socket.
+By default,
+this will be the same as the security context of the process that created
+the peer socket unless overridden by the policy or by a process with
+the required permissions.
+.IP
+The argument to
+.BR getsockopt (2)
+is a pointer to a buffer of the specified length in bytes
+into which the security context string will be copied.
+If the buffer length is less than the length of the security
+context string, then
+.BR getsockopt (2)
+returns \-1, sets
+.I errno
+to
+.BR ERANGE ,
+and returns the required length via
+.IR optlen .
+The caller should allocate at least
+.B NAME_MAX
+bytes for the buffer initially, although this is not guaranteed
+to be sufficient.
+Resizing the buffer to the returned length
+and retrying may be necessary.
+.IP
+The security context string may include a terminating null character
+in the returned length, but is not guaranteed to do so: a security
+context "foo" might be represented as either {'f','o','o'} of length 3
+or {'f','o','o','\\0'} of length 4, which are considered to be
+interchangeable.
+The string is printable, does not contain non-terminating null characters,
+and is in an unspecified encoding (in particular, it
+is not guaranteed to be ASCII or UTF-8).
+.IP
+The use of this option for sockets in the
+.B AF_INET
+address family is supported since Linux 2.6.17
+.\" commit 2c7946a7bf45ae86736ab3b43d0085e43947945c
+for TCP sockets, and since Linux 4.17
+.\" commit d452930fd3b9031e59abfeddb2fa383f1403d61a
+for SCTP sockets.
+.IP
+For SELinux, NetLabel conveys only the MLS portion of the security
+context of the peer across the wire, defaulting the rest of the
+security context to the values defined in the policy for the
+netmsg initial security identifier (SID).
+However, NetLabel can
+be configured to pass full security contexts over loopback.
+Labeled IPSEC always passes full security contexts as part of establishing
+the security association (SA) and looks them up based on the association
+for each packet.
+.\"
+.SS /proc interfaces
+The IP protocol
+supports a set of
+.I /proc
+interfaces to configure some global parameters.
+The parameters can be accessed by reading or writing files in the directory
+.IR /proc/sys/net/ipv4/ .
+.\" FIXME As at 2.6.12, 14 Jun 2005, the following are undocumented:
+.\" ip_queue_maxlen
+.\" ip_conntrack_max
+Interfaces described as
+.I Boolean
+take an integer value, with a nonzero value ("true") meaning that
+the corresponding option is enabled, and a zero value ("false")
+meaning that the option is disabled.
+.\"
+.TP
+.IR ip_always_defrag " (Boolean; since Linux 2.2.13)"
+[New with Linux 2.2.13; in earlier kernel versions this feature
+was controlled at compile time by the
+.B CONFIG_IP_ALWAYS_DEFRAG
+option; this option is not present in Linux 2.4.x and later]
+.IP
+When this boolean flag is enabled (not equal to 0), incoming fragments
+(parts of IP packets
+that arose when some host between origin and destination decided
+that the packets were too large and cut them into pieces) will be
+reassembled (defragmented) before being processed, even if they are
+about to be forwarded.
+.IP
+Enable only if running either a firewall that is the sole link
+to your network or a transparent proxy; never ever use it for a
+normal router or host.
+Otherwise, fragmented communication can be disturbed
+if the fragments travel over different links.
+Defragmentation also has a large memory and CPU time cost.
+.IP
+This is automagically turned on when masquerading or transparent
+proxying are configured.
+.\"
+.TP
+.IR ip_autoconfig " (from Linux 2.2 to Linux 2.6.17)"
+.\" Precisely: since Linux 2.1.68
+.\" FIXME document ip_autoconfig
+Not documented.
+.\"
+.TP
+.IR ip_default_ttl " (integer; default: 64; since Linux 2.2)"
+.\" Precisely: since Linux 2.1.15
+Set the default time-to-live value of outgoing packets.
+This can be changed per socket with the
+.B IP_TTL
+option.
+.\"
+.TP
+.IR ip_dynaddr " (Boolean; default: disabled; since Linux 2.0.31)"
+Enable dynamic socket address and masquerading entry rewriting on interface
+address change.
+This is useful for dialup interfaces with changing IP addresses.
+0 means no rewriting, 1 turns it on and 2 enables verbose mode.
+.\"
+.TP
+.IR ip_forward " (Boolean; default: disabled; since Linux 1.2)"
+Enable IP forwarding with a boolean flag.
+IP forwarding can also be set on a per-interface basis.
+.\"
+.TP
+.IR ip_local_port_range " (since Linux 2.2)"
+.\" Precisely: since Linux 2.1.68
+This file contains two integers that define the default local port range
+allocated to sockets that are not explicitly bound to a port number\[em]that
+is, the range used for
+.IR "ephemeral ports" .
+An ephemeral port is allocated to a socket in the following circumstances:
+.RS
+.IP \[bu] 3
+the port number in a socket address is specified as 0 when calling
+.BR bind (2);
+.IP \[bu]
+.BR listen (2)
+is called on a stream socket that was not previously bound;
+.IP \[bu]
+.BR connect (2)
+was called on a socket that was not previously bound;
+.IP \[bu]
+.BR sendto (2)
+is called on a datagram socket that was not previously bound.
+.RE
+.IP
+Allocation of ephemeral ports starts with the first number in
+.I ip_local_port_range
+and ends with the second number.
+If the range of ephemeral ports is exhausted,
+then the relevant system call returns an error (but see BUGS).
+.IP
+Note that the port range in
+.I ip_local_port_range
+should not conflict with the ports used by masquerading
+(although the case is handled).
+Also, arbitrary choices may cause problems with some firewall packet
+filters that make assumptions about the local ports in use.
+The first number should be at least greater than 1024,
+or better, greater than 4096, to avoid clashes
+with well known ports and to minimize firewall problems.
+.\"
+.TP
+.IR ip_no_pmtu_disc " (Boolean; default: disabled; since Linux 2.2)"
+.\" Precisely: 2.1.15
+If enabled, don't do Path MTU Discovery for TCP sockets by default.
+Path MTU discovery may fail if misconfigured firewalls (that drop
+all ICMP packets) or misconfigured interfaces (e.g., a point-to-point
+link where both ends don't agree on the MTU) are on the path.
+It is better to fix the broken routers on the path than to turn off
+Path MTU Discovery globally, because not doing it incurs a high cost
+to the network.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR ip_nonlocal_bind " (Boolean; default: disabled; since Linux 2.4)"
+.\" Precisely: patch-2.4.0-test10
+If set, allows processes to
+.BR bind (2)
+to nonlocal IP addresses,
+which can be quite useful, but may break some applications.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR ip6frag_time " (integer; default: 30)"
+Time in seconds to keep an IPv6 fragment in memory.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR ip6frag_secret_interval " (integer; default: 600)"
+Regeneration interval (in seconds) of the hash secret (or lifetime
+for the hash secret) for IPv6 fragments.
+.TP
+.IR ipfrag_high_thresh " (integer), " ipfrag_low_thresh " (integer)"
+If the amount of queued IP fragments reaches
+.IR ipfrag_high_thresh ,
+the queue is pruned down to
+.IR ipfrag_low_thresh .
+Contains an integer with the number of bytes.
+.TP
+.I neigh/*
+See
+.BR arp (7).
+.\" FIXME Document the conf/*/* interfaces
+.\"
+.\" FIXME Document the route/* interfaces
+.SS Ioctls
+All ioctls described in
+.BR socket (7)
+apply to
+.BR ip .
+.PP
+Ioctls to configure generic device parameters are described in
+.BR netdevice (7).
+.\" FIXME Add a discussion of multicasting
+.SH ERRORS
+.\" FIXME document all errors.
+.\" We should really fix the kernels to give more uniform
+.\" error returns (ENOMEM vs ENOBUFS, EPERM vs EACCES etc.)
+.TP
+.B EACCES
+The user tried to execute an operation without the necessary permissions.
+These include:
+sending a packet to a broadcast address without having the
+.B SO_BROADCAST
+flag set;
+sending a packet via a
+.I prohibit
+route;
+modifying firewall settings without superuser privileges (the
+.B CAP_NET_ADMIN
+capability);
+binding to a privileged port without superuser privileges (the
+.B CAP_NET_BIND_SERVICE
+capability).
+.TP
+.B EADDRINUSE
+Tried to bind to an address already in use.
+.TP
+.B EADDRNOTAVAIL
+A nonexistent interface was requested or the requested source
+address was not local.
+.TP
+.B EAGAIN
+Operation on a nonblocking socket would block.
+.TP
+.B EALREADY
+A connection operation on a nonblocking socket is already in progress.
+.TP
+.B ECONNABORTED
+A connection was closed during an
+.BR accept (2).
+.TP
+.B EHOSTUNREACH
+No valid routing table entry matches the destination address.
+This error can be caused by an ICMP message from a remote router or
+by the local routing table.
+.TP
+.B EINVAL
+Invalid argument passed.
+For send operations this can be caused by sending to a
+.I blackhole
+route.
+.TP
+.B EISCONN
+.BR connect (2)
+was called on an already connected socket.
+.TP
+.B EMSGSIZE
+Datagram is bigger than an MTU on the path and it cannot be fragmented.
+.TP
+.BR ENOBUFS ", " ENOMEM
+Not enough free memory.
+This often means that the memory allocation is limited by the socket
+buffer limits, not by the system memory, but this is not 100% consistent.
+.TP
+.B ENOENT
+.B SIOCGSTAMP
+was called on a socket where no packet arrived.
+.TP
+.B ENOPKG
+A kernel subsystem was not configured.
+.TP
+.BR ENOPROTOOPT " and " EOPNOTSUPP
+Invalid socket option passed.
+.TP
+.B ENOTCONN
+The operation is defined only on a connected socket, but the socket wasn't
+connected.
+.TP
+.B EPERM
+User doesn't have permission to set high priority, change configuration,
+or send signals to the requested process or group.
+.TP
+.B EPIPE
+The connection was unexpectedly closed or shut down by the other end.
+.TP
+.B ESOCKTNOSUPPORT
+The socket is not configured or an unknown socket type was requested.
+.PP
+Other errors may be generated by the overlaying protocols; see
+.BR tcp (7),
+.BR raw (7),
+.BR udp (7),
+and
+.BR socket (7).
+.SH NOTES
+.BR IP_FREEBIND ,
+.BR IP_MSFILTER ,
+.BR IP_MTU ,
+.BR IP_MTU_DISCOVER ,
+.BR IP_RECVORIGDSTADDR ,
+.BR IP_PASSSEC ,
+.BR IP_PKTINFO ,
+.BR IP_RECVERR ,
+.BR IP_ROUTER_ALERT ,
+and
+.B IP_TRANSPARENT
+are Linux-specific.
+.\" IP_XFRM_POLICY is Linux-specific
+.\" IP_IPSEC_POLICY is a nonstandard extension, also present on some BSDs
+.PP
+Be very careful with the
+.B SO_BROADCAST
+option \- it is not privileged in Linux.
+It is easy to overload the network
+with careless broadcasts.
+For new application protocols
+it is better to use a multicast group instead of broadcasting.
+Broadcasting is discouraged.
+See RFC 6762 for an example of a protocol (mDNS)
+using the more modern multicast approach
+to communicating with an open-ended
+group of hosts on the local network.
+.PP
+Some other BSD sockets implementations provide
+.B IP_RCVDSTADDR
+and
+.B IP_RECVIF
+socket options to get the destination address and the interface of
+received datagrams.
+Linux has the more general
+.B IP_PKTINFO
+for the same task.
+.PP
+Some BSD sockets implementations also provide an
+.B IP_RECVTTL
+option, but an ancillary message with type
+.B IP_RECVTTL
+is passed with the incoming packet.
+This is different from the
+.B IP_TTL
+option used in Linux.
+.PP
+Using the
+.B SOL_IP
+socket options level isn't portable; BSD-based stacks use the
+.B IPPROTO_IP
+level.
+.PP
+.B INADDR_ANY
+(0.0.0.0) and
+.B INADDR_BROADCAST
+(255.255.255.255) are byte-order-neutral.
+This means
+.BR htonl (3)
+has no effect on them.
+.SS Compatibility
+For compatibility with Linux 2.0, the obsolete
+.BI "socket(AF_INET, SOCK_PACKET, " protocol )
+syntax is still supported to open a
+.BR packet (7)
+socket.
+This is deprecated and should be replaced by
+.BI "socket(AF_PACKET, SOCK_RAW, " protocol )
+instead.
+The main difference is the new
+.I sockaddr_ll
+address structure for generic link layer information instead of the old
+.IR sockaddr_pkt .
+.SH BUGS
+There are too many inconsistent error values.
+.PP
+The error used to diagnose exhaustion of the ephemeral port range differs
+across the various system calls
+.RB ( connect (2),
+.BR bind (2),
+.BR listen (2),
+.BR sendto (2))
+that can assign ephemeral ports.
+.PP
+The ioctls to configure IP-specific interface options and ARP tables are
+not described.
+.\" .PP
+.\" Some versions of glibc forget to declare
+.\" .IR in_pktinfo .
+.\" Workaround currently is to copy it into your program from this man page.
+.PP
+Receiving the original destination address with
+.B MSG_ERRQUEUE
+in
+.I msg_name
+by
+.BR recvmsg (2)
+does not work in some Linux 2.2 kernels.
+.\" .SH AUTHORS
+.\" This man page was written by Andi Kleen.
+.SH SEE ALSO
+.BR recvmsg (2),
+.BR sendmsg (2),
+.BR byteorder (3),
+.BR capabilities (7),
+.BR icmp (7),
+.BR ipv6 (7),
+.BR netdevice (7),
+.BR netlink (7),
+.BR raw (7),
+.BR socket (7),
+.BR tcp (7),
+.BR udp (7),
+.BR ip (8)
+.PP
+The kernel source file
+.IR Documentation/networking/ip\-sysctl.txt .
+.PP
+RFC\ 791 for the original IP specification.
+RFC\ 1122 for the IPv4 host requirements.
+RFC\ 1812 for the IPv4 router requirements.
diff --git a/man7/ipc_namespaces.7 b/man7/ipc_namespaces.7
new file mode 100644
index 0000000..0b13f07
--- /dev/null
+++ b/man7/ipc_namespaces.7
@@ -0,0 +1,66 @@
+.\" Copyright (c) 2019 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH ipc_namespaces 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+ipc_namespaces \- overview of Linux IPC namespaces
+.SH DESCRIPTION
+IPC namespaces isolate certain IPC resources,
+namely, System V IPC objects (see
+.BR sysvipc (7))
+and (since Linux 2.6.30)
+.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
+.\" https://lwn.net/Articles/312232/
+POSIX message queues (see
+.BR mq_overview (7)).
+The common characteristic of these IPC mechanisms is that IPC
+objects are identified by mechanisms other than filesystem
+pathnames.
+.PP
+Each IPC namespace has its own set of System V IPC identifiers and
+its own POSIX message queue filesystem.
+Objects created in an IPC namespace are visible to all other processes
+that are members of that namespace,
+but are not visible to processes in other IPC namespaces.
+.PP
+The following
+.I /proc
+interfaces are distinct in each IPC namespace:
+.IP \[bu] 3
+The POSIX message queue interfaces in
+.IR /proc/sys/fs/mqueue .
+.IP \[bu]
+The System V IPC interfaces in
+.IR /proc/sys/kernel ,
+namely:
+.IR msgmax ,
+.IR msgmnb ,
+.IR msgmni ,
+.IR sem ,
+.IR shmall ,
+.IR shmmax ,
+.IR shmmni ,
+and
+.IR shm_rmid_forced .
+.IP \[bu]
+The System V IPC interfaces in
+.IR /proc/sysvipc .
+.PP
+When an IPC namespace is destroyed
+(i.e., when the last process that is a member of the namespace terminates),
+all IPC objects in the namespace are automatically destroyed.
+.PP
+Use of IPC namespaces requires a kernel that is configured with the
+.B CONFIG_IPC_NS
+option.
+.SH SEE ALSO
+.BR nsenter (1),
+.BR unshare (1),
+.BR clone (2),
+.BR setns (2),
+.BR unshare (2),
+.BR mq_overview (7),
+.BR namespaces (7),
+.BR sysvipc (7)
diff --git a/man7/ipv6.7 b/man7/ipv6.7
new file mode 100644
index 0000000..e6f9d54
--- /dev/null
+++ b/man7/ipv6.7
@@ -0,0 +1,416 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 2000 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: ipv6.7,v 1.3 2000/12/20 18:10:31 ak Exp $
+.\"
+.\" The following socket options are undocumented
+.\" All of the following are from:
+.\" commit 333fad5364d6b457c8d837f7d05802d2aaf8a961
+.\" Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+.\" Support several new sockopt / ancillary data in Advanced API (RFC3542).
+.\" IPV6_2292PKTINFO (2.6.14)
+.\" Formerly IPV6_PKTINFO
+.\" IPV6_2292HOPOPTS (2.6.14)
+.\" Formerly IPV6_HOPOPTS, which is documented
+.\" IPV6_2292DSTOPTS (2.6.14)
+.\" Formerly IPV6_DSTOPTS, which is documented
+.\" IPV6_2292RTHDR (2.6.14)
+.\" Formerly IPV6_RTHDR, which is documented
+.\" IPV6_2292PKTOPTIONS (2.6.14)
+.\" Formerly IPV6_PKTOPTIONS
+.\" IPV6_2292HOPLIMIT (2.6.14)
+.\" Formerly IPV6_HOPLIMIT, which is documented
+.\"
+.\" IPV6_RECVHOPLIMIT (2.6.14)
+.\" IPV6_RECVHOPOPTS (2.6.14)
+.\" IPV6_RTHDRDSTOPTS (2.6.14)
+.\" IPV6_RECVRTHDR (2.6.14)
+.\" IPV6_RECVDSTOPTS (2.6.14)
+.\"
+.\" IPV6_RECVPATHMTU (Linux 2.6.35, flag value added in Linux 2.6.14)
+.\" commit 793b14731686595a741d9f47726ad8b9a235385a
+.\" Author: Brian Haley <brian.haley@hp.com>
+.\" IPV6_PATHMTU (Linux 2.6.35, flag value added in Linux 2.6.14)
+.\" commit 793b14731686595a741d9f47726ad8b9a235385a
+.\" Author: Brian Haley <brian.haley@hp.com>
+.\" IPV6_DONTFRAG (Linux 2.6.35, flag value added in Linux 2.6.14)
+.\" commit 793b14731686595a741d9f47726ad8b9a235385a
+.\" Author: Brian Haley <brian.haley@hp.com>
+.\" commit 4b340ae20d0e2366792abe70f46629e576adaf5e
+.\" Author: Brian Haley <brian.haley@hp.com>
+.\"
+.\" IPV6_RECVTCLASS (Linux 2.6.14)
+.\" commit 41a1f8ea4fbfcdc4232f023732584aae2220de31
+.\" Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+.\" Based on patch from David L Stevens <dlstevens@us.ibm.com>
+.\"
+.\" IPV6_CHECKSUM (Linux 2.2)
+.\" IPV6_NEXTHOP (Linux 2.2)
+.\" IPV6_JOIN_ANYCAST (Linux 2.4.21 / Linux 2.6)
+.\" IPV6_LEAVE_ANYCAST (Linux 2.4.21 / Linux 2.6)
+.\" IPV6_FLOWLABEL_MGR (Linux 2.2.7 / Linux 2.4)
+.\" IPV6_FLOWINFO_SEND (Linux 2.2.7 / Linux 2.4)
+.\" IPV6_IPSEC_POLICY (Linux 2.6)
+.\" IPV6_XFRM_POLICY (Linux 2.6)
+.\" IPV6_TCLASS (Linux 2.6)
+.\"
+.\" IPV6_ADDR_PREFERENCES (Linux 2.6.26)
+.\" commit 7cbca67c073263c179f605bdbbdc565ab29d801d
+.\" Author: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+.\" IPV6_MINHOPCOUNT (Linux 2.6.35)
+.\" commit e802af9cabb011f09b9c19a82faef3dd315f27eb
+.\" Author: Stephen Hemminger <shemminger@vyatta.com>
+.\" IPV6_ORIGDSTADDR (Linux 2.6.37)
+.\" Actually a CMSG rather than a sockopt?
+.\" In header file, we have IPV6_RECVORIGDSTADDR == IPV6_ORIGDSTADDR
+.\" commit 6c46862280c5f55eda7750391bc65cd7e08c7535
+.\" Author: Balazs Scheidler <bazsi@balabit.hu>
+.\" IPV6_RECVORIGDSTADDR (Linux 2.6.37)
+.\" commit 6c46862280c5f55eda7750391bc65cd7e08c7535
+.\" Author: Balazs Scheidler <bazsi@balabit.hu>
+.\" Support for IPV6_RECVORIGDSTADDR sockopt for UDP sockets
+.\" were contributed by Harry Mason.
+.\" IPV6_TRANSPARENT (Linux 2.6.37)
+.\" commit 6c46862280c5f55eda7750391bc65cd7e08c7535
+.\" Author: Balazs Scheidler <bazsi@balabit.hu>
+.\" IPV6_UNICAST_IF (Linux 3.4)
+.\" commit c4062dfc425e94290ac427a98d6b4721dd2bc91f
+.\" Author: Erich E. Hoover <ehoover@mines.edu>
+.\"
+.TH ipv6 7 2023-07-30 "Linux man-pages 6.05.01"
+.SH NAME
+ipv6 \- Linux IPv6 protocol implementation
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netinet/in.h>
+.PP
+.IB tcp6_socket " = socket(AF_INET6, SOCK_STREAM, 0);"
+.IB raw6_socket " = socket(AF_INET6, SOCK_RAW, " protocol ");"
+.IB udp6_socket " = socket(AF_INET6, SOCK_DGRAM, " protocol ");"
+.fi
+.SH DESCRIPTION
+Linux 2.2 optionally implements the Internet Protocol, version 6.
+This man page contains a description of the IPv6 basic API as
+implemented by the Linux kernel and glibc 2.1.
+The interface
+is based on the BSD sockets interface; see
+.BR socket (7).
+.PP
+The IPv6 API aims to be mostly compatible with the
+IPv4 API (see
+.BR ip (7)).
+Only differences are described in this man page.
+.PP
+To bind an
+.B AF_INET6
+socket to the wildcard (any) address, the local address should be copied from the
+.I in6addr_any
+variable which has
+.I in6_addr
+type.
+In static initializations,
+.B IN6ADDR_ANY_INIT
+may also be used, which expands to a constant expression.
+Both of them are in network byte order.
+.PP
+The IPv6 loopback address (::1) is available in the global
+.I in6addr_loopback
+variable.
+For initializations,
+.B IN6ADDR_LOOPBACK_INIT
+should be used.
+.PP
+IPv4 connections can be handled with the v6 API by using the
+v4-mapped-on-v6 address type;
+thus a program needs to support only this API type to
+support both protocols.
+This is handled transparently by the address
+handling functions in the C library.
+.PP
+IPv4 and IPv6 share the local port space.
+When you get an IPv4 connection
+or packet to an IPv6 socket,
+its source address will be mapped to v6.
+.SS Address format
+.in +4n
+.EX
+struct sockaddr_in6 {
+ sa_family_t sin6_family; /* AF_INET6 */
+ in_port_t sin6_port; /* port number */
+ uint32_t sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ uint32_t sin6_scope_id; /* Scope ID (new in Linux 2.4) */
+};
+\&
+struct in6_addr {
+ unsigned char s6_addr[16]; /* IPv6 address */
+};
+.EE
+.in
+.PP
+.I sin6_family
+is always set to
+.BR AF_INET6 ;
+.I sin6_port
+is the protocol port (see
+.I sin_port
+in
+.BR ip (7));
+.I sin6_flowinfo
+is the IPv6 flow identifier;
+.I sin6_addr
+is the 128-bit IPv6 address.
+.I sin6_scope_id
+is an ID depending on the scope of the address.
+It is new in Linux 2.4.
+Linux supports it only for link-local addresses; in that case
+.I sin6_scope_id
+contains the interface index (see
+.BR netdevice (7)).
+.PP
+IPv6 supports several address types: unicast to address a single
+host, multicast to address a group of hosts,
+anycast to address the nearest member of a group of hosts
+(not implemented in Linux), IPv4-on-IPv6 to
+address an IPv4 host, and other reserved address types.
+.PP
+The address notation for IPv6 is a group of 8 4-digit hexadecimal
+numbers, separated with a \[aq]:\[aq].
+\&"::" stands for a string of 0 bits.
+Special addresses are ::1 for loopback and ::FFFF:<IPv4 address>
+for IPv4-mapped-on-IPv6.
+.PP
+The port space of IPv6 is shared with IPv4.
+.SS Socket options
+IPv6 supports some protocol-specific socket options that can be set with
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2).
+The socket option level for IPv6 is
+.BR IPPROTO_IPV6 .
+A boolean integer flag is zero when it is false, otherwise true.
+.TP
+.B IPV6_ADDRFORM
+Turn an
+.B AF_INET6
+socket into a socket of a different address family.
+Only
+.B AF_INET
+is currently supported for that.
+It is allowed only for IPv6 sockets
+that are connected and bound to a v4-mapped-on-v6 address.
+The argument is a pointer to an integer containing
+.BR AF_INET .
+This is useful to pass v4-mapped sockets as file descriptors to
+programs that don't know how to deal with the IPv6 API.
+.TP
+.B IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP
+Control membership in multicast groups.
+Argument is a pointer to a
+.IR "struct ipv6_mreq" .
+.TP
+.B IPV6_MTU
+.BR getsockopt ():
+Retrieve the current known path MTU of the current socket.
+Valid only when the socket has been connected.
+Returns an integer.
+.IP
+.BR setsockopt ():
+Set the MTU to be used for the socket.
+The MTU is limited by the device
+MTU or the path MTU when path MTU discovery is enabled.
+Argument is a pointer to integer.
+.TP
+.B IPV6_MTU_DISCOVER
+Control path-MTU discovery on the socket.
+See
+.B IP_MTU_DISCOVER
+in
+.BR ip (7)
+for details.
+.TP
+.B IPV6_MULTICAST_HOPS
+Set the multicast hop limit for the socket.
+Argument is a pointer to an
+integer.
+\-1 in the value means use the route default, otherwise it should be
+between 0 and 255.
+.TP
+.B IPV6_MULTICAST_IF
+Set the device for outgoing multicast packets on the socket.
+This is allowed only for
+.B SOCK_DGRAM
+and
+.B SOCK_RAW
+sockets.
+The argument is a pointer to an interface index (see
+.BR netdevice (7))
+in an integer.
+.TP
+.B IPV6_MULTICAST_LOOP
+Control whether the socket sees multicast packets that it has sent itself.
+Argument is a pointer to boolean.
+.TP
+.BR IPV6_RECVPKTINFO " (since Linux 2.6.14)"
+Set delivery of the
+.B IPV6_PKTINFO
+control message on incoming datagrams.
+Such control messages contain a
+.IR "struct in6_pktinfo" ,
+as per RFC 3542.
+Allowed only for
+.B SOCK_DGRAM
+or
+.B SOCK_RAW
+sockets.
+Argument is a pointer to a boolean value in an integer.
+.TP
+.B \%IPV6_RTHDR, \%IPV6_AUTHHDR, \%IPV6_DSTOPTS, \%IPV6_HOPOPTS, \
+\%IPV6_FLOWINFO, \%IPV6_HOPLIMIT
+Set delivery of control messages for incoming datagrams containing
+extension headers from the received packet.
+.B IPV6_RTHDR
+delivers the routing header,
+.B IPV6_AUTHHDR
+delivers the authentication header,
+.B IPV6_DSTOPTS
+delivers the destination options,
+.B IPV6_HOPOPTS
+delivers the hop options,
+.B IPV6_FLOWINFO
+delivers an integer containing the flow ID,
+.B IPV6_HOPLIMIT
+delivers an integer containing the hop count of the packet.
+The control messages have the same type as the socket option.
+All these header options can also be set for outgoing packets
+by putting the appropriate control message into the control buffer of
+.BR sendmsg (2).
+Allowed only for
+.B SOCK_DGRAM
+or
+.B SOCK_RAW
+sockets.
+Argument is a pointer to a boolean value.
+.TP
+.B IPV6_RECVERR
+Control receiving of asynchronous error options.
+See
+.B IP_RECVERR
+in
+.BR ip (7)
+for details.
+Argument is a pointer to boolean.
+.TP
+.B IPV6_ROUTER_ALERT
+Pass forwarded packets containing a router alert hop-by-hop option to
+this socket.
+Allowed only for
+.B SOCK_RAW
+sockets.
+The tapped packets are not forwarded by the kernel; it is the
+user's responsibility to send them out again.
+Argument is a pointer to an integer.
+A positive integer indicates a router alert option value to intercept.
+Packets carrying a router alert option with a value field containing
+this integer will be delivered to the socket.
+A negative integer disables delivery of packets with router alert options
+to this socket.
+.TP
+.B IPV6_UNICAST_HOPS
+Set the unicast hop limit for the socket.
+Argument is a pointer to an integer.
+\-1 in the value means use the route default,
+otherwise it should be between 0 and 255.
+.TP
+.BR IPV6_V6ONLY " (since Linux 2.4.21 and 2.6)"
+.\" See RFC 3493
+If this flag is set to true (nonzero), then the socket is restricted
+to sending and receiving IPv6 packets only.
+In this case, an IPv4 and an IPv6 application can bind
+to a single port at the same time.
+.IP
+If this flag is set to false (zero),
+then the socket can be used to send and receive packets
+to and from an IPv6 address or an IPv4-mapped IPv6 address.
+.IP
+The argument is a pointer to a boolean value in an integer.
+.IP
+The default value for this flag is defined by the contents of the file
+.IR /proc/sys/net/ipv6/bindv6only .
+The default value for that file is 0 (false).
+.\" FLOWLABEL_MGR, FLOWINFO_SEND
+.SH ERRORS
+.TP
+.B ENODEV
+The user tried to
+.BR bind (2)
+to a link-local IPv6 address, but the
+.I sin6_scope_id
+in the supplied
+.I sockaddr_in6
+structure is not a valid
+interface index.
+.SH VERSIONS
+Linux 2.4 will break binary compatibility for the
+.I sockaddr_in6
+for 64-bit
+hosts by changing the alignment of
+.I in6_addr
+and adding an additional
+.I sin6_scope_id
+field.
+The kernel interfaces stay compatible, but a program including
+.I sockaddr_in6
+or
+.I in6_addr
+into other structures may not be.
+This is not
+a problem for 32-bit hosts like i386.
+.PP
+The
+.I sin6_flowinfo
+field is new in Linux 2.4.
+It is transparently passed/read by the kernel
+when the passed address length contains it.
+Some programs that pass a longer address buffer and then
+check the outgoing address length may break.
+.SH NOTES
+The
+.I sockaddr_in6
+structure is bigger than the generic
+.IR sockaddr .
+Programs that assume that all address types can be stored safely in a
+.I struct sockaddr
+need to be changed to use
+.I struct sockaddr_storage
+for that instead.
+.PP
+.BR SOL_IP ,
+.BR SOL_IPV6 ,
+.BR SOL_ICMPV6 ,
+and other
+.B SOL_*
+socket options are nonportable variants of
+.BR IPPROTO_* .
+See also
+.BR ip (7).
+.SH BUGS
+The IPv6 extended API as in RFC\ 2292 is currently only partly
+implemented;
+although the 2.2 kernel has near complete support for receiving options,
+the macros for generating IPv6 options are missing in glibc 2.1.
+.PP
+IPsec support for ESP and AH headers is missing.
+.PP
+Flow label management is not complete and not documented here.
+.PP
+This man page is not complete.
+.SH SEE ALSO
+.BR cmsg (3),
+.BR ip (7)
+.PP
+RFC\ 2553: IPv6 BASIC API;
+Linux tries to be compliant to this.
+RFC\ 2460: IPv6 specification.
diff --git a/man7/iso-8859-1.7 b/man7/iso-8859-1.7
new file mode 100644
index 0000000..1969dfb
--- /dev/null
+++ b/man7/iso-8859-1.7
@@ -0,0 +1 @@
+.so man7/iso_8859-1.7
diff --git a/man7/iso-8859-10.7 b/man7/iso-8859-10.7
new file mode 100644
index 0000000..9b4658f
--- /dev/null
+++ b/man7/iso-8859-10.7
@@ -0,0 +1 @@
+.so man7/iso_8859-10.7
diff --git a/man7/iso-8859-11.7 b/man7/iso-8859-11.7
new file mode 100644
index 0000000..cbd4cfe
--- /dev/null
+++ b/man7/iso-8859-11.7
@@ -0,0 +1 @@
+.so man7/iso_8859-11.7
diff --git a/man7/iso-8859-13.7 b/man7/iso-8859-13.7
new file mode 100644
index 0000000..8ad2335
--- /dev/null
+++ b/man7/iso-8859-13.7
@@ -0,0 +1 @@
+.so man7/iso_8859-13.7
diff --git a/man7/iso-8859-14.7 b/man7/iso-8859-14.7
new file mode 100644
index 0000000..4aa555d
--- /dev/null
+++ b/man7/iso-8859-14.7
@@ -0,0 +1 @@
+.so man7/iso_8859-14.7
diff --git a/man7/iso-8859-15.7 b/man7/iso-8859-15.7
new file mode 100644
index 0000000..a4095d7
--- /dev/null
+++ b/man7/iso-8859-15.7
@@ -0,0 +1 @@
+.so man7/iso_8859-15.7
diff --git a/man7/iso-8859-16.7 b/man7/iso-8859-16.7
new file mode 100644
index 0000000..b9c8e91
--- /dev/null
+++ b/man7/iso-8859-16.7
@@ -0,0 +1 @@
+.so man7/iso_8859-16.7
diff --git a/man7/iso-8859-2.7 b/man7/iso-8859-2.7
new file mode 100644
index 0000000..da36668
--- /dev/null
+++ b/man7/iso-8859-2.7
@@ -0,0 +1 @@
+.so man7/iso_8859-2.7
diff --git a/man7/iso-8859-3.7 b/man7/iso-8859-3.7
new file mode 100644
index 0000000..75e42ce
--- /dev/null
+++ b/man7/iso-8859-3.7
@@ -0,0 +1 @@
+.so man7/iso_8859-3.7
diff --git a/man7/iso-8859-4.7 b/man7/iso-8859-4.7
new file mode 100644
index 0000000..15a829e
--- /dev/null
+++ b/man7/iso-8859-4.7
@@ -0,0 +1 @@
+.so man7/iso_8859-4.7
diff --git a/man7/iso-8859-5.7 b/man7/iso-8859-5.7
new file mode 100644
index 0000000..1f20320
--- /dev/null
+++ b/man7/iso-8859-5.7
@@ -0,0 +1 @@
+.so man7/iso_8859-5.7
diff --git a/man7/iso-8859-6.7 b/man7/iso-8859-6.7
new file mode 100644
index 0000000..edcafdf
--- /dev/null
+++ b/man7/iso-8859-6.7
@@ -0,0 +1 @@
+.so man7/iso_8859-6.7
diff --git a/man7/iso-8859-7.7 b/man7/iso-8859-7.7
new file mode 100644
index 0000000..951384c
--- /dev/null
+++ b/man7/iso-8859-7.7
@@ -0,0 +1 @@
+.so man7/iso_8859-7.7
diff --git a/man7/iso-8859-8.7 b/man7/iso-8859-8.7
new file mode 100644
index 0000000..07cf216
--- /dev/null
+++ b/man7/iso-8859-8.7
@@ -0,0 +1 @@
+.so man7/iso_8859-8.7
diff --git a/man7/iso-8859-9.7 b/man7/iso-8859-9.7
new file mode 100644
index 0000000..0fcc7d4
--- /dev/null
+++ b/man7/iso-8859-9.7
@@ -0,0 +1 @@
+.so man7/iso_8859-9.7
diff --git a/man7/iso_8859-1.7 b/man7/iso_8859-1.7
new file mode 100644
index 0000000..7534a85
--- /dev/null
+++ b/man7/iso_8859-1.7
@@ -0,0 +1,150 @@
+'\" t
+.\" Copyright 1993-1995 Daniel Quinlan (quinlan@yggdrasil.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Slightly rearranged, aeb, 950713
+.\" Updated, dpo, 990531
+.TH ISO_8859-1 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-1 \- ISO 8859-1 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-1 encodes the
+characters used in many West European languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-1 characters
+The following table displays the characters in ISO 8859-1 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ¡ INVERTED EXCLAMATION MARK
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 ¥ YEN SIGN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA ª FEMININE ORDINAL INDICATOR
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ¸ CEDILLA
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA º MASCULINE ORDINAL INDICATOR
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ¼ VULGAR FRACTION ONE QUARTER
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE ¾ VULGAR FRACTION THREE QUARTERS
+277 191 BF ¿ INVERTED QUESTION MARK
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ð LATIN CAPITAL LETTER ETH
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Þ LATIN CAPITAL LETTER THORN
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ð LATIN SMALL LETTER ETH
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE þ LATIN SMALL LETTER THORN
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+ISO 8859-1 is also known as Latin-1.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1252 (7),
+.BR iso_8859\-15 (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-10.7 b/man7/iso_8859-10.7
new file mode 100644
index 0000000..d43a00a
--- /dev/null
+++ b/man7/iso_8859-10.7
@@ -0,0 +1,146 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-10 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-10 \- ISO 8859-10 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-10 encodes the
+characters used in Nordic languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-10 characters
+The following table displays the characters in ISO 8859-10 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ą LATIN CAPITAL LETTER A WITH OGONEK
+242 162 A2 Ē LATIN CAPITAL LETTER E WITH MACRON
+243 163 A3 Ģ LATIN CAPITAL LETTER G WITH CEDILLA
+244 164 A4 Ī LATIN CAPITAL LETTER I WITH MACRON
+245 165 A5 Ĩ LATIN CAPITAL LETTER I WITH TILDE
+246 166 A6 Ķ LATIN CAPITAL LETTER K WITH CEDILLA
+247 167 A7 § SECTION SIGN
+250 168 A8 Ļ LATIN CAPITAL LETTER L WITH CEDILLA
+251 169 A9 Đ LATIN CAPITAL LETTER D WITH STROKE
+252 170 AA Š LATIN CAPITAL LETTER S WITH CARON
+253 171 AB Ŧ LATIN CAPITAL LETTER T WITH STROKE
+254 172 AC Ž LATIN CAPITAL LETTER Z WITH CARON
+255 173 AD ­ SOFT HYPHEN
+256 174 AE Ū LATIN CAPITAL LETTER U WITH MACRON
+257 175 AF Ŋ LATIN CAPITAL LETTER ENG
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ą LATIN SMALL LETTER A WITH OGONEK
+262 178 B2 ē LATIN SMALL LETTER E WITH MACRON
+263 179 B3 ģ LATIN SMALL LETTER G WITH CEDILLA
+264 180 B4 ī LATIN SMALL LETTER I WITH MACRON
+265 181 B5 ĩ LATIN SMALL LETTER I WITH TILDE
+266 182 B6 ķ LATIN SMALL LETTER K WITH CEDILLA
+267 183 B7 · MIDDLE DOT
+270 184 B8 ļ LATIN SMALL LETTER L WITH CEDILLA
+271 185 B9 đ LATIN SMALL LETTER D WITH STROKE
+272 186 BA š LATIN SMALL LETTER S WITH CARON
+273 187 BB ŧ LATIN SMALL LETTER T WITH STROKE
+274 188 BC ž LATIN SMALL LETTER Z WITH CARON
+275 189 BD ― HORIZONTAL BAR
+276 190 BE ū LATIN SMALL LETTER U WITH MACRON
+277 191 BF ŋ LATIN SMALL LETTER ENG
+300 192 C0 Ā LATIN CAPITAL LETTER A WITH MACRON
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Į LATIN CAPITAL LETTER I WITH OGONEK
+310 200 C8 Č LATIN CAPITAL LETTER C WITH CARON
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ę LATIN CAPITAL LETTER E WITH OGONEK
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ė LATIN CAPITAL LETTER E WITH DOT ABOVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ð LATIN CAPITAL LETTER ETH
+321 209 D1 Ņ LATIN CAPITAL LETTER N WITH CEDILLA
+322 210 D2 Ō LATIN CAPITAL LETTER O WITH MACRON
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 Ũ LATIN CAPITAL LETTER U WITH TILDE
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ų LATIN CAPITAL LETTER U WITH OGONEK
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Þ LATIN CAPITAL LETTER THORN
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 ā LATIN SMALL LETTER A WITH MACRON
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 į LATIN SMALL LETTER I WITH OGONEK
+350 232 E8 č LATIN SMALL LETTER C WITH CARON
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ę LATIN SMALL LETTER E WITH OGONEK
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ė LATIN SMALL LETTER E WITH DOT ABOVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ð LATIN SMALL LETTER ETH
+361 241 F1 ņ LATIN SMALL LETTER N WITH CEDILLA
+362 242 F2 ō LATIN SMALL LETTER O WITH MACRON
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ũ LATIN SMALL LETTER U WITH TILDE
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ų LATIN SMALL LETTER U WITH OGONEK
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE þ LATIN SMALL LETTER THORN
+377 255 FF ĸ LATIN SMALL LETTER KRA
+.TE
+.SH NOTES
+ISO 8859-10 is also known as Latin-6.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-11.7 b/man7/iso_8859-11.7
new file mode 100644
index 0000000..e488606
--- /dev/null
+++ b/man7/iso_8859-11.7
@@ -0,0 +1,143 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis <edimitro at tee.gr>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\"Thanomsub Noppaburana <donga.nb@gmail.com> made valuable suggestions.
+.\"
+.TH ISO_8859-11 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-11 \- ISO 8859-11 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-11 encodes the
+characters used in the Thai language.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-11 characters
+The following table displays the characters in ISO 8859-11 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ก THAI CHARACTER KO KAI
+242 162 A2 ข THAI CHARACTER KHO KHAI
+243 163 A3 ฃ THAI CHARACTER KHO KHUAT
+244 164 A4 ค THAI CHARACTER KHO KHWAI
+245 165 A5 ฅ THAI CHARACTER KHO KHON
+246 166 A6 ฆ THAI CHARACTER KHO RAKHANG
+247 167 A7 ง THAI CHARACTER NGO NGU
+250 168 A8 จ THAI CHARACTER CHO CHAN
+251 169 A9 ฉ THAI CHARACTER CHO CHING
+252 170 AA ช THAI CHARACTER CHO CHANG
+253 171 AB ซ THAI CHARACTER SO SO
+254 172 AC ฌ THAI CHARACTER CHO CHOE
+255 173 AD ญ THAI CHARACTER YO YING
+256 174 AE ฎ THAI CHARACTER DO CHADA
+257 175 AF ฏ THAI CHARACTER TO PATAK
+260 176 B0 ฐ THAI CHARACTER THO THAN
+261 177 B1 ฑ THAI CHARACTER THO NANGMONTHO
+262 178 B2 ฒ THAI CHARACTER THO PHUTHAO
+263 179 B3 ณ THAI CHARACTER NO NEN
+264 180 B4 ด THAI CHARACTER DO DEK
+265 181 B5 ต THAI CHARACTER TO TAO
+266 182 B6 ถ THAI CHARACTER THO THUNG
+267 183 B7 ท THAI CHARACTER THO THAHAN
+270 184 B8 ธ THAI CHARACTER THO THONG
+271 185 B9 น THAI CHARACTER NO NU
+272 186 BA บ THAI CHARACTER BO BAIMAI
+273 187 BB ป THAI CHARACTER PO PLA
+274 188 BC ผ THAI CHARACTER PHO PHUNG
+275 189 BD ฝ THAI CHARACTER FO FA
+276 190 BE พ THAI CHARACTER PHO PHAN
+277 191 BF ฟ THAI CHARACTER FO FAN
+300 192 C0 ภ THAI CHARACTER PHO SAMPHAO
+301 193 C1 ม THAI CHARACTER MO MA
+302 194 C2 ย THAI CHARACTER YO YAK
+303 195 C3 ร THAI CHARACTER RO RUA
+304 196 C4 ฤ THAI CHARACTER RU
+305 197 C5 ล THAI CHARACTER LO LING
+306 198 C6 ฦ THAI CHARACTER LU
+307 199 C7 ว THAI CHARACTER WO WAEN
+310 200 C8 ศ THAI CHARACTER SO SALA
+311 201 C9 ษ THAI CHARACTER SO RUSI
+312 202 CA ส THAI CHARACTER SO SUA
+313 203 CB ห THAI CHARACTER HO HIP
+314 204 CC ฬ THAI CHARACTER LO CHULA
+315 205 CD อ THAI CHARACTER O ANG
+316 206 CE ฮ THAI CHARACTER HO NOKHUK
+317 207 CF ฯ THAI CHARACTER PAIYANNOI
+320 208 D0 ะ THAI CHARACTER SARA A
+321 209 D1 ั THAI CHARACTER MAI HAN-AKAT
+322 210 D2 า THAI CHARACTER SARA AA
+323 211 D3 ำ THAI CHARACTER SARA AM
+324 212 D4 ิ THAI CHARACTER SARA I
+325 213 D5 ี THAI CHARACTER SARA II
+326 214 D6 ึ THAI CHARACTER SARA UE
+327 215 D7 ื THAI CHARACTER SARA UEE
+330 216 D8 ุ THAI CHARACTER SARA U
+331 217 D9 ู THAI CHARACTER SARA UU
+332 218 DA ฺ THAI CHARACTER PHINTHU
+337 223 DF ฿ THAI CURRENCY SYMBOL BAHT
+340 224 E0 เ THAI CHARACTER SARA E
+341 225 E1 แ THAI CHARACTER SARA AE
+342 226 E2 โ THAI CHARACTER SARA O
+343 227 E3 ใ THAI CHARACTER SARA AI MAIMUAN
+344 228 E4 ไ THAI CHARACTER SARA AI MAIMALAI
+345 229 E5 ๅ THAI CHARACTER LAKKHANGYAO
+346 230 E6 ๆ THAI CHARACTER MAIYAMOK
+347 231 E7 ็ THAI CHARACTER MAITAIKHU
+350 232 E8 ่ THAI CHARACTER MAI EK
+351 233 E9 ้ THAI CHARACTER MAI THO
+352 234 EA ๊ THAI CHARACTER MAI TRI
+353 235 EB ๋ THAI CHARACTER MAI CHATTAWA
+354 236 EC ์ THAI CHARACTER THANTHAKHAT
+355 237 ED ํ THAI CHARACTER NIKHAHIT
+356 238 EE ๎ THAI CHARACTER YAMAKKAN
+357 239 EF ๏ THAI CHARACTER FONGMAN
+360 240 F0 ๐ THAI DIGIT ZERO
+361 241 F1 ๑ THAI DIGIT ONE
+362 242 F2 ๒ THAI DIGIT TWO
+363 243 F3 ๓ THAI DIGIT THREE
+364 244 F4 ๔ THAI DIGIT FOUR
+365 245 F5 ๕ THAI DIGIT FIVE
+366 246 F6 ๖ THAI DIGIT SIX
+367 247 F7 ๗ THAI DIGIT SEVEN
+370 248 F8 ๘ THAI DIGIT EIGHT
+371 249 F9 ๙ THAI DIGIT NINE
+372 250 FA ๚ THAI CHARACTER ANGKHANKHU
+373 251 FB ๛ THAI CHARACTER KHOMUT
+.TE
+.SH NOTES
+ISO 8859-11 is the same as TIS (Thai Industrial Standard) 620-2533,
+commonly known as TIS-620, except for the character in position A0:
+ISO 8859-11 defines this as NO-BREAK SPACE,
+while TIS-620 leaves it undefined.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-13.7 b/man7/iso_8859-13.7
new file mode 100644
index 0000000..1158347
--- /dev/null
+++ b/man7/iso_8859-13.7
@@ -0,0 +1,146 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-13 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-13 \- ISO 8859-13 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-13 encodes the
+characters used in Baltic Rim languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-13 characters
+The following table displays the characters in ISO 8859-13 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ” RIGHT DOUBLE QUOTATION MARK
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 „ DOUBLE LOW-9 QUOTATION MARK
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 Ø LATIN CAPITAL LETTER O WITH STROKE
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA Ŗ LATIN CAPITAL LETTER R WITH CEDILLA
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF Æ LATIN CAPITAL LETTER AE
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 “ LEFT DOUBLE QUOTATION MARK
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ø LATIN SMALL LETTER O WITH STROKE
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA ŗ LATIN SMALL LETTER R WITH CEDILLA
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ¼ VULGAR FRACTION ONE QUARTER
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE ¾ VULGAR FRACTION THREE QUARTERS
+277 191 BF æ LATIN SMALL LETTER AE
+300 192 C0 Ą LATIN CAPITAL LETTER A WITH OGONEK
+301 193 C1 Į LATIN CAPITAL LETTER I WITH OGONEK
+302 194 C2 Ā LATIN CAPITAL LETTER A WITH MACRON
+303 195 C3 Ć LATIN CAPITAL LETTER C WITH ACUTE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Ę LATIN CAPITAL LETTER E WITH OGONEK
+307 199 C7 Ē LATIN CAPITAL LETTER E WITH MACRON
+310 200 C8 Č LATIN CAPITAL LETTER C WITH CARON
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ź LATIN CAPITAL LETTER Z WITH ACUTE
+313 203 CB Ė LATIN CAPITAL LETTER E WITH DOT ABOVE
+314 204 CC Ģ LATIN CAPITAL LETTER G WITH CEDILLA
+315 205 CD Ķ LATIN CAPITAL LETTER K WITH CEDILLA
+316 206 CE Ī LATIN CAPITAL LETTER I WITH MACRON
+317 207 CF Ļ LATIN CAPITAL LETTER L WITH CEDILLA
+320 208 D0 Š LATIN CAPITAL LETTER S WITH CARON
+321 209 D1 Ń LATIN CAPITAL LETTER N WITH ACUTE
+322 210 D2 Ņ LATIN CAPITAL LETTER N WITH CEDILLA
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ō LATIN CAPITAL LETTER O WITH MACRON
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ų LATIN CAPITAL LETTER U WITH OGONEK
+331 217 D9 Ł LATIN CAPITAL LETTER L WITH STROKE
+332 218 DA Ś LATIN CAPITAL LETTER S WITH ACUTE
+333 219 DB Ū LATIN CAPITAL LETTER U WITH MACRON
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE
+336 222 DE Ž LATIN CAPITAL LETTER Z WITH CARON
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 ą LATIN SMALL LETTER A WITH OGONEK
+341 225 E1 į LATIN SMALL LETTER I WITH OGONEK
+342 226 E2 ā LATIN SMALL LETTER A WITH MACRON
+343 227 E3 ć LATIN SMALL LETTER C WITH ACUTE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 ę LATIN SMALL LETTER E WITH OGONEK
+347 231 E7 ē LATIN SMALL LETTER E WITH MACRON
+350 232 E8 č LATIN SMALL LETTER C WITH CARON
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ź LATIN SMALL LETTER Z WITH ACUTE
+353 235 EB ė LATIN SMALL LETTER E WITH DOT ABOVE
+354 236 EC ģ LATIN SMALL LETTER G WITH CEDILLA
+355 237 ED ķ LATIN SMALL LETTER K WITH CEDILLA
+356 238 EE ī LATIN SMALL LETTER I WITH MACRON
+357 239 EF ļ LATIN SMALL LETTER L WITH CEDILLA
+360 240 F0 š LATIN SMALL LETTER S WITH CARON
+361 241 F1 ń LATIN SMALL LETTER N WITH ACUTE
+362 242 F2 ņ LATIN SMALL LETTER N WITH CEDILLA
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ō LATIN SMALL LETTER O WITH MACRON
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ų LATIN SMALL LETTER U WITH OGONEK
+371 249 F9 ł LATIN SMALL LETTER L WITH STROKE
+372 250 FA ś LATIN SMALL LETTER S WITH ACUTE
+373 251 FB ū LATIN SMALL LETTER U WITH MACRON
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ż LATIN SMALL LETTER Z WITH DOT ABOVE
+376 254 FE ž LATIN SMALL LETTER Z WITH CARON
+377 255 FF ’ RIGHT SINGLE QUOTATION MARK
+.TE
+.SH NOTES
+ISO 8859-13 is also known as Latin-7.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-14.7 b/man7/iso_8859-14.7
new file mode 100644
index 0000000..eedac82
--- /dev/null
+++ b/man7/iso_8859-14.7
@@ -0,0 +1,146 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-14 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-14 \- ISO 8859-14 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-14 encodes the
+characters used in Celtic languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-14 characters
+The following table displays the characters in ISO 8859-14 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ḃ LATIN CAPITAL LETTER B WITH DOT ABOVE
+242 162 A2 ḃ LATIN SMALL LETTER B WITH DOT ABOVE
+243 163 A3 £ POUND SIGN
+244 164 A4 Ċ LATIN CAPITAL LETTER C WITH DOT ABOVE
+245 165 A5 ċ LATIN SMALL LETTER C WITH DOT ABOVE
+246 166 A6 Ḋ LATIN CAPITAL LETTER D WITH DOT ABOVE
+247 167 A7 § SECTION SIGN
+250 168 A8 Ẁ LATIN CAPITAL LETTER W WITH GRAVE
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA Ẃ LATIN CAPITAL LETTER W WITH ACUTE
+253 171 AB ḋ LATIN SMALL LETTER D WITH DOT ABOVE
+254 172 AC Ỳ LATIN CAPITAL LETTER Y WITH GRAVE
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS
+260 176 B0 Ḟ LATIN CAPITAL LETTER F WITH DOT ABOVE
+261 177 B1 ḟ LATIN SMALL LETTER F WITH DOT ABOVE
+262 178 B2 Ġ LATIN CAPITAL LETTER G WITH DOT ABOVE
+263 179 B3 ġ LATIN SMALL LETTER G WITH DOT ABOVE
+264 180 B4 Ṁ LATIN CAPITAL LETTER M WITH DOT ABOVE
+265 181 B5 ṁ LATIN SMALL LETTER M WITH DOT ABOVE
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 Ṗ LATIN CAPITAL LETTER P WITH DOT ABOVE
+270 184 B8 ẁ LATIN SMALL LETTER W WITH GRAVE
+271 185 B9 ṗ LATIN SMALL LETTER P WITH DOT ABOVE
+272 186 BA ẃ LATIN SMALL LETTER W WITH ACUTE
+273 187 BB Ṡ LATIN CAPITAL LETTER S WITH DOT ABOVE
+274 188 BC ỳ LATIN SMALL LETTER Y WITH GRAVE
+275 189 BD Ẅ LATIN CAPITAL LETTER W WITH DIAERESIS
+276 190 BE ẅ LATIN SMALL LETTER W WITH DIAERESIS
+277 191 BF ṡ LATIN SMALL LETTER S WITH DOT ABOVE
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ŵ LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 Ṫ LATIN CAPITAL LETTER T WITH DOT ABOVE
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Ŷ LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ŵ LATIN SMALL LETTER W WITH CIRCUMFLEX
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ṫ LATIN SMALL LETTER T WITH DOT ABOVE
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE ŷ LATIN SMALL LETTER Y WITH CIRCUMFLEX
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+ISO 8859-14 is also known as Latin-8.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-15.7 b/man7/iso_8859-15.7
new file mode 100644
index 0000000..908bb9b
--- /dev/null
+++ b/man7/iso_8859-15.7
@@ -0,0 +1,149 @@
+'\" t
+.\" Copyright 1993-1995 Daniel Quinlan (quinlan@yggdrasil.com)
+.\" Copyright 1999 Dimitri Papadopoulos (dpo@club-internet.fr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-15 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-15 \- ISO 8859-15 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-15 encodes the
+characters used in many West European languages and adds the Euro sign.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-15 characters
+The following table displays the characters in ISO 8859-15 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ¡ INVERTED EXCLAMATION MARK
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 € EURO SIGN
+245 165 A5 ¥ YEN SIGN
+246 166 A6 Š LATIN CAPITAL LETTER S WITH CARON
+247 167 A7 § SECTION SIGN
+250 168 A8 š LATIN SMALL LETTER S WITH CARON
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA ª FEMININE ORDINAL INDICATOR
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 Ž LATIN CAPITAL LETTER Z WITH CARON
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ž LATIN SMALL LETTER Z WITH CARON
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA º MASCULINE ORDINAL INDICATOR
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC Œ LATIN CAPITAL LIGATURE OE
+275 189 BD œ LATIN SMALL LIGATURE OE
+276 190 BE Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS
+277 191 BF ¿ INVERTED QUESTION MARK
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ð LATIN CAPITAL LETTER ETH
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Þ LATIN CAPITAL LETTER THORN
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ð LATIN SMALL LETTER ETH
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE þ LATIN SMALL LETTER THORN
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+ISO 8859-15 is also known as Latin-9 (or sometimes as Latin-0).
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1252 (7),
+.BR iso_8859\-1 (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-16.7 b/man7/iso_8859-16.7
new file mode 100644
index 0000000..697a3f9
--- /dev/null
+++ b/man7/iso_8859-16.7
@@ -0,0 +1,147 @@
+'\" t
+.\" Copyright 2002 Ionel Mugurel Ciobîcă (IMCiobica@netscape.net)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-16 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-16 \- ISO 8859-16 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-16 encodes the
+Latin characters used in Southeast European languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-16 characters
+The following table displays the characters in ISO 8859-16 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ą LATIN CAPITAL LETTER A WITH OGONEK
+242 162 A2 ą LATIN SMALL LETTER A WITH OGONEK
+243 163 A3 Ł LATIN CAPITAL LETTER L WITH STROKE
+244 164 A4 € EURO SIGN
+245 165 A5 „ DOUBLE LOW-9 QUOTATION MARK
+246 166 A6 Š LATIN CAPITAL LETTER S WITH CARON
+247 167 A7 § SECTION SIGN
+250 168 A8 š LATIN SMALL LETTER S WITH CARON
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA Ș LATIN CAPITAL LETTER S WITH COMMA BELOW
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC Ź LATIN CAPITAL LETTER Z WITH ACUTE
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ź LATIN SMALL LETTER Z WITH ACUTE
+257 175 AF Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 Č LATIN CAPITAL LETTER C WITH CARON
+263 179 B3 ł LATIN SMALL LETTER L WITH STROKE
+264 180 B4 Ž LATIN CAPITAL LETTER Z WITH CARON
+265 181 B5 ” RIGHT DOUBLE QUOTATION MARK
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ž LATIN SMALL LETTER Z WITH CARON
+271 185 B9 č LATIN SMALL LETTER C WITH CARON
+272 186 BA ș LATIN SMALL LETTER S WITH COMMA BELOW
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC Œ LATIN CAPITAL LIGATURE OE
+275 189 BD œ LATIN SMALL LIGATURE OE
+276 190 BE Ÿ LATIN CAPITAL LETTER Y WITH DIAERESIS
+277 191 BF ż LATIN SMALL LETTER Z WITH DOT ABOVE
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ă LATIN CAPITAL LETTER A WITH BREVE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Ć LATIN CAPITAL LETTER C WITH ACUTE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Đ LATIN CAPITAL LETTER D WITH STROKE
+321 209 D1 Ń LATIN CAPITAL LETTER N WITH ACUTE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Ő LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 Ś LATIN CAPITAL LETTER S WITH ACUTE
+330 216 D8 Ű LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ę LATIN CAPITAL LETTER E WITH OGONEK
+336 222 DE Ț LATIN CAPITAL LETTER T WITH COMMA BELOW
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ă LATIN SMALL LETTER A WITH BREVE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 ć LATIN SMALL LETTER C WITH ACUTE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 đ LATIN SMALL LETTER D WITH STROKE
+361 241 F1 ń LATIN SMALL LETTER N WITH ACUTE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 ő LATIN SMALL LETTER O WITH DOUBLE ACUTE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ś LATIN SMALL LETTER S WITH ACUTE
+370 248 F8 ű LATIN SMALL LETTER U WITH DOUBLE ACUTE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ę LATIN SMALL LETTER E WITH OGONEK
+376 254 FE ț LATIN SMALL LETTER T WITH COMMA BELOW
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+ISO 8859-16 is also known as Latin-10.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR iso_8859\-3 (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-2.7 b/man7/iso_8859-2.7
new file mode 100644
index 0000000..403e85e
--- /dev/null
+++ b/man7/iso_8859-2.7
@@ -0,0 +1,151 @@
+'\" t
+.\" Copyright 1999 Roman Maurer (roman.maurer@hermes.si)
+.\" Copyright 1993-1995 Daniel Quinlan (quinlan@yggdrasil.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Slightly rearranged, aeb, 950713
+.\" Updated, dpo, 990531
+.TH ISO_8859-2 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-2 \- ISO 8859-2 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-2 encodes the
+Latin characters used in many Central and East European languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-2 characters
+The following table displays the characters in ISO 8859-2 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ą LATIN CAPITAL LETTER A WITH OGONEK
+242 162 A2 ˘ BREVE
+243 163 A3 Ł LATIN CAPITAL LETTER L WITH STROKE
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 Ľ LATIN CAPITAL LETTER L WITH CARON
+246 166 A6 Ś LATIN CAPITAL LETTER S WITH ACUTE
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 Š LATIN CAPITAL LETTER S WITH CARON
+252 170 AA Ş LATIN CAPITAL LETTER S WITH CEDILLA
+253 171 AB Ť LATIN CAPITAL LETTER T WITH CARON
+254 172 AC Ź LATIN CAPITAL LETTER Z WITH ACUTE
+255 173 AD ­ SOFT HYPHEN
+256 174 AE Ž LATIN CAPITAL LETTER Z WITH CARON
+257 175 AF Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ą LATIN SMALL LETTER A WITH OGONEK
+262 178 B2 ˛ OGONEK
+263 179 B3 ł LATIN SMALL LETTER L WITH STROKE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 ľ LATIN SMALL LETTER L WITH CARON
+266 182 B6 ś LATIN SMALL LETTER S WITH ACUTE
+267 183 B7 ˇ CARON
+270 184 B8 ¸ CEDILLA
+271 185 B9 š LATIN SMALL LETTER S WITH CARON
+272 186 BA ş LATIN SMALL LETTER S WITH CEDILLA
+273 187 BB ť LATIN SMALL LETTER T WITH CARON
+274 188 BC ź LATIN SMALL LETTER Z WITH ACUTE
+275 189 BD ˝ DOUBLE ACUTE ACCENT
+276 190 BE ž LATIN SMALL LETTER Z WITH CARON
+277 191 BF ż LATIN SMALL LETTER Z WITH DOT ABOVE
+300 192 C0 Ŕ LATIN CAPITAL LETTER R WITH ACUTE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ă LATIN CAPITAL LETTER A WITH BREVE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Ĺ LATIN CAPITAL LETTER L WITH ACUTE
+306 198 C6 Ć LATIN CAPITAL LETTER C WITH ACUTE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 Č LATIN CAPITAL LETTER C WITH CARON
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ę LATIN CAPITAL LETTER E WITH OGONEK
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ě LATIN CAPITAL LETTER E WITH CARON
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ď LATIN CAPITAL LETTER D WITH CARON
+320 208 D0 Đ LATIN CAPITAL LETTER D WITH STROKE
+321 209 D1 Ń LATIN CAPITAL LETTER N WITH ACUTE
+322 210 D2 Ň LATIN CAPITAL LETTER N WITH CARON
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Ő LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ř LATIN CAPITAL LETTER R WITH CARON
+331 217 D9 Ů LATIN CAPITAL LETTER U WITH RING ABOVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Ű LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ý LATIN CAPITAL LETTER Y WITH ACUTE
+336 222 DE Ţ LATIN CAPITAL LETTER T WITH CEDILLA
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 ŕ LATIN SMALL LETTER R WITH ACUTE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ă LATIN SMALL LETTER A WITH BREVE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 ĺ LATIN SMALL LETTER L WITH ACUTE
+346 230 E6 ć LATIN SMALL LETTER C WITH ACUTE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 č LATIN SMALL LETTER C WITH CARON
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ę LATIN SMALL LETTER E WITH OGONEK
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ě LATIN SMALL LETTER E WITH CARON
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ď LATIN SMALL LETTER D WITH CARON
+360 240 F0 đ LATIN SMALL LETTER D WITH STROKE
+361 241 F1 ń LATIN SMALL LETTER N WITH ACUTE
+362 242 F2 ň LATIN SMALL LETTER N WITH CARON
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 ő LATIN SMALL LETTER O WITH DOUBLE ACUTE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ř LATIN SMALL LETTER R WITH CARON
+371 249 F9 ů LATIN SMALL LETTER U WITH RING ABOVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB ű LATIN SMALL LETTER U WITH DOUBLE ACUTE
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ý LATIN SMALL LETTER Y WITH ACUTE
+376 254 FE ţ LATIN SMALL LETTER T WITH CEDILLA
+377 255 FF ˙ DOT ABOVE
+.TE
+.SH NOTES
+ISO 8859-2 is also known as Latin-2.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR iso_8859\-1 (7),
+.BR iso_8859\-16 (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-3.7 b/man7/iso_8859-3.7
new file mode 100644
index 0000000..8eb9a24
--- /dev/null
+++ b/man7/iso_8859-3.7
@@ -0,0 +1,139 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-3 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-3 \- ISO 8859-3 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-3 encodes the
+characters used in certain Southeast European languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-3 characters
+The following table displays the characters in ISO 8859-3 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ħ LATIN CAPITAL LETTER H WITH STROKE
+242 162 A2 ˘ BREVE
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+246 166 A6 Ĥ LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 İ LATIN CAPITAL LETTER I WITH DOT ABOVE
+252 170 AA Ş LATIN CAPITAL LETTER S WITH CEDILLA
+253 171 AB Ğ LATIN CAPITAL LETTER G WITH BREVE
+254 172 AC Ĵ LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+255 173 AD ­ SOFT HYPHEN
+257 175 AF Ż LATIN CAPITAL LETTER Z WITH DOT ABOVE
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ħ LATIN SMALL LETTER H WITH STROKE
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 µ MICRO SIGN
+266 182 B6 ĥ LATIN SMALL LETTER H WITH CIRCUMFLEX
+267 183 B7 · MIDDLE DOT
+270 184 B8 ¸ CEDILLA
+271 185 B9 ı LATIN SMALL LETTER DOTLESS I
+272 186 BA ş LATIN SMALL LETTER S WITH CEDILLA
+273 187 BB ğ LATIN SMALL LETTER G WITH BREVE
+274 188 BC ĵ LATIN SMALL LETTER J WITH CIRCUMFLEX
+275 189 BD ½ VULGAR FRACTION ONE HALF
+277 191 BF ż LATIN SMALL LETTER Z WITH DOT ABOVE
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Ċ LATIN CAPITAL LETTER C WITH DOT ABOVE
+306 198 C6 Ĉ LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Ġ LATIN CAPITAL LETTER G WITH DOT ABOVE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ĝ LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ŭ LATIN CAPITAL LETTER U WITH BREVE
+336 222 DE Ŝ LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 ċ LATIN SMALL LETTER C WITH DOT ABOVE
+346 230 E6 ĉ LATIN SMALL LETTER C WITH CIRCUMFLEX
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 ġ LATIN SMALL LETTER G WITH DOT ABOVE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ĝ LATIN SMALL LETTER G WITH CIRCUMFLEX
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ŭ LATIN SMALL LETTER U WITH BREVE
+376 254 FE ŝ LATIN SMALL LETTER S WITH CIRCUMFLEX
+377 255 FF ˙ DOT ABOVE
+.TE
+.SH NOTES
+ISO 8859-3 is also known as Latin-3.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-4.7 b/man7/iso_8859-4.7
new file mode 100644
index 0000000..b209bf1
--- /dev/null
+++ b/man7/iso_8859-4.7
@@ -0,0 +1,146 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-4 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-4 \- ISO 8859-4 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-4 encodes the
+characters used in Scandinavian and Baltic languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-4 characters
+The following table displays the characters in ISO 8859-4 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ą LATIN CAPITAL LETTER A WITH OGONEK
+242 162 A2 ĸ LATIN SMALL LETTER KRA (Greenlandic)
+243 163 A3 Ŗ LATIN CAPITAL LETTER R WITH CEDILLA
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 Ĩ LATIN CAPITAL LETTER I WITH TILDE
+246 166 A6 Ļ LATIN CAPITAL LETTER L WITH CEDILLA
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 Š LATIN CAPITAL LETTER S WITH CARON
+252 170 AA Ē LATIN CAPITAL LETTER E WITH MACRON
+253 171 AB Ģ LATIN CAPITAL LETTER G WITH CEDILLA
+254 172 AC Ŧ LATIN CAPITAL LETTER T WITH STROKE
+255 173 AD ­ SOFT HYPHEN
+256 174 AE Ž LATIN CAPITAL LETTER Z WITH CARON
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ą LATIN SMALL LETTER A WITH OGONEK
+262 178 B2 ˛ OGONEK
+263 179 B3 ŗ LATIN SMALL LETTER R WITH CEDILLA
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 ĩ LATIN SMALL LETTER I WITH TILDE
+266 182 B6 ļ LATIN SMALL LETTER L WITH CEDILLA
+267 183 B7 ˇ CARON
+270 184 B8 ¸ CEDILLA
+271 185 B9 š LATIN SMALL LETTER S WITH CARON
+272 186 BA ē LATIN SMALL LETTER E WITH MACRON
+273 187 BB ģ LATIN SMALL LETTER G WITH CEDILLA
+274 188 BC ŧ LATIN SMALL LETTER T WITH STROKE
+275 189 BD Ŋ LATIN CAPITAL LETTER ENG
+276 190 BE ž LATIN SMALL LETTER Z WITH CARON
+277 191 BF ŋ LATIN SMALL LETTER ENG
+300 192 C0 Ā LATIN CAPITAL LETTER A WITH MACRON
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Į LATIN CAPITAL LETTER I WITH OGONEK
+310 200 C8 Č LATIN CAPITAL LETTER C WITH CARON
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ę LATIN CAPITAL LETTER E WITH OGONEK
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ė LATIN CAPITAL LETTER E WITH DOT ABOVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ī LATIN CAPITAL LETTER I WITH MACRON
+320 208 D0 Đ LATIN CAPITAL LETTER D WITH STROKE
+321 209 D1 Ņ LATIN CAPITAL LETTER N WITH CEDILLA
+322 210 D2 Ō LATIN CAPITAL LETTER O WITH MACRON
+323 211 D3 Ķ LATIN CAPITAL LETTER K WITH CEDILLA
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ų LATIN CAPITAL LETTER U WITH OGONEK
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD Ũ LATIN CAPITAL LETTER U WITH TILDE
+336 222 DE Ū LATIN CAPITAL LETTER U WITH MACRON
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 ā LATIN SMALL LETTER A WITH MACRON
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 į LATIN SMALL LETTER I WITH OGONEK
+350 232 E8 č LATIN SMALL LETTER C WITH CARON
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ę LATIN SMALL LETTER E WITH OGONEK
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ė LATIN SMALL LETTER E WITH DOT ABOVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ī LATIN SMALL LETTER I WITH MACRON
+360 240 F0 đ LATIN SMALL LETTER D WITH STROKE
+361 241 F1 ņ LATIN SMALL LETTER N WITH CEDILLA
+362 242 F2 ō LATIN SMALL LETTER O WITH MACRON
+363 243 F3 ķ LATIN SMALL LETTER K WITH CEDILLA
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ų LATIN SMALL LETTER U WITH OGONEK
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ũ LATIN SMALL LETTER U WITH TILDE
+376 254 FE ū LATIN SMALL LETTER U WITH MACRON
+377 255 FF ˙ DOT ABOVE
+.TE
+.SH NOTES
+ISO 8859-4 is also known as Latin-4.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-5.7 b/man7/iso_8859-5.7
new file mode 100644
index 0000000..1fbb266
--- /dev/null
+++ b/man7/iso_8859-5.7
@@ -0,0 +1,151 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-5 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-5 \- ISO 8859-5 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-5 encodes the
+Cyrillic characters used in many East European languages.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-5 characters
+The following table displays the characters in ISO 8859-5 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 Ё CYRILLIC CAPITAL LETTER IO
+242 162 A2 Ђ CYRILLIC CAPITAL LETTER DJE
+243 163 A3 Ѓ CYRILLIC CAPITAL LETTER GJE
+244 164 A4 Є CYRILLIC CAPITAL LETTER UKRAINIAN IE
+245 165 A5 Ѕ CYRILLIC CAPITAL LETTER DZE
+246 166 A6 І T{
+CYRILLIC CAPITAL LETTER
+.br
+BYELORUSSIAN-UKRAINIAN I
+T}
+247 167 A7 Ї CYRILLIC CAPITAL LETTER YI
+250 168 A8 Ј CYRILLIC CAPITAL LETTER JE
+251 169 A9 Љ CYRILLIC CAPITAL LETTER LJE
+252 170 AA Њ CYRILLIC CAPITAL LETTER NJE
+253 171 AB Ћ CYRILLIC CAPITAL LETTER TSHE
+254 172 AC Ќ CYRILLIC CAPITAL LETTER KJE
+255 173 AD ­ SOFT HYPHEN
+256 174 AE Ў CYRILLIC CAPITAL LETTER SHORT U
+257 175 AF Џ CYRILLIC CAPITAL LETTER DZHE
+260 176 B0 А CYRILLIC CAPITAL LETTER A
+261 177 B1 Б CYRILLIC CAPITAL LETTER BE
+262 178 B2 В CYRILLIC CAPITAL LETTER VE
+263 179 B3 Г CYRILLIC CAPITAL LETTER GHE
+264 180 B4 Д CYRILLIC CAPITAL LETTER DE
+265 181 B5 Е CYRILLIC CAPITAL LETTER IE
+266 182 B6 Ж CYRILLIC CAPITAL LETTER ZHE
+267 183 B7 З CYRILLIC CAPITAL LETTER ZE
+270 184 B8 И CYRILLIC CAPITAL LETTER I
+271 185 B9 Й CYRILLIC CAPITAL LETTER SHORT I
+272 186 BA К CYRILLIC CAPITAL LETTER KA
+273 187 BB Л CYRILLIC CAPITAL LETTER EL
+274 188 BC М CYRILLIC CAPITAL LETTER EM
+275 189 BD Н CYRILLIC CAPITAL LETTER EN
+276 190 BE О CYRILLIC CAPITAL LETTER O
+277 191 BF П CYRILLIC CAPITAL LETTER PE
+300 192 C0 Р CYRILLIC CAPITAL LETTER ER
+301 193 C1 С CYRILLIC CAPITAL LETTER ES
+302 194 C2 Т CYRILLIC CAPITAL LETTER TE
+303 195 C3 У CYRILLIC CAPITAL LETTER U
+304 196 C4 Ф CYRILLIC CAPITAL LETTER EF
+305 197 C5 Х CYRILLIC CAPITAL LETTER HA
+306 198 C6 Ц CYRILLIC CAPITAL LETTER TSE
+307 199 C7 Ч CYRILLIC CAPITAL LETTER CHE
+310 200 C8 Ш CYRILLIC CAPITAL LETTER SHA
+311 201 C9 Щ CYRILLIC CAPITAL LETTER SHCHA
+312 202 CA Ъ CYRILLIC CAPITAL LETTER HARD SIGN
+313 203 CB Ы CYRILLIC CAPITAL LETTER YERU
+314 204 CC Ь CYRILLIC CAPITAL LETTER SOFT SIGN
+315 205 CD Э CYRILLIC CAPITAL LETTER E
+316 206 CE Ю CYRILLIC CAPITAL LETTER YU
+317 207 CF Я CYRILLIC CAPITAL LETTER YA
+320 208 D0 а CYRILLIC SMALL LETTER A
+321 209 D1 б CYRILLIC SMALL LETTER BE
+322 210 D2 в CYRILLIC SMALL LETTER VE
+323 211 D3 г CYRILLIC SMALL LETTER GHE
+324 212 D4 д CYRILLIC SMALL LETTER DE
+325 213 D5 е CYRILLIC SMALL LETTER IE
+326 214 D6 ж CYRILLIC SMALL LETTER ZHE
+327 215 D7 з CYRILLIC SMALL LETTER ZE
+330 216 D8 и CYRILLIC SMALL LETTER I
+331 217 D9 й CYRILLIC SMALL LETTER SHORT I
+332 218 DA к CYRILLIC SMALL LETTER KA
+333 219 DB л CYRILLIC SMALL LETTER EL
+334 220 DC м CYRILLIC SMALL LETTER EM
+335 221 DD н CYRILLIC SMALL LETTER EN
+336 222 DE о CYRILLIC SMALL LETTER O
+337 223 DF п CYRILLIC SMALL LETTER PE
+340 224 E0 р CYRILLIC SMALL LETTER ER
+341 225 E1 с CYRILLIC SMALL LETTER ES
+342 226 E2 т CYRILLIC SMALL LETTER TE
+343 227 E3 у CYRILLIC SMALL LETTER U
+344 228 E4 ф CYRILLIC SMALL LETTER EF
+345 229 E5 х CYRILLIC SMALL LETTER HA
+346 230 E6 ц CYRILLIC SMALL LETTER TSE
+347 231 E7 ч CYRILLIC SMALL LETTER CHE
+350 232 E8 ш CYRILLIC SMALL LETTER SHA
+351 233 E9 щ CYRILLIC SMALL LETTER SHCHA
+352 234 EA ъ CYRILLIC SMALL LETTER HARD SIGN
+353 235 EB ы CYRILLIC SMALL LETTER YERU
+354 236 EC ь CYRILLIC SMALL LETTER SOFT SIGN
+355 237 ED э CYRILLIC SMALL LETTER E
+356 238 EE ю CYRILLIC SMALL LETTER YU
+357 239 EF я CYRILLIC SMALL LETTER YA
+360 240 F0 № NUMERO SIGN
+361 241 F1 ё CYRILLIC SMALL LETTER IO
+362 242 F2 ђ CYRILLIC SMALL LETTER DJE
+363 243 F3 ѓ CYRILLIC SMALL LETTER GJE
+364 244 F4 є CYRILLIC SMALL LETTER UKRAINIAN IE
+365 245 F5 ѕ CYRILLIC SMALL LETTER DZE
+366 246 F6 і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+367 247 F7 ї CYRILLIC SMALL LETTER YI
+370 248 F8 ј CYRILLIC SMALL LETTER JE
+371 249 F9 љ CYRILLIC SMALL LETTER LJE
+372 250 FA њ CYRILLIC SMALL LETTER NJE
+373 251 FB ћ CYRILLIC SMALL LETTER TSHE
+374 252 FC ќ CYRILLIC SMALL LETTER KJE
+375 253 FD § SECTION SIGN
+376 254 FE ў CYRILLIC SMALL LETTER SHORT U
+377 255 FF џ CYRILLIC SMALL LETTER DZHE
+.TE
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1251 (7),
+.BR koi8\-r (7),
+.BR koi8\-u (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-6.7 b/man7/iso_8859-6.7
new file mode 100644
index 0000000..b73e846
--- /dev/null
+++ b/man7/iso_8859-6.7
@@ -0,0 +1,102 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-6 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-6 \- ISO 8859-6 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-6 encodes the
+characters used in the Arabic language.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-6 characters
+The following table displays the characters in ISO 8859-6 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+244 164 A4 ¤ CURRENCY SIGN
+254 172 AC ، ARABIC COMMA
+255 173 AD ­ SOFT HYPHEN
+273 187 BB ؛ ARABIC SEMICOLON
+277 191 BF ؟ ARABIC QUESTION MARK
+301 193 C1 ء ARABIC LETTER HAMZA
+302 194 C2 آ ARABIC LETTER ALEF WITH MADDA ABOVE
+303 195 C3 أ ARABIC LETTER ALEF WITH HAMZA ABOVE
+304 196 C4 ؤ ARABIC LETTER WAW WITH HAMZA ABOVE
+305 197 C5 إ ARABIC LETTER ALEF WITH HAMZA BELOW
+306 198 C6 ئ ARABIC LETTER YEH WITH HAMZA ABOVE
+307 199 C7 ا ARABIC LETTER ALEF
+310 200 C8 ب ARABIC LETTER BEH
+311 201 C9 ة ARABIC LETTER TEH MARBUTA
+312 202 CA ت ARABIC LETTER TEH
+313 203 CB ث ARABIC LETTER THEH
+314 204 CC ج ARABIC LETTER JEEM
+315 205 CD ح ARABIC LETTER HAH
+316 206 CE خ ARABIC LETTER KHAH
+317 207 CF د ARABIC LETTER DAL
+320 208 D0 ذ ARABIC LETTER THAL
+321 209 D1 ر ARABIC LETTER REH
+322 210 D2 ز ARABIC LETTER ZAIN
+323 211 D3 س ARABIC LETTER SEEN
+324 212 D4 ش ARABIC LETTER SHEEN
+325 213 D5 ص ARABIC LETTER SAD
+326 214 D6 ض ARABIC LETTER DAD
+327 215 D7 ط ARABIC LETTER TAH
+330 216 D8 ظ ARABIC LETTER ZAH
+331 217 D9 ع ARABIC LETTER AIN
+332 218 DA غ ARABIC LETTER GHAIN
+340 224 E0 ـ ARABIC TATWEEL
+341 225 E1 ف ARABIC LETTER FEH
+342 226 E2 ق ARABIC LETTER QAF
+343 227 E3 ك ARABIC LETTER KAF
+344 228 E4 ل ARABIC LETTER LAM
+345 229 E5 م ARABIC LETTER MEEM
+346 230 E6 ن ARABIC LETTER NOON
+347 231 E7 ه ARABIC LETTER HEH
+350 232 E8 و ARABIC LETTER WAW
+351 233 E9 ى ARABIC LETTER ALEF MAKSURA
+352 234 EA ي ARABIC LETTER YEH
+353 235 EB ً ARABIC FATHATAN
+354 236 EC ٌ ARABIC DAMMATAN
+355 237 ED ٍ ARABIC KASRATAN
+356 238 EE َ ARABIC FATHA
+357 239 EF ُ ARABIC DAMMA
+360 240 F0 ِ ARABIC KASRA
+361 241 F1 ّ ARABIC SHADDA
+362 242 F2 ْ ARABIC SUKUN
+.TE
+.SH NOTES
+ISO 8859-6 lacks the glyphs required for many related languages,
+such as Urdu and Persian (Farsi).
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-7.7 b/man7/iso_8859-7.7
new file mode 100644
index 0000000..a66a28e
--- /dev/null
+++ b/man7/iso_8859-7.7
@@ -0,0 +1,150 @@
+'\" t
+.\" Copyright 1999 Dimitri Papadopoulos (dpo@club-internet.fr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-7 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-7 \- ISO 8859-7 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-7 encodes the
+characters used in modern monotonic Greek.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-7 characters
+The following table displays the characters in ISO 8859-7 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ‘ LEFT SINGLE QUOTATION MARK
+242 162 A2 ’ RIGHT SINGLE QUOTATION MARK
+243 163 A3 £ POUND SIGN
+244 164 A4 € EURO SIGN
+245 165 A5 ₯ DRACHMA SIGN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA ͺ GREEK YPOGEGRAMMENI
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+257 175 AF ― HORIZONTAL BAR
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ΄ GREEK TONOS
+265 181 B5 ΅ GREEK DIALYTIKA TONOS
+266 182 B6 Ά GREEK CAPITAL LETTER ALPHA WITH TONOS
+267 183 B7 · MIDDLE DOT
+270 184 B8 Έ GREEK CAPITAL LETTER EPSILON WITH TONOS
+271 185 B9 Ή GREEK CAPITAL LETTER ETA WITH TONOS
+272 186 BA Ί GREEK CAPITAL LETTER IOTA WITH TONOS
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC Ό GREEK CAPITAL LETTER OMICRON WITH TONOS
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE Ύ GREEK CAPITAL LETTER UPSILON WITH TONOS
+277 191 BF Ώ GREEK CAPITAL LETTER OMEGA WITH TONOS
+300 192 C0 ΐ T{
+GREEK SMALL LETTER IOTA WITH
+.br
+DIALYTIKA AND TONOS
+T}
+301 193 C1 Α GREEK CAPITAL LETTER ALPHA
+302 194 C2 Β GREEK CAPITAL LETTER BETA
+303 195 C3 Γ GREEK CAPITAL LETTER GAMMA
+304 196 C4 Δ GREEK CAPITAL LETTER DELTA
+305 197 C5 Ε GREEK CAPITAL LETTER EPSILON
+306 198 C6 Ζ GREEK CAPITAL LETTER ZETA
+307 199 C7 Η GREEK CAPITAL LETTER ETA
+310 200 C8 Θ GREEK CAPITAL LETTER THETA
+311 201 C9 Ι GREEK CAPITAL LETTER IOTA
+312 202 CA Κ GREEK CAPITAL LETTER KAPPA
+313 203 CB Λ GREEK CAPITAL LETTER LAMBDA
+314 204 CC Μ GREEK CAPITAL LETTER MU
+315 205 CD Ν GREEK CAPITAL LETTER NU
+316 206 CE Ξ GREEK CAPITAL LETTER XI
+317 207 CF Ο GREEK CAPITAL LETTER OMICRON
+320 208 D0 Π GREEK CAPITAL LETTER PI
+321 209 D1 Ρ GREEK CAPITAL LETTER RHO
+323 211 D3 Σ GREEK CAPITAL LETTER SIGMA
+324 212 D4 Τ GREEK CAPITAL LETTER TAU
+325 213 D5 Υ GREEK CAPITAL LETTER UPSILON
+326 214 D6 Φ GREEK CAPITAL LETTER PHI
+327 215 D7 Χ GREEK CAPITAL LETTER CHI
+330 216 D8 Ψ GREEK CAPITAL LETTER PSI
+331 217 D9 Ω GREEK CAPITAL LETTER OMEGA
+332 218 DA Ϊ GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+333 219 DB Ϋ GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+334 220 DC ά GREEK SMALL LETTER ALPHA WITH TONOS
+335 221 DD έ GREEK SMALL LETTER EPSILON WITH TONOS
+336 222 DE ή GREEK SMALL LETTER ETA WITH TONOS
+337 223 DF ί GREEK SMALL LETTER IOTA WITH TONOS
+340 224 E0 ΰ T{
+GREEK SMALL LETTER UPSILON WITH
+DIALYTIKA AND TONOS
+T}
+341 225 E1 α GREEK SMALL LETTER ALPHA
+342 226 E2 β GREEK SMALL LETTER BETA
+343 227 E3 γ GREEK SMALL LETTER GAMMA
+344 228 E4 δ GREEK SMALL LETTER DELTA
+345 229 E5 ε GREEK SMALL LETTER EPSILON
+346 230 E6 ζ GREEK SMALL LETTER ZETA
+347 231 E7 η GREEK SMALL LETTER ETA
+350 232 E8 θ GREEK SMALL LETTER THETA
+351 233 E9 ι GREEK SMALL LETTER IOTA
+352 234 EA κ GREEK SMALL LETTER KAPPA
+353 235 EB λ GREEK SMALL LETTER LAMBDA
+354 236 EC μ GREEK SMALL LETTER MU
+355 237 ED ν GREEK SMALL LETTER NU
+356 238 EE ξ GREEK SMALL LETTER XI
+357 239 EF ο GREEK SMALL LETTER OMICRON
+360 240 F0 π GREEK SMALL LETTER PI
+361 241 F1 ρ GREEK SMALL LETTER RHO
+362 242 F2 ς GREEK SMALL LETTER FINAL SIGMA
+363 243 F3 σ GREEK SMALL LETTER SIGMA
+364 244 F4 τ GREEK SMALL LETTER TAU
+365 245 F5 υ GREEK SMALL LETTER UPSILON
+366 246 F6 φ GREEK SMALL LETTER PHI
+367 247 F7 χ GREEK SMALL LETTER CHI
+370 248 F8 ψ GREEK SMALL LETTER PSI
+371 249 F9 ω GREEK SMALL LETTER OMEGA
+372 250 FA ϊ GREEK SMALL LETTER IOTA WITH DIALYTIKA
+373 251 FB ϋ GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+374 252 FC ό GREEK SMALL LETTER OMICRON WITH TONOS
+375 253 FD ύ GREEK SMALL LETTER UPSILON WITH TONOS
+376 254 FE ώ GREEK SMALL LETTER OMEGA WITH TONOS
+.TE
+.SH NOTES
+ISO 8859-7 was formerly known as ELOT-928 or ECMA-118:1986.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-8.7 b/man7/iso_8859-8.7
new file mode 100644
index 0000000..d854116
--- /dev/null
+++ b/man7/iso_8859-8.7
@@ -0,0 +1,114 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis (edimitro@tee.gr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Eli Zaretskii <eliz@gnu.org> made valuable suggestions
+.\"
+.TH ISO_8859-8 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-8 \- ISO 8859-8 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-8 encodes the
+characters used in Modern Hebrew.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-8 characters
+The following table displays the characters in ISO 8859-8 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 ¥ YEN SIGN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA × MULTIPLICATION SIGN
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ¸ CEDILLA
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA ÷ DIVISION SIGN
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ¼ VULGAR FRACTION ONE QUARTER
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE ¾ VULGAR FRACTION THREE QUARTERS
+337 223 DF ‗ DOUBLE LOW LINE
+340 224 E0 א HEBREW LETTER ALEF
+341 225 E1 ב HEBREW LETTER BET
+342 226 E2 ג HEBREW LETTER GIMEL
+343 227 E3 ד HEBREW LETTER DALET
+344 228 E4 ה HEBREW LETTER HE
+345 229 E5 ו HEBREW LETTER VAV
+346 230 E6 ז HEBREW LETTER ZAYIN
+347 231 E7 ח HEBREW LETTER HET
+350 232 E8 ט HEBREW LETTER TET
+351 233 E9 י HEBREW LETTER YOD
+352 234 EA ך HEBREW LETTER FINAL KAF
+353 235 EB כ HEBREW LETTER KAF
+354 236 EC ל HEBREW LETTER LAMED
+355 237 ED ם HEBREW LETTER FINAL MEM
+356 238 EE מ HEBREW LETTER MEM
+357 239 EF ן HEBREW LETTER FINAL NUN
+360 240 F0 נ HEBREW LETTER NUN
+361 241 F1 ס HEBREW LETTER SAMEKH
+362 242 F2 ע HEBREW LETTER AYIN
+363 243 F3 ף HEBREW LETTER FINAL PE
+364 244 F4 פ HEBREW LETTER PE
+365 245 F5 ץ HEBREW LETTER FINAL TSADI
+366 246 F6 צ HEBREW LETTER TSADI
+367 247 F7 ק HEBREW LETTER QOF
+370 248 F8 ר HEBREW LETTER RESH
+371 249 F9 ש HEBREW LETTER SHIN
+372 250 FA ת HEBREW LETTER TAV
+375 253 FD ‎ LEFT-TO-RIGHT MARK
+376 254 FE ‏ RIGHT-TO-LEFT MARK
+.TE
+.SH NOTES
+ISO 8859-8 is also known as ISO-IR-138.
+ISO 8859-8 includes neither short vowels nor diacritical marks,
+and Yiddish is not provided for.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859-9.7 b/man7/iso_8859-9.7
new file mode 100644
index 0000000..6386144
--- /dev/null
+++ b/man7/iso_8859-9.7
@@ -0,0 +1,146 @@
+'\" t
+.\" Copyright 2002 Dimitri Papadopoulos (dpo@club-internet.fr)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH ISO_8859-9 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+iso_8859-9 \- ISO 8859-9 character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+The ISO 8859 standard includes several 8-bit extensions to the ASCII
+character set (also known as ISO 646-IRV).
+ISO 8859-9 encodes the
+characters used in Turkish.
+.SS ISO 8859 alphabets
+The full set of ISO 8859 alphabets includes:
+.TS
+l l.
+ISO 8859-1 West European languages (Latin-1)
+ISO 8859-2 Central and East European languages (Latin-2)
+ISO 8859-3 Southeast European and miscellaneous languages (Latin-3)
+ISO 8859-4 Scandinavian/Baltic languages (Latin-4)
+ISO 8859-5 Latin/Cyrillic
+ISO 8859-6 Latin/Arabic
+ISO 8859-7 Latin/Greek
+ISO 8859-8 Latin/Hebrew
+ISO 8859-9 Latin-1 modification for Turkish (Latin-5)
+ISO 8859-10 Lappish/Nordic/Eskimo languages (Latin-6)
+ISO 8859-11 Latin/Thai
+ISO 8859-13 Baltic Rim languages (Latin-7)
+ISO 8859-14 Celtic (Latin-8)
+ISO 8859-15 West European languages (Latin-9)
+ISO 8859-16 Romanian (Latin-10)
+.TE
+.SS ISO 8859-9 characters
+The following table displays the characters in ISO 8859-9 that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+240 160 A0   NO-BREAK SPACE
+241 161 A1 ¡ INVERTED EXCLAMATION MARK
+242 162 A2 ¢ CENT SIGN
+243 163 A3 £ POUND SIGN
+244 164 A4 ¤ CURRENCY SIGN
+245 165 A5 ¥ YEN SIGN
+246 166 A6 ¦ BROKEN BAR
+247 167 A7 § SECTION SIGN
+250 168 A8 ¨ DIAERESIS
+251 169 A9 © COPYRIGHT SIGN
+252 170 AA ª FEMININE ORDINAL INDICATOR
+253 171 AB « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+254 172 AC ¬ NOT SIGN
+255 173 AD ­ SOFT HYPHEN
+256 174 AE ® REGISTERED SIGN
+257 175 AF ¯ MACRON
+260 176 B0 ° DEGREE SIGN
+261 177 B1 ± PLUS-MINUS SIGN
+262 178 B2 ² SUPERSCRIPT TWO
+263 179 B3 ³ SUPERSCRIPT THREE
+264 180 B4 ´ ACUTE ACCENT
+265 181 B5 µ MICRO SIGN
+266 182 B6 ¶ PILCROW SIGN
+267 183 B7 · MIDDLE DOT
+270 184 B8 ¸ CEDILLA
+271 185 B9 ¹ SUPERSCRIPT ONE
+272 186 BA º MASCULINE ORDINAL INDICATOR
+273 187 BB » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+274 188 BC ¼ VULGAR FRACTION ONE QUARTER
+275 189 BD ½ VULGAR FRACTION ONE HALF
+276 190 BE ¾ VULGAR FRACTION THREE QUARTERS
+277 191 BF ¿ INVERTED QUESTION MARK
+300 192 C0 À LATIN CAPITAL LETTER A WITH GRAVE
+301 193 C1 Á LATIN CAPITAL LETTER A WITH ACUTE
+302 194 C2 Â LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+303 195 C3 Ã LATIN CAPITAL LETTER A WITH TILDE
+304 196 C4 Ä LATIN CAPITAL LETTER A WITH DIAERESIS
+305 197 C5 Å LATIN CAPITAL LETTER A WITH RING ABOVE
+306 198 C6 Æ LATIN CAPITAL LETTER AE
+307 199 C7 Ç LATIN CAPITAL LETTER C WITH CEDILLA
+310 200 C8 È LATIN CAPITAL LETTER E WITH GRAVE
+311 201 C9 É LATIN CAPITAL LETTER E WITH ACUTE
+312 202 CA Ê LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+313 203 CB Ë LATIN CAPITAL LETTER E WITH DIAERESIS
+314 204 CC Ì LATIN CAPITAL LETTER I WITH GRAVE
+315 205 CD Í LATIN CAPITAL LETTER I WITH ACUTE
+316 206 CE Î LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+317 207 CF Ï LATIN CAPITAL LETTER I WITH DIAERESIS
+320 208 D0 Ğ LATIN CAPITAL LETTER G WITH BREVE
+321 209 D1 Ñ LATIN CAPITAL LETTER N WITH TILDE
+322 210 D2 Ò LATIN CAPITAL LETTER O WITH GRAVE
+323 211 D3 Ó LATIN CAPITAL LETTER O WITH ACUTE
+324 212 D4 Ô LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+325 213 D5 Õ LATIN CAPITAL LETTER O WITH TILDE
+326 214 D6 Ö LATIN CAPITAL LETTER O WITH DIAERESIS
+327 215 D7 × MULTIPLICATION SIGN
+330 216 D8 Ø LATIN CAPITAL LETTER O WITH STROKE
+331 217 D9 Ù LATIN CAPITAL LETTER U WITH GRAVE
+332 218 DA Ú LATIN CAPITAL LETTER U WITH ACUTE
+333 219 DB Û LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+334 220 DC Ü LATIN CAPITAL LETTER U WITH DIAERESIS
+335 221 DD İ LATIN CAPITAL LETTER I WITH DOT ABOVE
+336 222 DE Ş LATIN CAPITAL LETTER S WITH CEDILLA
+337 223 DF ß LATIN SMALL LETTER SHARP S
+340 224 E0 à LATIN SMALL LETTER A WITH GRAVE
+341 225 E1 á LATIN SMALL LETTER A WITH ACUTE
+342 226 E2 â LATIN SMALL LETTER A WITH CIRCUMFLEX
+343 227 E3 ã LATIN SMALL LETTER A WITH TILDE
+344 228 E4 ä LATIN SMALL LETTER A WITH DIAERESIS
+345 229 E5 å LATIN SMALL LETTER A WITH RING ABOVE
+346 230 E6 æ LATIN SMALL LETTER AE
+347 231 E7 ç LATIN SMALL LETTER C WITH CEDILLA
+350 232 E8 è LATIN SMALL LETTER E WITH GRAVE
+351 233 E9 é LATIN SMALL LETTER E WITH ACUTE
+352 234 EA ê LATIN SMALL LETTER E WITH CIRCUMFLEX
+353 235 EB ë LATIN SMALL LETTER E WITH DIAERESIS
+354 236 EC ì LATIN SMALL LETTER I WITH GRAVE
+355 237 ED í LATIN SMALL LETTER I WITH ACUTE
+356 238 EE î LATIN SMALL LETTER I WITH CIRCUMFLEX
+357 239 EF ï LATIN SMALL LETTER I WITH DIAERESIS
+360 240 F0 ğ LATIN SMALL LETTER G WITH BREVE
+361 241 F1 ñ LATIN SMALL LETTER N WITH TILDE
+362 242 F2 ò LATIN SMALL LETTER O WITH GRAVE
+363 243 F3 ó LATIN SMALL LETTER O WITH ACUTE
+364 244 F4 ô LATIN SMALL LETTER O WITH CIRCUMFLEX
+365 245 F5 õ LATIN SMALL LETTER O WITH TILDE
+366 246 F6 ö LATIN SMALL LETTER O WITH DIAERESIS
+367 247 F7 ÷ DIVISION SIGN
+370 248 F8 ø LATIN SMALL LETTER O WITH STROKE
+371 249 F9 ù LATIN SMALL LETTER U WITH GRAVE
+372 250 FA ú LATIN SMALL LETTER U WITH ACUTE
+373 251 FB û LATIN SMALL LETTER U WITH CIRCUMFLEX
+374 252 FC ü LATIN SMALL LETTER U WITH DIAERESIS
+375 253 FD ı LATIN SMALL LETTER DOTLESS I
+376 254 FE ş LATIN SMALL LETTER S WITH CEDILLA
+377 255 FF ÿ LATIN SMALL LETTER Y WITH DIAERESIS
+.TE
+.SH NOTES
+ISO 8859-9 is also known as Latin-5.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/iso_8859_1.7 b/man7/iso_8859_1.7
new file mode 100644
index 0000000..1969dfb
--- /dev/null
+++ b/man7/iso_8859_1.7
@@ -0,0 +1 @@
+.so man7/iso_8859-1.7
diff --git a/man7/iso_8859_10.7 b/man7/iso_8859_10.7
new file mode 100644
index 0000000..9b4658f
--- /dev/null
+++ b/man7/iso_8859_10.7
@@ -0,0 +1 @@
+.so man7/iso_8859-10.7
diff --git a/man7/iso_8859_11.7 b/man7/iso_8859_11.7
new file mode 100644
index 0000000..cbd4cfe
--- /dev/null
+++ b/man7/iso_8859_11.7
@@ -0,0 +1 @@
+.so man7/iso_8859-11.7
diff --git a/man7/iso_8859_13.7 b/man7/iso_8859_13.7
new file mode 100644
index 0000000..8ad2335
--- /dev/null
+++ b/man7/iso_8859_13.7
@@ -0,0 +1 @@
+.so man7/iso_8859-13.7
diff --git a/man7/iso_8859_14.7 b/man7/iso_8859_14.7
new file mode 100644
index 0000000..4aa555d
--- /dev/null
+++ b/man7/iso_8859_14.7
@@ -0,0 +1 @@
+.so man7/iso_8859-14.7
diff --git a/man7/iso_8859_15.7 b/man7/iso_8859_15.7
new file mode 100644
index 0000000..a4095d7
--- /dev/null
+++ b/man7/iso_8859_15.7
@@ -0,0 +1 @@
+.so man7/iso_8859-15.7
diff --git a/man7/iso_8859_16.7 b/man7/iso_8859_16.7
new file mode 100644
index 0000000..b9c8e91
--- /dev/null
+++ b/man7/iso_8859_16.7
@@ -0,0 +1 @@
+.so man7/iso_8859-16.7
diff --git a/man7/iso_8859_2.7 b/man7/iso_8859_2.7
new file mode 100644
index 0000000..da36668
--- /dev/null
+++ b/man7/iso_8859_2.7
@@ -0,0 +1 @@
+.so man7/iso_8859-2.7
diff --git a/man7/iso_8859_3.7 b/man7/iso_8859_3.7
new file mode 100644
index 0000000..75e42ce
--- /dev/null
+++ b/man7/iso_8859_3.7
@@ -0,0 +1 @@
+.so man7/iso_8859-3.7
diff --git a/man7/iso_8859_4.7 b/man7/iso_8859_4.7
new file mode 100644
index 0000000..15a829e
--- /dev/null
+++ b/man7/iso_8859_4.7
@@ -0,0 +1 @@
+.so man7/iso_8859-4.7
diff --git a/man7/iso_8859_5.7 b/man7/iso_8859_5.7
new file mode 100644
index 0000000..1f20320
--- /dev/null
+++ b/man7/iso_8859_5.7
@@ -0,0 +1 @@
+.so man7/iso_8859-5.7
diff --git a/man7/iso_8859_6.7 b/man7/iso_8859_6.7
new file mode 100644
index 0000000..edcafdf
--- /dev/null
+++ b/man7/iso_8859_6.7
@@ -0,0 +1 @@
+.so man7/iso_8859-6.7
diff --git a/man7/iso_8859_7.7 b/man7/iso_8859_7.7
new file mode 100644
index 0000000..951384c
--- /dev/null
+++ b/man7/iso_8859_7.7
@@ -0,0 +1 @@
+.so man7/iso_8859-7.7
diff --git a/man7/iso_8859_8.7 b/man7/iso_8859_8.7
new file mode 100644
index 0000000..07cf216
--- /dev/null
+++ b/man7/iso_8859_8.7
@@ -0,0 +1 @@
+.so man7/iso_8859-8.7
diff --git a/man7/iso_8859_9.7 b/man7/iso_8859_9.7
new file mode 100644
index 0000000..0fcc7d4
--- /dev/null
+++ b/man7/iso_8859_9.7
@@ -0,0 +1 @@
+.so man7/iso_8859-9.7
diff --git a/man7/kernel_lockdown.7 b/man7/kernel_lockdown.7
new file mode 100644
index 0000000..aac19aa
--- /dev/null
+++ b/man7/kernel_lockdown.7
@@ -0,0 +1,109 @@
+.\"
+.\" Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH kernel_lockdown 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+kernel_lockdown \- kernel image access prevention feature
+.SH DESCRIPTION
+The Kernel Lockdown feature is designed to prevent both direct and indirect
+access to a running kernel image, attempting to protect against unauthorized
+modification of the kernel image and to prevent access to security and
+cryptographic data located in kernel memory, whilst still permitting driver
+modules to be loaded.
+.PP
+If a prohibited or restricted feature is accessed or used, the kernel will emit
+a message that looks like:
+.PP
+.in +4n
+.EX
+Lockdown: X: Y is restricted, see man kernel_lockdown.7
+.EE
+.in
+.PP
+where X indicates the process name and Y indicates what is restricted.
+.PP
+On an EFI-enabled x86 or arm64 machine, lockdown will be automatically enabled
+if the system boots in EFI Secure Boot mode.
+.\"
+.SS Coverage
+When lockdown is in effect, a number of features are disabled or have their
+use restricted.
+This includes special device files and kernel services that allow
+direct access of the kernel image:
+.PP
+.RS
+/dev/mem
+.br
+/dev/kmem
+.br
+/dev/kcore
+.br
+/dev/ioports
+.br
+BPF
+.br
+kprobes
+.RE
+.PP
+and the ability to directly configure and control devices, so as to prevent
+the use of a device to access or modify a kernel image:
+.IP \[bu] 3
+The use of module parameters that directly specify hardware parameters to
+drivers through the kernel command line or when loading a module.
+.IP \[bu]
+The use of direct PCI BAR access.
+.IP \[bu]
+The use of the ioperm and iopl instructions on x86.
+.IP \[bu]
+The use of the KD*IO console ioctls.
+.IP \[bu]
+The use of the TIOCSSERIAL serial ioctl.
+.IP \[bu]
+The alteration of MSR registers on x86.
+.IP \[bu]
+The replacement of the PCMCIA CIS.
+.IP \[bu]
+The overriding of ACPI tables.
+.IP \[bu]
+The use of ACPI error injection.
+.IP \[bu]
+The specification of the ACPI RSDP address.
+.IP \[bu]
+The use of ACPI custom methods.
+.PP
+Certain facilities are restricted:
+.IP \[bu] 3
+Only validly signed modules may be loaded (waived if the module file being
+loaded is vouched for by IMA appraisal).
+.IP \[bu]
+Only validly signed binaries may be kexec'd (waived if the binary image file
+to be executed is vouched for by IMA appraisal).
+.IP \[bu]
+Unencrypted hibernation/suspend to swap are disallowed as the kernel image is
+saved to a medium that can then be accessed.
+.IP \[bu]
+Use of debugfs is not permitted as this allows a whole range of actions
+including direct configuration of, access to and driving of hardware.
+.IP \[bu]
+IMA requires the addition of the "secure_boot" rules to the policy,
+whether or not they are specified on the command line,
+for both the built-in and custom policies in secure boot lockdown mode.
+.SH VERSIONS
+The Kernel Lockdown feature was added in Linux 5.4.
+.SH NOTES
+The Kernel Lockdown feature is enabled by CONFIG_SECURITY_LOCKDOWN_LSM.
+The
+.I lsm=lsm1,...,lsmN
+command line parameter controls the sequence of the initialization of
+Linux Security Modules.
+It must contain the string
+.I lockdown
+to enable the Kernel Lockdown feature.
+If the command line parameter is not specified,
+the initialization falls back to the value of the deprecated
+.I security=
+command line parameter and further to the value of CONFIG_LSM.
+.\" commit 000d388ed3bbed745f366ce71b2bb7c2ee70f449
diff --git a/man7/keyrings.7 b/man7/keyrings.7
new file mode 100644
index 0000000..1ebd25f
--- /dev/null
+++ b/man7/keyrings.7
@@ -0,0 +1,901 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\" and Copyright (C) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH keyrings 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+keyrings \- in-kernel key management and retention facility
+.SH DESCRIPTION
+The Linux key-management facility
+is primarily a way for various kernel components
+to retain or cache security data,
+authentication keys, encryption keys, and other data in the kernel.
+.PP
+System call interfaces are provided so that user-space programs can manage
+those objects and also use the facility for their own purposes; see
+.BR add_key (2),
+.BR request_key (2),
+and
+.BR keyctl (2).
+.PP
+A library and some user-space utilities are provided to allow access to the
+facility.
+See
+.BR keyctl (1),
+.BR keyctl (3),
+and
+.BR keyutils (7)
+for more information.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Keys
+A key has the following attributes:
+.TP
+Serial number (ID)
+This is a unique integer handle by which a key is referred to in system calls.
+The serial number is sometimes synonymously referred to as the key ID.
+Programmatically, key serial numbers are represented using the type
+.IR key_serial_t .
+.TP
+Type
+A key's type defines what sort of data can be held in the key,
+how the proposed content of the key will be parsed,
+and how the payload will be used.
+.IP
+There are a number of general-purpose types available, plus some specialist
+types defined by specific kernel components.
+.TP
+Description (name)
+The key description is a printable string that is used as the search term
+for the key (in conjunction with the key type) as well as a display name.
+During searches, the description may be partially matched or exactly matched.
+.TP
+Payload (data)
+The payload is the actual content of a key.
+This is usually set when a key is created,
+but it is possible for the kernel to upcall to user space to finish the
+instantiation of a key if that key wasn't already known to the kernel
+when it was requested.
+For further details, see
+.BR request_key (2).
+.IP
+A key's payload can be read and updated if the key type supports it and if
+suitable permission is granted to the caller.
+.TP
+Access rights
+Much as files do,
+each key has an owning user ID, an owning group ID, and a security label.
+Each key also has a set of permissions,
+though there are more than for a normal UNIX file,
+and there is an additional category\[em]possessor\[em]beyond the usual user,
+group, and other (see
+.IR Possession ,
+below).
+.IP
+Note that keys are quota controlled, since they require unswappable kernel
+memory.
+The owning user ID specifies whose quota is to be debited.
+.TP
+Expiration time
+Each key can have an expiration time set.
+When that time is reached,
+the key is marked as being expired and accesses to it fail with the error
+.BR EKEYEXPIRED .
+If not deleted, updated, or replaced, then, after a set amount of time,
+an expired key is automatically removed (garbage collected)
+along with all links to it,
+and attempts to access the key fail with the error
+.BR ENOKEY .
+.TP
+Reference count
+Each key has a reference count.
+Keys are referenced by keyrings, by currently active users,
+and by a process's credentials.
+When the reference count reaches zero,
+the key is scheduled for garbage collection.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Key types
+The kernel provides several basic types of key:
+.TP
+.I """keyring"""
+.\" Note that keyrings use different fields in struct key in order to store
+.\" their data - index_key instead of type/description and name_link/keys
+.\" instead of payload.
+Keyrings are special keys which store a set of links
+to other keys (including other keyrings),
+analogous to a directory holding links to files.
+The main purpose of a keyring is to prevent other keys from
+being garbage collected because nothing refers to them.
+.IP
+Keyrings with descriptions (names)
+that begin with a period (\[aq].\[aq]) are reserved to the implementation.
+.TP
+.I """user"""
+This is a general-purpose key type.
+The key is kept entirely within kernel memory.
+The payload may be read and updated by user-space applications.
+.IP
+The payload for keys of this type is a blob of arbitrary data
+of up to 32,767 bytes.
+.IP
+The description may be any valid string, though it is preferred that it
+start with a colon-delimited prefix representing the service
+to which the key is of interest
+(for instance
+.IR """afs:mykey""" ).
+.TP
+.IR """logon""" " (since Linux 3.3)"
+.\" commit 9f6ed2ca257fa8650b876377833e6f14e272848b
+This key type is essentially the same as
+.IR """user""" ,
+but it does not provide reading (i.e., the
+.BR keyctl (2)
+.B KEYCTL_READ
+operation),
+meaning that the key payload is never visible from user space.
+This is suitable for storing username-password pairs
+that should not be readable from user space.
+.IP
+The description of a
+.I """logon"""
+key
+.I must
+start with a non-empty colon-delimited prefix whose purpose
+is to identify the service to which the key belongs.
+(Note that this differs from keys of the
+.I """user"""
+type, where the inclusion of a prefix is recommended but is not enforced.)
+.TP
+.IR """big_key""" " (since Linux 3.13)"
+.\" commit ab3c3587f8cda9083209a61dbe3a4407d3cada10
+This key type is similar to the
+.I """user"""
+key type, but it may hold a payload of up to 1\ MiB in size.
+This key type is useful for purposes such as holding Kerberos ticket caches.
+.IP
+The payload data may be stored in a tmpfs filesystem,
+rather than in kernel memory,
+if the data size exceeds the overhead of storing the data in the filesystem.
+(Storing the data in a filesystem requires filesystem structures
+to be allocated in the kernel.
+The size of these structures determines the size threshold
+above which the tmpfs storage method is used.)
+Since Linux 4.8,
+.\" commit 13100a72f40f5748a04017e0ab3df4cf27c809ef
+the payload data is encrypted when stored in tmpfs,
+thereby preventing it from being written unencrypted into swap space.
+.PP
+There are more specialized key types available also,
+but they aren't discussed here
+because they aren't intended for normal user-space use.
+.PP
+Key type names
+that begin with a period (\[aq].\[aq]) are reserved to the implementation.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Keyrings
+As previously mentioned, keyrings are a special type of key that contains
+links to other keys (which may include other keyrings).
+Keys may be linked to by multiple keyrings.
+Keyrings may be considered as analogous to UNIX directories
+where each directory contains a set of hard links to files.
+.PP
+Various operations (system calls) may be applied only to keyrings:
+.TP
+Adding
+A key may be added to a keyring by system calls that create keys.
+This prevents the new key from being immediately deleted
+when the system call releases its last reference to the key.
+.TP
+Linking
+A link may be added to a keyring pointing to a key that is already known,
+provided this does not create a self-referential cycle.
+.TP
+Unlinking
+A link may be removed from a keyring.
+When the last link to a key is removed,
+that key will be scheduled for deletion by the garbage collector.
+.TP
+Clearing
+All the links may be removed from a keyring.
+.TP
+Searching
+A keyring may be considered the root of a tree or subtree in which keyrings
+form the branches and non-keyrings the leaves.
+This tree may be searched for a key matching
+a particular type and description.
+.PP
+See
+.BR keyctl_clear (3),
+.BR keyctl_link (3),
+.BR keyctl_search (3),
+and
+.BR keyctl_unlink (3)
+for more information.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Anchoring keys
+To prevent a key from being garbage collected,
+it must be anchored to keep its reference count elevated
+when it is not in active use by the kernel.
+.PP
+Keyrings are used to anchor other keys:
+each link is a reference on a key.
+Note that keyrings themselves are just keys and
+are also subject to the same anchoring requirement to prevent
+them being garbage collected.
+.PP
+The kernel makes available a number of anchor keyrings.
+Note that some of these keyrings will be created only when first accessed.
+.TP
+Process keyrings
+Process credentials themselves reference keyrings with specific semantics.
+These keyrings are pinned as long as the set of credentials exists,
+which is usually as long as the process exists.
+.IP
+There are three keyrings with different inheritance/sharing rules:
+the
+.BR session\-keyring (7)
+(inherited and shared by all child processes),
+the
+.BR process\-keyring (7)
+(shared by all threads in a process) and
+the
+.BR thread\-keyring (7)
+(specific to a particular thread).
+.IP
+As an alternative to using the actual keyring IDs,
+in calls to
+.BR add_key (2),
+.BR keyctl (2),
+and
+.BR request_key (2),
+the special keyring values
+.BR KEY_SPEC_SESSION_KEYRING ,
+.BR KEY_SPEC_PROCESS_KEYRING ,
+and
+.B KEY_SPEC_THREAD_KEYRING
+can be used to refer to the caller's own instances of these keyrings.
+.TP
+User keyrings
+Each UID known to the kernel has a record that contains two keyrings: the
+.BR user\-keyring (7)
+and the
+.BR user\-session\-keyring (7).
+These exist for as long as the UID record in the kernel exists.
+.IP
+As an alternative to using the actual keyring IDs,
+in calls to
+.BR add_key (2),
+.BR keyctl (2),
+and
+.BR request_key (2),
+the special keyring values
+.B KEY_SPEC_USER_KEYRING
+and
+.B KEY_SPEC_USER_SESSION_KEYRING
+can be used to refer to the caller's own instances of these keyrings.
+.IP
+A link to the user keyring is placed in a new session keyring by
+.BR pam_keyinit (8)
+when a new login session is initiated.
+.TP
+Persistent keyrings
+There is a
+.BR persistent\-keyring (7)
+available to each UID known to the system.
+It may persist beyond the life of the UID record previously mentioned,
+but has an expiration time set such that it is automatically cleaned up
+after a set time.
+The persistent keyring permits, for example,
+.BR cron (8)
+scripts to use credentials that are left in the persistent keyring after
+the user logs out.
+.IP
+Note that the expiration time of the persistent keyring
+is reset every time the persistent keyring is requested.
+.TP
+Special keyrings
+There are special keyrings owned by the kernel that can anchor keys
+for special purposes.
+An example of this is the \fIsystem keyring\fR used for holding
+encryption keys for module signature verification.
+.IP
+These special keyrings are usually closed to direct alteration
+by user space.
+.PP
+An originally planned "group keyring",
+for storing keys associated with each GID known to the kernel,
+is not so far implemented, and is unlikely to be implemented.
+Nevertheless, the constant
+.B KEY_SPEC_GROUP_KEYRING
+has been defined for this keyring.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Possession
+The concept of possession is important to understanding the keyrings
+security model.
+Whether a thread possesses a key is determined by the following rules:
+.IP (1) 5
+Any key or keyring that does not grant
+.I search
+permission to the caller is ignored in all the following rules.
+.IP (2)
+A thread possesses its
+.BR session\-keyring (7),
+.BR process\-keyring (7),
+and
+.BR thread\-keyring (7)
+directly because those keyrings are referred to by its credentials.
+.IP (3)
+If a keyring is possessed, then any key it links to is also possessed.
+.IP (4)
+If any key a keyring links to is itself a keyring, then rule (3) applies
+recursively.
+.IP (5)
+If a process is upcalled from the kernel to instantiate a key (see
+.BR request_key (2)),
+then it also possesses the requester's keyrings as in
+rule (2) as if it were the requester.
+.PP
+Note that possession is not a fundamental property of a key,
+but must rather be calculated each time the key is needed.
+.PP
+Possession is designed to allow set-user-ID programs run from, say,
+a user's shell to access the user's keys.
+Granting permissions to the key possessor while denying them
+to the key owner and group allows the prevention of access to keys
+on the basis of UID and GID matches.
+.PP
+When it creates the session keyring,
+.BR pam_keyinit (8)
+adds a link to the
+.BR user\-keyring (7),
+thus making the user keyring and anything it contains possessed by default.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Access rights
+Each key has the following security-related attributes:
+.IP \[bu] 3
+The owning user ID
+.IP \[bu]
+The ID of a group that is permitted to access the key
+.IP \[bu]
+A security label
+.IP \[bu]
+A permissions mask
+.PP
+The permissions mask contains four sets of rights.
+The first three sets are mutually exclusive.
+One and only one will be in force for a particular access check.
+In order of descending priority, these three sets are:
+.TP
+.I user
+The set specifies the rights granted
+if the key's user ID matches the caller's filesystem user ID.
+.TP
+.I group
+The set specifies the rights granted
+if the user ID didn't match and the key's group ID matches the caller's
+filesystem GID or one of the caller's supplementary group IDs.
+.TP
+.I other
+The set specifies the rights granted
+if neither the key's user ID nor group ID matched.
+.PP
+The fourth set of rights is:
+.TP
+.I possessor
+The set specifies the rights granted
+if a key is determined to be possessed by the caller.
+.PP
+The complete set of rights for a key is the union of whichever
+of the first three sets is applicable plus the fourth set
+if the key is possessed.
+.PP
+The set of rights that may be granted in each of the four masks
+is as follows:
+.TP
+.I view
+The attributes of the key may be read.
+This includes the type,
+description, and access rights (excluding the security label).
+.TP
+.I read
+For a key: the payload of the key may be read.
+For a keyring: the list of serial numbers (keys) to
+which the keyring has links may be read.
+.TP
+.I write
+The payload of the key may be updated and the key may be revoked.
+For a keyring, links may be added to or removed from the keyring,
+and the keyring may be cleared completely (all links are removed).
+.TP
+.I search
+For a key (or a keyring): the key may be found by a search.
+For a keyring: keys and keyrings that are linked to by the
+keyring may be searched.
+.TP
+.I link
+Links may be created from keyrings to the key.
+The initial link to a key that is established when the key is created
+doesn't require this permission.
+.TP
+.I setattr
+The ownership details and security label of the key may be changed,
+the key's expiration time may be set, and the key may be revoked.
+.PP
+In addition to access rights, any active Linux Security Module (LSM) may
+prevent access to a key if its policy so dictates.
+A key may be given a
+security label or other attribute by the LSM;
+this label is retrievable via
+.BR keyctl_get_security (3).
+.PP
+See
+.BR keyctl_chown (3),
+.BR keyctl_describe (3),
+.BR keyctl_get_security (3),
+.BR keyctl_setperm (3),
+and
+.BR selinux (8)
+for more information.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Searching for keys
+One of the key features of the Linux key-management facility
+is the ability to find a key that a process is retaining.
+The
+.BR request_key (2)
+system call is the primary point of
+access for user-space applications to find a key.
+(Internally, the kernel has something similar available
+for use by internal components that make use of keys.)
+.PP
+The search algorithm works as follows:
+.IP (1) 5
+The process keyrings are searched in the following order: the
+.BR thread\-keyring (7)
+if it exists, the
+.BR process\-keyring (7)
+if it exists, and then either the
+.BR session\-keyring (7)
+if it exists or the
+.BR user\-session\-keyring (7)
+if that exists.
+.IP (2)
+If the caller was a process that was invoked by the
+.BR request_key (2)
+upcall mechanism, then the keyrings of the original caller of
+.BR request_key (2)
+will be searched as well.
+.IP (3)
+The search of a keyring tree is in breadth-first order:
+each keyring is searched first for a match,
+then the keyrings referred to by that keyring are searched.
+.IP (4)
+If a matching key is found that is valid,
+then the search terminates and that key is returned.
+.IP (5)
+If a matching key is found that has an error state attached,
+that error state is noted and the search continues.
+.IP (6)
+If no valid matching key is found,
+then the first noted error state is returned; otherwise, an
+.B ENOKEY
+error is returned.
+.PP
+It is also possible to search a specific keyring, in which case only steps
+(3) to (6) apply.
+.PP
+See
+.BR request_key (2)
+and
+.BR keyctl_search (3)
+for more information.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS On-demand key creation
+If a key cannot be found,
+.BR request_key (2)
+will, if given a
+.I callout_info
+argument, create a new key and then upcall to user space to
+instantiate the key.
+This allows keys to be created on an as-needed basis.
+.PP
+Typically,
+this will involve the kernel creating a new process that executes the
+.BR request\-key (8)
+program, which will then execute the appropriate handler based on its
+configuration.
+.PP
+The handler is passed a special authorization key that allows it
+and only it to instantiate the new key.
+This is also used to permit searches performed by the
+handler program to also search the requester's keyrings.
+.PP
+See
+.BR request_key (2),
+.BR keyctl_assume_authority (3),
+.BR keyctl_instantiate (3),
+.BR keyctl_negate (3),
+.BR keyctl_reject (3),
+.BR request\-key (8),
+and
+.BR request\-key.conf (5)
+for more information.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS /proc files
+The kernel provides various
+.I /proc
+files that expose information about keys or define limits on key usage.
+.TP
+.IR /proc/keys " (since Linux 2.6.10)"
+This file exposes a list of the keys for which the reading thread has
+.I view
+permission, providing various information about each key.
+The thread need not possess the key for it to be visible in this file.
+.\" David Howells, Dec 2016 linux-man@:
+.\" This [The thread need not possess the key for it to be visible in
+.\" this file.] is correct. See proc_keys_show() in security/keys/proc.c:
+.\"
+.\" rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
+.\" if (rc < 0)
+.\" return 0;
+.\"
+.\"Possibly it shouldn't be, but for now it is.
+.\"
+.IP
+The only keys included in the list are those that grant
+.I view
+permission to the reading process
+(regardless of whether or not it possesses them).
+LSM security checks are still performed,
+and may filter out further keys that the process is not authorized to view.
+.IP
+An example of the data that one might see in this file
+(with the columns numbered for easy reference below)
+is the following:
+.IP
+.EX
+ (1) (2) (3)(4) (5) (6) (7) (8) (9)
+009a2028 I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 user krb_ccache:primary: 12
+1806c4ba I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 keyring _pid: 2
+25d3a08f I\-\-Q\-\-\- 1 perm 1f3f0000 1000 65534 keyring _uid_ses.1000: 1
+28576bd8 I\-\-Q\-\-\- 3 perm 3f010000 1000 1000 keyring _krb: 1
+2c546d21 I\-\-Q\-\-\- 190 perm 3f030000 1000 1000 keyring _ses: 2
+30a4e0be I\-\-\-\-\-\- 4 2d 1f030000 1000 65534 keyring _persistent.1000: 1
+32100fab I\-\-Q\-\-\- 4 perm 1f3f0000 1000 65534 keyring _uid.1000: 2
+32a387ea I\-\-Q\-\-\- 1 perm 3f010000 1000 1000 keyring _pid: 2
+3ce56aea I\-\-Q\-\-\- 5 perm 3f030000 1000 1000 keyring _ses: 1
+.EE
+.IP
+The fields shown in each line of this file are as follows:
+.RS
+.TP
+ID (1)
+The ID (serial number) of the key, expressed in hexadecimal.
+.TP
+Flags (2)
+A set of flags describing the state of the key:
+.RS
+.TP
+I
+.\" KEY_FLAG_INSTANTIATED
+The key has been instantiated.
+.TP
+R
+.\" KEY_FLAG_REVOKED
+The key has been revoked.
+.TP
+D
+.\" KEY_FLAG_DEAD
+The key is dead (i.e., the key type has been unregistered).
+.\" unregister_key_type() in the kernel source
+(A key may be briefly in this state during garbage collection.)
+.TP
+Q
+.\" KEY_FLAG_IN_QUOTA
+The key contributes to the user's quota.
+.TP
+U
+.\" KEY_FLAG_USER_CONSTRUCT
+The key is under construction via a callback to user space;
+see
+.BR request_key (2).
+.TP
+N
+.\" KEY_FLAG_NEGATIVE
+The key is negatively instantiated.
+.TP
+i
+.\" KEY_FLAG_INVALIDATED
+The key has been invalidated.
+.RE
+.TP
+Usage (3)
+This is a count of the number of kernel credential
+structures that are pinning the key
+(approximately: the number of threads and open file references
+that refer to this key).
+.TP
+Timeout (4)
+The amount of time until the key will expire,
+expressed in human-readable form (weeks, days, hours, minutes, and seconds).
+The string
+.I perm
+here means that the key is permanent (no timeout).
+The string
+.I expd
+means that the key has already expired,
+but has not yet been garbage collected.
+.TP
+Permissions (5)
+The key permissions, expressed as four hexadecimal bytes containing,
+from left to right, the possessor, user, group, and other permissions.
+Within each byte, the permission bits are as follows:
+.IP
+.PD 0
+.RS 12
+.TP
+0x01
+.I view
+.TP
+0x02
+.I read
+.TP
+0x04
+.I write
+.TP
+0x08
+.I search
+.TP
+0x10
+.I link
+.TP
+0x20
+.I setattr
+.RE
+.PD
+.TP
+UID (6)
+The user ID of the key owner.
+.TP
+GID (7)
+The group ID of the key.
+The value \-1 here means that the key has no group ID;
+this can occur in certain circumstances for keys created by the kernel.
+.TP
+Type (8)
+The key type (user, keyring, etc.).
+.TP
+Description (9)
+The key description (name).
+This field contains descriptive information about the key.
+For most key types, it has the form
+.IP
+.in +4n
+.EX
+name[: extra\-info]
+.EE
+.in
+.IP
+The
+.I name
+subfield is the key's description (name).
+The optional
+.I extra\-info
+field provides some further information about the key.
+The information that appears here depends on the key type, as follows:
+.RS
+.TP
+.IR """user""" " and " """logon"""
+The size in bytes of the key payload (expressed in decimal).
+.TP
+.I """keyring"""
+The number of keys linked to the keyring,
+or the string
+.I empty
+if there are no keys linked to the keyring.
+.TP
+.I """big_key"""
+The payload size in bytes, followed either by the string
+.IR [file] ,
+if the key payload exceeds the threshold that means that the
+payload is stored in a (swappable)
+.BR tmpfs (5)
+filesystem,
+or otherwise the string
+.IR [buff] ,
+indicating that the key is small enough to reside in kernel memory.
+.RE
+.IP
+For the
+.I """.request_key_auth"""
+key type
+(authorization key; see
+.BR request_key (2)),
+the description field has the form shown in the following example:
+.IP
+.in +4n
+.EX
+key:c9a9b19 pid:28880 ci:10
+.EE
+.in
+.IP
+The three subfields are as follows:
+.RS
+.TP
+.I key
+The hexadecimal ID of the key being instantiated in the requesting program.
+.TP
+.I pid
+The PID of the requesting program.
+.TP
+.I ci
+The length of the callout data with which the requested key should
+be instantiated
+(i.e., the length of the payload associated with the authorization key).
+.RE
+.RE
+.TP
+.IR /proc/key\-users " (since Linux 2.6.10)"
+This file lists various information for each user ID that
+has at least one key on the system.
+An example of the data that one might see in this file is the following:
+.IP
+.in +4n
+.EX
+ 0: 10 9/9 2/1000000 22/25000000
+ 42: 9 9/9 8/200 106/20000
+1000: 11 11/11 10/200 271/20000
+.EE
+.in
+.IP
+The fields shown in each line are as follows:
+.RS
+.TP
+.I uid
+The user ID.
+.TP
+.I usage
+This is a kernel-internal usage count for the kernel structure
+used to record key users.
+.TP
+.IR nkeys / nikeys
+The total number of keys owned by the user,
+and the number of those keys that have been instantiated.
+.TP
+.IR qnkeys / maxkeys
+The number of keys owned by the user,
+and the maximum number of keys that the user may own.
+.TP
+.IR qnbytes / maxbytes
+The number of bytes consumed in payloads of the keys owned by this user,
+and the upper limit on the number of bytes in key payloads for that user.
+.RE
+.TP
+.IR /proc/sys/kernel/keys/gc_delay " (since Linux 2.6.32)"
+.\" commit 5d135440faf7db8d566de0c6fab36b16cf9cfc3b
+The value in this file specifies the interval, in seconds,
+after which revoked and expired keys will be garbage collected.
+The purpose of having such an interval is so that there is a window
+of time where user space can see an error (respectively
+.B EKEYREVOKED
+and
+.BR EKEYEXPIRED )
+that indicates what happened to the key.
+.IP
+The default value in this file is 300 (i.e., 5 minutes).
+.TP
+.IR /proc/sys/kernel/keys/persistent_keyring_expiry " (since Linux 3.13)"
+.\" commit f36f8c75ae2e7d4da34f4c908cebdb4aa42c977e
+This file defines an interval, in seconds,
+to which the persistent keyring's expiration timer is reset
+each time the keyring is accessed (via
+.BR keyctl_get_persistent (3)
+or the
+.BR keyctl (2)
+.B KEYCTL_GET_PERSISTENT
+operation).
+.IP
+The default value in this file is 259200 (i.e., 3 days).
+.PP
+The following files (which are writable by privileged processes)
+are used to enforce quotas on the number of keys
+and number of bytes of data that can be stored in key payloads:
+.TP
+.IR /proc/sys/kernel/keys/maxbytes " (since Linux 2.6.26)"
+.\" commit 0b77f5bfb45c13e1e5142374f9d6ca75292252a4
+.\" Previously: KEYQUOTA_MAX_BYTES 10000
+This is the maximum number of bytes of data that a nonroot user
+can hold in the payloads of the keys owned by the user.
+.IP
+The default value in this file is 20,000.
+.TP
+.IR /proc/sys/kernel/keys/maxkeys " (since Linux 2.6.26)"
+.\" commit 0b77f5bfb45c13e1e5142374f9d6ca75292252a4
+.\" Previously: KEYQUOTA_MAX_KEYS 100
+This is the maximum number of keys that a nonroot user may own.
+.IP
+The default value in this file is 200.
+.TP
+.IR /proc/sys/kernel/keys/root_maxbytes " (since Linux 2.6.26)"
+This is the maximum number of bytes of data that the root user
+(UID 0 in the root user namespace)
+can hold in the payloads of the keys owned by root.
+.IP
+.\"738c5d190f6540539a04baf36ce21d46b5da04bd
+The default value in this file is 25,000,000 (20,000 before Linux 3.17).
+.\" commit 0b77f5bfb45c13e1e5142374f9d6ca75292252a4
+.TP
+.IR /proc/sys/kernel/keys/root_maxkeys " (since Linux 2.6.26)"
+.\" commit 0b77f5bfb45c13e1e5142374f9d6ca75292252a4
+This is the maximum number of keys that the root user
+(UID 0 in the root user namespace)
+may own.
+.IP
+.\"738c5d190f6540539a04baf36ce21d46b5da04bd
+The default value in this file is 1,000,000 (200 before Linux 3.17).
+.PP
+With respect to keyrings,
+note that each link in a keyring consumes 4 bytes of the keyring payload.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SS Users
+The Linux key-management facility has a number of users and usages,
+but is not limited to those that already exist.
+.PP
+In-kernel users of this facility include:
+.TP
+Network filesystems - DNS
+The kernel uses the upcall mechanism provided by the keys to upcall to
+user space to do DNS lookups and then to cache the results.
+.TP
+AF_RXRPC and kAFS - Authentication
+The AF_RXRPC network protocol and the in-kernel AFS filesystem
+use keys to store the ticket needed to do secured or encrypted traffic.
+These are then looked up by
+network operations on AF_RXRPC and filesystem operations on kAFS.
+.TP
+NFS - User ID mapping
+The NFS filesystem uses keys to store mappings of
+foreign user IDs to local user IDs.
+.TP
+CIFS - Password
+The CIFS filesystem uses keys to store passwords for accessing remote shares.
+.TP
+Module verification
+The kernel build process can be made to cryptographically sign modules.
+That signature is then checked when a module is loaded.
+.PP
+User-space users of this facility include:
+.TP
+Kerberos key storage
+The MIT Kerberos 5 facility (libkrb5) can use keys to store authentication
+tokens which can be made to be automatically cleaned up a set time after
+the user last uses them,
+but until then permits them to hang around after the user
+has logged out so that
+.BR cron (8)
+scripts can use them.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR add_key (2),
+.BR keyctl (2),
+.BR request_key (2),
+.BR keyctl (3),
+.BR keyutils (7),
+.BR persistent\-keyring (7),
+.BR process\-keyring (7),
+.BR session\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-keyring (7),
+.BR user\-session\-keyring (7),
+.BR pam_keyinit (8),
+.BR request\-key (8)
+.PP
+The kernel source files
+.I Documentation/crypto/asymmetric\-keys.txt
+and under
+.I Documentation/security/keys
+(or, before Linux 4.13, in the file
+.IR Documentation/security/keys.txt ).
diff --git a/man7/koi8-r.7 b/man7/koi8-r.7
new file mode 100644
index 0000000..65fc642
--- /dev/null
+++ b/man7/koi8-r.7
@@ -0,0 +1,169 @@
+'\" t
+.\" Copyright 2001 Alexey Mahotkin <alexm@hsys.msk.ru>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH KOI8-R 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+koi8-r \- Russian character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+RFC\ 1489 defines an 8-bit character set, KOI8-R.
+KOI8-R encodes the
+characters used in Russian.
+.SS KOI8-R characters
+The following table displays the characters in KOI8-R that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+200 128 80 ─ BOX DRAWINGS LIGHT HORIZONTAL
+201 129 81 │ BOX DRAWINGS LIGHT VERTICAL
+202 130 82 ┌ BOX DRAWINGS LIGHT DOWN AND RIGHT
+203 131 83 ┐ BOX DRAWINGS LIGHT DOWN AND LEFT
+204 132 84 └ BOX DRAWINGS LIGHT UP AND RIGHT
+205 133 85 ┘ BOX DRAWINGS LIGHT UP AND LEFT
+206 134 86 ├ BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+207 135 87 ┤ BOX DRAWINGS LIGHT VERTICAL AND LEFT
+210 136 88 ┬ BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+211 137 89 ┴ BOX DRAWINGS LIGHT UP AND HORIZONTAL
+212 138 8A ┼ BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+213 139 8B ▀ UPPER HALF BLOCK
+214 140 8C ▄ LOWER HALF BLOCK
+215 141 8D █ FULL BLOCK
+216 142 8E ▌ LEFT HALF BLOCK
+217 143 8F ▐ RIGHT HALF BLOCK
+220 144 90 ░ LIGHT SHADE
+221 145 91 ▒ MEDIUM SHADE
+222 146 92 ▓ DARK SHADE
+223 147 93 ⌠ TOP HALF INTEGRAL
+224 148 94 ■ BLACK SQUARE
+225 149 95 ∙ BULLET OPERATOR
+226 150 96 √ SQUARE ROOT
+227 151 97 ≈ ALMOST EQUAL TO
+230 152 98 ≤ LESS-THAN OR EQUAL TO
+231 153 99 ≥ GREATER-THAN OR EQUAL TO
+232 154 9A   NO-BREAK SPACE
+233 155 9B ⌡ BOTTOM HALF INTEGRAL
+234 156 9C ° DEGREE SIGN
+235 157 9D ² SUPERSCRIPT TWO
+236 158 9E · MIDDLE DOT
+237 159 9F ÷ DIVISION SIGN
+240 160 A0 ═ BOX DRAWINGS DOUBLE HORIZONTAL
+241 161 A1 ║ BOX DRAWINGS DOUBLE VERTICAL
+242 162 A2 ╒ BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+243 163 A3 ё CYRILLIC SMALL LETTER IO
+244 164 A4 ╓ BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+245 165 A5 ╔ BOX DRAWINGS DOUBLE DOWN AND RIGHT
+246 166 A6 ╕ BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+247 167 A7 ╖ BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+250 168 A8 ╗ BOX DRAWINGS DOUBLE DOWN AND LEFT
+251 169 A9 ╘ BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+252 170 AA ╙ BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+253 171 AB ╚ BOX DRAWINGS DOUBLE UP AND RIGHT
+254 172 AC ╛ BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+255 173 AD ╜ BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+256 174 AE ╝ BOX DRAWINGS DOUBLE UP AND LEFT
+257 175 AF ╞ BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+260 176 B0 ╟ BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+261 177 B1 ╠ BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+262 178 B2 ╡ BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+263 179 B3 Ё CYRILLIC CAPITAL LETTER IO
+264 180 B4 ╢ BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+265 181 B5 ╣ BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+266 182 B6 ╤ BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+267 183 B7 ╥ BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+270 184 B8 ╦ BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+271 185 B9 ╧ BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+272 186 BA ╨ BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+273 187 BB ╩ BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+274 188 BC ╪ T{
+BOX DRAWINGS VERTICAL SINGLE
+.br
+AND HORIZONTAL DOUBLE
+T}
+275 189 BD ╫ T{
+BOX DRAWINGS VERTICAL DOUBLE
+.br
+AND HORIZONTAL SINGLE
+T}
+276 190 BE ╬ BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+277 191 BF © COPYRIGHT SIGN
+300 192 C0 ю CYRILLIC SMALL LETTER YU
+301 193 C1 а CYRILLIC SMALL LETTER A
+302 194 C2 б CYRILLIC SMALL LETTER BE
+303 195 C3 ц CYRILLIC SMALL LETTER TSE
+304 196 C4 д CYRILLIC SMALL LETTER DE
+305 197 C5 е CYRILLIC SMALL LETTER IE
+306 198 C6 ф CYRILLIC SMALL LETTER EF
+307 199 C7 г CYRILLIC SMALL LETTER GHE
+310 200 C8 х CYRILLIC SMALL LETTER HA
+311 201 C9 и CYRILLIC SMALL LETTER I
+312 202 CA й CYRILLIC SMALL LETTER SHORT I
+313 203 CB к CYRILLIC SMALL LETTER KA
+314 204 CC л CYRILLIC SMALL LETTER EL
+315 205 CD м CYRILLIC SMALL LETTER EM
+316 206 CE н CYRILLIC SMALL LETTER EN
+317 207 CF о CYRILLIC SMALL LETTER O
+320 208 D0 п CYRILLIC SMALL LETTER PE
+321 209 D1 я CYRILLIC SMALL LETTER YA
+322 210 D2 р CYRILLIC SMALL LETTER ER
+323 211 D3 с CYRILLIC SMALL LETTER ES
+324 212 D4 т CYRILLIC SMALL LETTER TE
+325 213 D5 у CYRILLIC SMALL LETTER U
+326 214 D6 ж CYRILLIC SMALL LETTER ZHE
+327 215 D7 в CYRILLIC SMALL LETTER VE
+330 216 D8 ь CYRILLIC SMALL LETTER SOFT SIGN
+331 217 D9 ы CYRILLIC SMALL LETTER YERU
+332 218 DA з CYRILLIC SMALL LETTER ZE
+333 219 DB ш CYRILLIC SMALL LETTER SHA
+334 220 DC э CYRILLIC SMALL LETTER E
+335 221 DD щ CYRILLIC SMALL LETTER SHCHA
+336 222 DE ч CYRILLIC SMALL LETTER CHE
+337 223 DF ъ CYRILLIC SMALL LETTER HARD SIGN
+340 224 E0 Ю CYRILLIC CAPITAL LETTER YU
+341 225 E1 А CYRILLIC CAPITAL LETTER A
+342 226 E2 Б CYRILLIC CAPITAL LETTER BE
+343 227 E3 Ц CYRILLIC CAPITAL LETTER TSE
+344 228 E4 Д CYRILLIC CAPITAL LETTER DE
+345 229 E5 Е CYRILLIC CAPITAL LETTER IE
+346 230 E6 Ф CYRILLIC CAPITAL LETTER EF
+347 231 E7 Г CYRILLIC CAPITAL LETTER GHE
+350 232 E8 Х CYRILLIC CAPITAL LETTER HA
+351 233 E9 И CYRILLIC CAPITAL LETTER I
+352 234 EA Й CYRILLIC CAPITAL LETTER SHORT I
+353 235 EB К CYRILLIC CAPITAL LETTER KA
+354 236 EC Л CYRILLIC CAPITAL LETTER EL
+355 237 ED М CYRILLIC CAPITAL LETTER EM
+356 238 EE Н CYRILLIC CAPITAL LETTER EN
+357 239 EF О CYRILLIC CAPITAL LETTER O
+360 240 F0 П CYRILLIC CAPITAL LETTER PE
+361 241 F1 Я CYRILLIC CAPITAL LETTER YA
+362 242 F2 Р CYRILLIC CAPITAL LETTER ER
+363 243 F3 С CYRILLIC CAPITAL LETTER ES
+364 244 F4 Т CYRILLIC CAPITAL LETTER TE
+365 245 F5 У CYRILLIC CAPITAL LETTER U
+366 246 F6 Ж CYRILLIC CAPITAL LETTER ZHE
+367 247 F7 В CYRILLIC CAPITAL LETTER VE
+370 248 F8 Ь CYRILLIC CAPITAL LETTER SOFT SIGN
+371 249 F9 Ы CYRILLIC CAPITAL LETTER YERU
+372 250 FA З CYRILLIC CAPITAL LETTER ZE
+373 251 FB Ш CYRILLIC CAPITAL LETTER SHA
+374 252 FC Э CYRILLIC CAPITAL LETTER E
+375 253 FD Щ CYRILLIC CAPITAL LETTER SHCHA
+376 254 FE Ч CYRILLIC CAPITAL LETTER CHE
+377 255 FF Ъ CYRILLIC CAPITAL LETTER HARD SIGN
+.TE
+.SH NOTES
+The differences from KOI8-U are in the hex positions
+A4, A6, A7, AD, B4, B6, B7, and BD.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1251 (7),
+.BR iso_8859\-5 (7),
+.BR koi8\-u (7),
+.BR utf\-8 (7)
diff --git a/man7/koi8-u.7 b/man7/koi8-u.7
new file mode 100644
index 0000000..c515c2a
--- /dev/null
+++ b/man7/koi8-u.7
@@ -0,0 +1,175 @@
+'\" t
+.\" Copyright 2009 Lefteris Dimitroulakis <edimitro at tee.gr>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 2009-01-15, mtk, Some edits
+.\"
+.TH KOI8-U 7 2022-12-15 "Linux man-pages 6.05.01"
+.SH NAME
+koi8-u \- Ukrainian character set encoded in octal, decimal,
+and hexadecimal
+.SH DESCRIPTION
+RFC\ 2310 defines an 8-bit character set, KOI8-U.
+KOI8-U encodes the
+characters used in Ukrainian and Byelorussian.
+.SS KOI8-U characters
+The following table displays the characters in KOI8-U that
+are printable and unlisted in the
+.BR ascii (7)
+manual page.
+.TS
+l l l c lp-1.
+Oct Dec Hex Char Description
+_
+200 128 80 ─ BOX DRAWINGS LIGHT HORIZONTAL
+201 129 81 │ BOX DRAWINGS LIGHT VERTICAL
+202 130 82 ┌ BOX DRAWINGS LIGHT DOWN AND RIGHT
+203 131 83 ┐ BOX DRAWINGS LIGHT DOWN AND LEFT
+204 132 84 └ BOX DRAWINGS LIGHT UP AND RIGHT
+205 133 85 ┘ BOX DRAWINGS LIGHT UP AND LEFT
+206 134 86 ├ BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+207 135 87 ┤ BOX DRAWINGS LIGHT VERTICAL AND LEFT
+210 136 88 ┬ BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+211 137 89 ┴ BOX DRAWINGS LIGHT UP AND HORIZONTAL
+212 138 8A ┼ BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+213 139 8B ▀ UPPER HALF BLOCK
+214 140 8C ▄ LOWER HALF BLOCK
+215 141 8D █ FULL BLOCK
+216 142 8E ▌ LEFT HALF BLOCK
+217 143 8F ▐ RIGHT HALF BLOCK
+220 144 90 ░ LIGHT SHADE
+221 145 91 ▒ MEDIUM SHADE
+222 146 92 ▓ DARK SHADE
+223 147 93 ⌠ TOP HALF INTEGRAL
+224 148 94 ■ BLACK SQUARE
+225 149 95 ∙ BULLET OPERATOR
+226 150 96 √ SQUARE ROOT
+227 151 97 ≈ ALMOST EQUAL TO
+230 152 98 ≤ LESS-THAN OR EQUAL TO
+231 153 99 ≥ GREATER-THAN OR EQUAL TO
+232 154 9A   NO-BREAK SPACE
+233 155 9B ⌡ BOTTOM HALF INTEGRAL
+234 156 9C ° DEGREE SIGN
+235 157 9D ² SUPERSCRIPT TWO
+236 158 9E · MIDDLE DOT
+237 159 9F ÷ DIVISION SIGN
+240 160 A0 ═ BOX DRAWINGS DOUBLE HORIZONTAL
+241 161 A1 ║ BOX DRAWINGS DOUBLE VERTICAL
+242 162 A2 ╒ BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+243 163 A3 ё CYRILLIC SMALL LETTER IO
+244 164 A4 є CYRILLIC SMALL LETTER UKRAINIAN IE
+245 165 A5 ╔ BOX DRAWINGS DOUBLE DOWN AND RIGHT
+246 166 A6 і T{
+CYRILLIC SMALL LETTER
+.br
+BYELORUSSIAN-UKRAINIAN I
+T}
+247 167 A7 ї CYRILLIC SMALL LETTER YI (Ukrainian)
+250 168 A8 ╗ BOX DRAWINGS DOUBLE DOWN AND LEFT
+251 169 A9 ╘ BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+252 170 AA ╙ BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+253 171 AB ╚ BOX DRAWINGS DOUBLE UP AND RIGHT
+254 172 AC ╛ BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+255 173 AD ґ CYRILLIC SMALL LETTER GHE WITH UPTURN
+256 174 AE ╝ BOX DRAWINGS DOUBLE UP AND LEFT
+257 175 AF ╞ BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+260 176 B0 ╟ BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+261 177 B1 ╠ BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+262 178 B2 ╡ BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+263 179 B3 Ё CYRILLIC CAPITAL LETTER IO
+264 180 B4 Є CYRILLIC CAPITAL LETTER UKRAINIAN IE
+265 181 B5 ╣ BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+266 182 B6 І T{
+CYRILLIC CAPITAL LETTER
+.br
+BYELORUSSIAN-UKRAINIAN I
+T}
+267 183 B7 Ї CYRILLIC CAPITAL LETTER YI (Ukrainian)
+270 184 B8 ╦ BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+271 185 B9 ╧ BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+272 186 BA ╨ BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+273 187 BB ╩ BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+274 188 BC ╪ T{
+BOX DRAWINGS VERTICAL SINGLE
+.br
+AND HORIZONTAL DOUBLE
+T}
+275 189 BD Ґ CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+276 190 BE ╬ BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+277 191 BF © COPYRIGHT SIGN
+300 192 C0 ю CYRILLIC SMALL LETTER YU
+301 193 C1 а CYRILLIC SMALL LETTER A
+302 194 C2 б CYRILLIC SMALL LETTER BE
+303 195 C3 ц CYRILLIC SMALL LETTER TSE
+304 196 C4 д CYRILLIC SMALL LETTER DE
+305 197 C5 е CYRILLIC SMALL LETTER IE
+306 198 C6 ф CYRILLIC SMALL LETTER EF
+307 199 C7 г CYRILLIC SMALL LETTER GHE
+310 200 C8 х CYRILLIC SMALL LETTER HA
+311 201 C9 и CYRILLIC SMALL LETTER I
+312 202 CA й CYRILLIC SMALL LETTER SHORT I
+313 203 CB к CYRILLIC SMALL LETTER KA
+314 204 CC л CYRILLIC SMALL LETTER EL
+315 205 CD м CYRILLIC SMALL LETTER EM
+316 206 CE н CYRILLIC SMALL LETTER EN
+317 207 CF о CYRILLIC SMALL LETTER O
+320 208 D0 п CYRILLIC SMALL LETTER PE
+321 209 D1 я CYRILLIC SMALL LETTER YA
+322 210 D2 р CYRILLIC SMALL LETTER ER
+323 211 D3 с CYRILLIC SMALL LETTER ES
+324 212 D4 т CYRILLIC SMALL LETTER TE
+325 213 D5 у CYRILLIC SMALL LETTER U
+326 214 D6 ж CYRILLIC SMALL LETTER ZHE
+327 215 D7 в CYRILLIC SMALL LETTER VE
+330 216 D8 ь CYRILLIC SMALL LETTER SOFT SIGN
+331 217 D9 ы CYRILLIC SMALL LETTER YERU
+332 218 DA з CYRILLIC SMALL LETTER ZE
+333 219 DB ш CYRILLIC SMALL LETTER SHA
+334 220 DC э CYRILLIC SMALL LETTER E
+335 221 DD щ CYRILLIC SMALL LETTER SHCHA
+336 222 DE ч CYRILLIC SMALL LETTER CHE
+337 223 DF ъ CYRILLIC SMALL LETTER HARD SIGN
+340 224 E0 Ю CYRILLIC CAPITAL LETTER YU
+341 225 E1 А CYRILLIC CAPITAL LETTER A
+342 226 E2 Б CYRILLIC CAPITAL LETTER BE
+343 227 E3 Ц CYRILLIC CAPITAL LETTER TSE
+344 228 E4 Д CYRILLIC CAPITAL LETTER DE
+345 229 E5 Е CYRILLIC CAPITAL LETTER IE
+346 230 E6 Ф CYRILLIC CAPITAL LETTER EF
+347 231 E7 Г CYRILLIC CAPITAL LETTER GHE
+350 232 E8 Х CYRILLIC CAPITAL LETTER HA
+351 233 E9 И CYRILLIC CAPITAL LETTER I
+352 234 EA Й CYRILLIC CAPITAL LETTER SHORT I
+353 235 EB К CYRILLIC CAPITAL LETTER KA
+354 236 EC Л CYRILLIC CAPITAL LETTER EL
+355 237 ED М CYRILLIC CAPITAL LETTER EM
+356 238 EE Н CYRILLIC CAPITAL LETTER EN
+357 239 EF О CYRILLIC CAPITAL LETTER O
+360 240 F0 П CYRILLIC CAPITAL LETTER PE
+361 241 F1 Я CYRILLIC CAPITAL LETTER YA
+362 242 F2 Р CYRILLIC CAPITAL LETTER ER
+363 243 F3 С CYRILLIC CAPITAL LETTER ES
+364 244 F4 Т CYRILLIC CAPITAL LETTER TE
+365 245 F5 У CYRILLIC CAPITAL LETTER U
+366 246 F6 Ж CYRILLIC CAPITAL LETTER ZHE
+367 247 F7 В CYRILLIC CAPITAL LETTER VE
+370 248 F8 Ь CYRILLIC CAPITAL LETTER SOFT SIGN
+371 249 F9 Ы CYRILLIC CAPITAL LETTER YERU
+372 250 FA З CYRILLIC CAPITAL LETTER ZE
+373 251 FB Ш CYRILLIC CAPITAL LETTER SHA
+374 252 FC Э CYRILLIC CAPITAL LETTER E
+375 253 FD Щ CYRILLIC CAPITAL LETTER SHCHA
+376 254 FE Ч CYRILLIC CAPITAL LETTER CHE
+377 255 FF Ъ CYRILLIC CAPITAL LETTER HARD SIGN
+.TE
+.SH NOTES
+The differences from KOI8-R are in the hex positions
+A4, A6, A7, AD, B4, B6, B7, and BD.
+.SH SEE ALSO
+.BR ascii (7),
+.BR charsets (7),
+.BR cp1251 (7),
+.BR iso_8859\-5 (7),
+.BR koi8\-r (7),
+.BR utf\-8 (7)
diff --git a/man7/landlock.7 b/man7/landlock.7
new file mode 100644
index 0000000..96f8217
--- /dev/null
+++ b/man7/landlock.7
@@ -0,0 +1,586 @@
+'\" t
+.\" Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+.\" Copyright © 2019-2020 ANSSI
+.\" Copyright © 2021 Microsoft Corporation
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH Landlock 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+Landlock \- unprivileged access-control
+.SH DESCRIPTION
+Landlock is an access-control system that enables any processes to
+securely restrict themselves and their future children.
+Because Landlock is a stackable Linux Security Module (LSM),
+it makes it possible to create safe security sandboxes
+as new security layers in addition to
+the existing system-wide access-controls.
+This kind of sandbox is expected to help mitigate
+the security impact of bugs,
+and unexpected or malicious behaviors in applications.
+.PP
+A Landlock security policy is a set of access rights
+(e.g., open a file in read-only mode, make a directory, etc.)
+tied to a file hierarchy.
+Such policy can be configured and enforced by processes for themselves
+using three system calls:
+.IP \[bu] 3
+.BR landlock_create_ruleset (2)
+creates a new ruleset;
+.IP \[bu]
+.BR landlock_add_rule (2)
+adds a new rule to a ruleset;
+.IP \[bu]
+.BR landlock_restrict_self (2)
+enforces a ruleset on the calling thread.
+.PP
+To be able to use these system calls,
+the running kernel must support Landlock and
+it must be enabled at boot time.
+.\"
+.SS Landlock rules
+A Landlock rule describes an action on an object.
+An object is currently a file hierarchy,
+and the related filesystem actions are defined with access rights (see
+.BR landlock_add_rule (2)).
+A set of rules is aggregated in a ruleset,
+which can then restrict the thread enforcing it,
+and its future children.
+.\"
+.SS Filesystem actions
+These flags make it possible to restrict a sandboxed process to a
+set of actions on files and directories.
+Files or directories opened before the sandboxing
+are not subject to these restrictions.
+See
+.BR landlock_add_rule (2)
+and
+.BR landlock_create_ruleset (2)
+for more context.
+.PP
+A file can only receive these access rights:
+.TP
+.B LANDLOCK_ACCESS_FS_EXECUTE
+Execute a file.
+.TP
+.B LANDLOCK_ACCESS_FS_WRITE_FILE
+Open a file with write access.
+.IP
+When opening files for writing,
+you will often additionally need the
+.B LANDLOCK_ACCESS_FS_TRUNCATE
+right.
+In many cases,
+these system calls truncate existing files when overwriting them
+(e.g.,
+.BR creat (2)).
+.TP
+.B LANDLOCK_ACCESS_FS_READ_FILE
+Open a file with read access.
+.TP
+.B LANDLOCK_ACCESS_FS_TRUNCATE
+Truncate a file with
+.BR truncate (2),
+.BR ftruncate (2),
+.BR creat (2),
+or
+.BR open (2)
+with
+.BR O_TRUNC .
+Whether an opened file can be truncated with
+.BR ftruncate (2)
+is determined during
+.BR open (2),
+in the same way as read and write permissions are checked during
+.BR open (2)
+using
+.B LANDLOCK_ACCESS_FS_READ_FILE
+and
+.BR LANDLOCK_ACCESS_FS_WRITE_FILE .
+This access right is available since the third version of the Landlock ABI.
+.PP
+A directory can receive access rights related to files or directories.
+The following access right is applied to the directory itself,
+and the directories beneath it:
+.TP
+.B LANDLOCK_ACCESS_FS_READ_DIR
+Open a directory or list its content.
+.PP
+However,
+the following access rights only apply to the content of a directory,
+not the directory itself:
+.TP
+.B LANDLOCK_ACCESS_FS_REMOVE_DIR
+Remove an empty directory or rename one.
+.TP
+.B LANDLOCK_ACCESS_FS_REMOVE_FILE
+Unlink (or rename) a file.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_CHAR
+Create (or rename or link) a character device.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_DIR
+Create (or rename) a directory.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_REG
+Create (or rename or link) a regular file.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_SOCK
+Create (or rename or link) a UNIX domain socket.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_FIFO
+Create (or rename or link) a named pipe.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_BLOCK
+Create (or rename or link) a block device.
+.TP
+.B LANDLOCK_ACCESS_FS_MAKE_SYM
+Create (or rename or link) a symbolic link.
+.TP
+.B LANDLOCK_ACCESS_FS_REFER
+Link or rename a file from or to a different directory
+(i.e., reparent a file hierarchy).
+.IP
+This access right is available since the second version of the Landlock ABI.
+.IP
+This is the only access right which is denied by default by any ruleset,
+even if the right is not specified as handled at ruleset creation time.
+The only way to make a ruleset grant this right
+is to explicitly allow it for a specific directory
+by adding a matching rule to the ruleset.
+.IP
+In particular, when using the first Landlock ABI version,
+Landlock will always deny attempts to reparent files
+between different directories.
+.IP
+In addition to the source and destination directories having the
+.B LANDLOCK_ACCESS_FS_REFER
+access right,
+the attempted link or rename operation must meet the following constraints:
+.RS
+.IP \[bu] 3
+The reparented file may not gain more access rights in the destination directory
+than it previously had in the source directory.
+If this is attempted, the operation results in an
+.B EXDEV
+error.
+.IP \[bu]
+When linking or renaming, the
+.BI LANDLOCK_ACCESS_FS_MAKE_ *
+right for the respective file type must be granted
+for the destination directory.
+Otherwise, the operation results in an
+.B EACCES
+error.
+.IP \[bu]
+When renaming, the
+.BI LANDLOCK_ACCESS_FS_REMOVE_ *
+right for the respective file type must be granted
+for the source directory.
+Otherwise, the operation results in an
+.B EACCES
+error.
+.RE
+.IP
+If multiple requirements are not met, the
+.B EACCES
+error code takes precedence over
+.BR EXDEV .
+.\"
+.SS Layers of file path access rights
+Each time a thread enforces a ruleset on itself,
+it updates its Landlock domain with a new layer of policy.
+Indeed, this complementary policy is composed with any other rulesets
+that may already be restricting this thread.
+A sandboxed thread can then safely add more constraints to itself with a
+new enforced ruleset.
+.PP
+One policy layer grants access to a file path
+if at least one of its rules encountered on the path grants the access.
+A sandboxed thread can only access a file path
+if all its enforced policy layers grant the access
+as well as all the other system access controls
+(e.g., filesystem DAC, other LSM policies, etc.).
+.\"
+.SS Bind mounts and OverlayFS
+Landlock enables restricting access to file hierarchies,
+which means that these access rights can be propagated with bind mounts
+(cf.
+.BR mount_namespaces (7))
+but not with OverlayFS.
+.PP
+A bind mount mirrors a source file hierarchy to a destination.
+The destination hierarchy is then composed of the exact same files,
+on which Landlock rules can be tied,
+either via the source or the destination path.
+These rules restrict access when they are encountered on a path,
+which means that they can restrict access to
+multiple file hierarchies at the same time,
+whether these hierarchies are the result of bind mounts or not.
+.PP
+An OverlayFS mount point consists of upper and lower layers.
+These layers are combined in a merge directory, which is the result of the mount point.
+This merge hierarchy may include files from the upper and lower layers,
+but modifications performed on the merge hierarchy
+only reflect on the upper layer.
+From a Landlock policy point of view,
+each of the OverlayFS layers and merge hierarchies is standalone and
+contains its own set of files and directories,
+which is different from a bind mount.
+A policy restricting an OverlayFS layer will not restrict
+the resulting merged hierarchy, and vice versa.
+Landlock users should then only think about file hierarchies they want to
+allow access to, regardless of the underlying filesystem.
+.\"
+.SS Inheritance
+Every new thread resulting from a
+.BR clone (2)
+inherits Landlock domain restrictions from its parent.
+This is similar to the
+.BR seccomp (2)
+inheritance or any other LSM dealing with tasks'
+.BR credentials (7).
+For instance, one process's thread may apply Landlock rules to itself,
+but they will not be automatically applied to other sibling threads
+(unlike POSIX thread credential changes, cf.
+.BR nptl (7)).
+.PP
+When a thread sandboxes itself,
+we have the guarantee that the related security policy
+will stay enforced on all this thread's descendants.
+This allows creating standalone and modular security policies
+per application,
+which will automatically be composed between themselves
+according to their run-time parent policies.
+.\"
+.SS Ptrace restrictions
+A sandboxed process has fewer privileges than a non-sandboxed process and
+must then be subject to additional restrictions
+when manipulating another process.
+To be allowed to use
+.BR ptrace (2)
+and related syscalls on a target process,
+a sandboxed process should have a subset of the target process rules,
+which means the tracee must be in a sub-domain of the tracer.
+.\"
+.SS Truncating files
+The operations covered by
+.B LANDLOCK_ACCESS_FS_WRITE_FILE
+and
+.B LANDLOCK_ACCESS_FS_TRUNCATE
+both change the contents of a file and sometimes overlap in
+non-intuitive ways.
+It is recommended to always specify both of these together.
+.PP
+A particularly surprising example is
+.BR creat (2).
+The name suggests that this system call requires
+the rights to create and write files.
+However, it also requires the truncate right
+if an existing file under the same name is already present.
+.PP
+It should also be noted that truncating files does not require the
+.B LANDLOCK_ACCESS_FS_WRITE_FILE
+right.
+Apart from the
+.BR truncate (2)
+system call, this can also be done through
+.BR open (2)
+with the flags
+.IR "O_RDONLY\ |\ O_TRUNC" .
+.PP
+When opening a file, the availability of the
+.B LANDLOCK_ACCESS_FS_TRUNCATE
+right is associated with the newly created file descriptor
+and will be used for subsequent truncation attempts using
+.BR ftruncate (2).
+The behavior is similar to opening a file for reading or writing,
+where permissions are checked during
+.BR open (2),
+but not during the subsequent
+.BR read (2)
+and
+.BR write (2)
+calls.
+.PP
+As a consequence,
+it is possible to have multiple open file descriptors for the same file,
+where one grants the right to truncate the file and the other does not.
+It is also possible to pass such file descriptors between processes,
+keeping their Landlock properties,
+even when these processes do not have an enforced Landlock ruleset.
+.SH VERSIONS
+Landlock was introduced in Linux 5.13.
+.PP
+To determine which Landlock features are available,
+users should query the Landlock ABI version:
+.TS
+box;
+ntb| ntb| lbx
+nt| nt| lbx.
+ABI Kernel Newly introduced access rights
+_ _ _
+1 5.13 LANDLOCK_ACCESS_FS_EXECUTE
+\^ \^ LANDLOCK_ACCESS_FS_WRITE_FILE
+\^ \^ LANDLOCK_ACCESS_FS_READ_FILE
+\^ \^ LANDLOCK_ACCESS_FS_READ_DIR
+\^ \^ LANDLOCK_ACCESS_FS_REMOVE_DIR
+\^ \^ LANDLOCK_ACCESS_FS_REMOVE_FILE
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_CHAR
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_DIR
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_REG
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_SOCK
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_FIFO
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_BLOCK
+\^ \^ LANDLOCK_ACCESS_FS_MAKE_SYM
+_ _ _
+2 5.19 LANDLOCK_ACCESS_FS_REFER
+_ _ _
+3 6.2 LANDLOCK_ACCESS_FS_TRUNCATE
+.TE
+.sp 1
+.PP
+Users should use the Landlock ABI version rather than the kernel version
+to determine which features are available.
+The mainline kernel versions listed here are only included for orientation.
+Kernels from other sources may contain backported features,
+and their version numbers may not match.
+.PP
+To query the running kernel's Landlock ABI version,
+programs may pass the
+.B LANDLOCK_CREATE_RULESET_VERSION
+flag to
+.BR landlock_create_ruleset (2).
+.PP
+When building fallback mechanisms for compatibility with older kernels,
+users are advised to consider the special semantics of the
+.B LANDLOCK_ACCESS_FS_REFER
+access right:
+In ABI v1,
+linking and moving of files between different directories is always forbidden,
+so programs relying on such operations are only compatible
+with Landlock ABI v2 and higher.
+.SH NOTES
+Landlock is enabled by
+.BR CONFIG_SECURITY_LANDLOCK .
+The
+.I lsm=lsm1,...,lsmN
+command line parameter controls the sequence of the initialization of
+Linux Security Modules.
+It must contain the string
+.I landlock
+to enable Landlock.
+If the command line parameter is not specified,
+the initialization falls back to the value of the deprecated
+.I security=
+command line parameter and further to the value of
+.BR CONFIG_LSM .
+We can check that Landlock is enabled by looking for
+.I landlock: Up and running.
+in kernel logs.
+.SH CAVEATS
+It is currently not possible to restrict some file-related actions
+accessible through these system call families:
+.BR chdir (2),
+.BR stat (2),
+.BR flock (2),
+.BR chmod (2),
+.BR chown (2),
+.BR setxattr (2),
+.BR utime (2),
+.BR ioctl (2),
+.BR fcntl (2),
+.BR access (2).
+Future Landlock evolutions will make it possible to restrict them.
+.SH EXAMPLES
+We first need to create the ruleset that will contain our rules.
+.PP
+For this example,
+the ruleset will contain rules that only allow read actions,
+but write actions will be denied.
+The ruleset then needs to handle both of these kinds of actions.
+See the
+.B DESCRIPTION
+section for the description of filesystem actions.
+.PP
+.in +4n
+.EX
+struct landlock_ruleset_attr attr = {0};
+int ruleset_fd;
+\&
+attr.handled_access_fs =
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_CHAR |
+ LANDLOCK_ACCESS_FS_MAKE_DIR |
+ LANDLOCK_ACCESS_FS_MAKE_REG |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO |
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK |
+ LANDLOCK_ACCESS_FS_MAKE_SYM |
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_TRUNCATE;
+.EE
+.in
+.PP
+To be compatible with older Linux versions,
+we detect the available Landlock ABI version,
+and only use the available subset of access rights:
+.PP
+.in +4n
+.EX
+/*
+ * Table of available file system access rights by ABI version,
+ * numbers hardcoded to keep the example short.
+ */
+__u64 landlock_fs_access_rights[] = {
+ (LANDLOCK_ACCESS_FS_MAKE_SYM << 1) \- 1, /* v1 */
+ (LANDLOCK_ACCESS_FS_REFER << 1) \- 1, /* v2: add "refer" */
+ (LANDLOCK_ACCESS_FS_TRUNCATE << 1) \- 1, /* v3: add "truncate" */
+};
+\&
+int abi = landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION);
+if (abi == \-1) {
+ /*
+ * Kernel too old, not compiled with Landlock,
+ * or Landlock was not enabled at boot time.
+ */
+ perror("Unable to use Landlock");
+ return; /* Graceful fallback: Do nothing. */
+}
+abi = MIN(abi, 3);
+\&
+/* Only use the available rights in the ruleset. */
+attr.handled_access_fs &= landlock_fs_access_rights[abi \- 1];
+.EE
+.in
+.PP
+The available access rights for each ABI version are listed in the
+.B VERSIONS
+section.
+.PP
+If our program needed to create hard links
+or rename files between different directories
+.RB ( LANDLOCK_ACCESS_FS_REFER ),
+we would require the following change to the backwards compatibility logic:
+Directory reparenting is not possible
+in a process restricted with Landlock ABI version 1.
+Therefore,
+if the program needed to do file reparenting,
+and if only Landlock ABI version 1 was available,
+we could not restrict the process.
+.PP
+Now that the ruleset attributes are determined,
+we create the Landlock ruleset
+and acquire a file descriptor as a handle to it,
+using
+.BR landlock_create_ruleset (2):
+.PP
+.in +4n
+.EX
+ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+if (ruleset_fd == \-1) {
+ perror("Failed to create a ruleset");
+ exit(EXIT_FAILURE);
+}
+.EE
+.in
+.PP
+We can now add a new rule to the ruleset through the ruleset's file descriptor.
+The requested access rights must be a subset of the access rights
+which were specified in
+.I attr.handled_access_fs
+at ruleset creation time.
+.PP
+In this example, the rule will only allow reading the file hierarchy
+.IR /usr .
+Without another rule, write actions would then be denied by the ruleset.
+To add
+.I /usr
+to the ruleset, we open it with the
+.I O_PATH
+flag and fill the
+.I struct landlock_path_beneath_attr
+with this file descriptor.
+.PP
+.in +4n
+.EX
+struct landlock_path_beneath_attr path_beneath = {0};
+int err;
+\&
+path_beneath.allowed_access =
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR;
+\&
+path_beneath.parent_fd = open("/usr", O_PATH | O_CLOEXEC);
+if (path_beneath.parent_fd == \-1) {
+ perror("Failed to open file");
+ close(ruleset_fd);
+ exit(EXIT_FAILURE);
+}
+err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0);
+close(path_beneath.parent_fd);
+if (err) {
+ perror("Failed to update ruleset");
+ close(ruleset_fd);
+ exit(EXIT_FAILURE);
+}
+.EE
+.in
+.PP
+We now have a ruleset with one rule allowing read access to
+.I /usr
+while denying all other handled accesses for the filesystem.
+The next step is to restrict the current thread from gaining more
+privileges
+(e.g., thanks to a set-user-ID binary).
+.PP
+.in +4n
+.EX
+if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ perror("Failed to restrict privileges");
+ close(ruleset_fd);
+ exit(EXIT_FAILURE);
+}
+.EE
+.in
+.PP
+The current thread is now ready to sandbox itself with the ruleset.
+.PP
+.in +4n
+.EX
+if (landlock_restrict_self(ruleset_fd, 0)) {
+ perror("Failed to enforce ruleset");
+ close(ruleset_fd);
+ exit(EXIT_FAILURE);
+}
+close(ruleset_fd);
+.EE
+.in
+.PP
+If the
+.BR landlock_restrict_self (2)
+system call succeeds, the current thread is now restricted and
+this policy will be enforced on all its subsequently created children as well.
+Once a thread is landlocked, there is no way to remove its security policy;
+only adding more restrictions is allowed.
+These threads are now in a new Landlock domain,
+which is the merge of their parent domain (if any) with the new ruleset.
+.PP
+Full working code can be found in
+.UR https://git.kernel.org/\:pub/\:scm/\:linux/\:kernel/\:git/\:stable/\:linux.git/\:tree/\:samples/\:landlock/\:sandboxer.c
+.UE
+.SH SEE ALSO
+.BR landlock_create_ruleset (2),
+.BR landlock_add_rule (2),
+.BR landlock_restrict_self (2)
+.PP
+.UR https://landlock.io/
+.UE
diff --git a/man7/latin1.7 b/man7/latin1.7
new file mode 100644
index 0000000..1969dfb
--- /dev/null
+++ b/man7/latin1.7
@@ -0,0 +1 @@
+.so man7/iso_8859-1.7
diff --git a/man7/latin10.7 b/man7/latin10.7
new file mode 100644
index 0000000..b9c8e91
--- /dev/null
+++ b/man7/latin10.7
@@ -0,0 +1 @@
+.so man7/iso_8859-16.7
diff --git a/man7/latin2.7 b/man7/latin2.7
new file mode 100644
index 0000000..da36668
--- /dev/null
+++ b/man7/latin2.7
@@ -0,0 +1 @@
+.so man7/iso_8859-2.7
diff --git a/man7/latin3.7 b/man7/latin3.7
new file mode 100644
index 0000000..75e42ce
--- /dev/null
+++ b/man7/latin3.7
@@ -0,0 +1 @@
+.so man7/iso_8859-3.7
diff --git a/man7/latin4.7 b/man7/latin4.7
new file mode 100644
index 0000000..15a829e
--- /dev/null
+++ b/man7/latin4.7
@@ -0,0 +1 @@
+.so man7/iso_8859-4.7
diff --git a/man7/latin5.7 b/man7/latin5.7
new file mode 100644
index 0000000..0fcc7d4
--- /dev/null
+++ b/man7/latin5.7
@@ -0,0 +1 @@
+.so man7/iso_8859-9.7
diff --git a/man7/latin6.7 b/man7/latin6.7
new file mode 100644
index 0000000..9b4658f
--- /dev/null
+++ b/man7/latin6.7
@@ -0,0 +1 @@
+.so man7/iso_8859-10.7
diff --git a/man7/latin7.7 b/man7/latin7.7
new file mode 100644
index 0000000..8ad2335
--- /dev/null
+++ b/man7/latin7.7
@@ -0,0 +1 @@
+.so man7/iso_8859-13.7
diff --git a/man7/latin8.7 b/man7/latin8.7
new file mode 100644
index 0000000..4aa555d
--- /dev/null
+++ b/man7/latin8.7
@@ -0,0 +1 @@
+.so man7/iso_8859-14.7
diff --git a/man7/latin9.7 b/man7/latin9.7
new file mode 100644
index 0000000..a4095d7
--- /dev/null
+++ b/man7/latin9.7
@@ -0,0 +1 @@
+.so man7/iso_8859-15.7
diff --git a/man7/libc.7 b/man7/libc.7
new file mode 100644
index 0000000..7a62251
--- /dev/null
+++ b/man7/libc.7
@@ -0,0 +1,115 @@
+.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH libc 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+libc \- overview of standard C libraries on Linux
+.SH DESCRIPTION
+The term \[lq]libc\[rq] is commonly used as a shorthand for
+the \[lq]standard C library\[rq],
+a library of standard functions that can be used by all C programs
+(and sometimes by programs in other languages).
+Because of some history
+(see below),
+use of the term \[lq]libc\[rq]
+to refer to the standard C library is somewhat ambiguous on Linux.
+.SS glibc
+By far the most widely used C library on Linux is the
+.UR http://www.gnu.org\:/software\:/libc/
+GNU C Library
+.UE ,
+often referred to as
+.IR glibc .
+This is the C library that is nowadays used in all
+major Linux distributions.
+It is also the C library whose details are documented
+in the relevant pages of the
+.I man-pages
+project
+(primarily in Section 3 of the manual).
+Documentation of glibc is also available in the glibc manual,
+available via the command
+.IR "info libc" .
+Release 1.0 of glibc was made in September 1992.
+(There were earlier 0.x releases.)
+The next major release of glibc was 2.0,
+at the beginning of 1997.
+.PP
+The pathname
+.I /lib/libc.so.6
+(or something similar)
+is normally a symbolic link that
+points to the location of the glibc library,
+and executing this pathname will cause glibc to display
+various information about the version installed on your system.
+.SS Linux libc
+In the early to mid 1990s,
+there was for a while
+.IR "Linux libc" ,
+a fork of glibc 1.x created by Linux developers who felt that glibc
+development at the time was not sufficing for the needs of Linux.
+Often,
+this library was referred to (ambiguously) as just \[lq]libc\[rq].
+Linux libc released major versions 2, 3, 4, and 5,
+as well as many minor versions of those releases.
+Linux libc4 was the last version to use the a.out binary format,
+and the first version to provide (primitive) shared library support.
+Linux libc 5 was the first version to support the ELF binary format;
+this version used the shared library soname
+.IR libc.so.5 .
+For a while,
+Linux libc was the standard C library in many Linux distributions.
+.PP
+However,
+notwithstanding the original motivations of the Linux libc effort,
+by the time glibc 2.0 was released
+(in 1997),
+it was clearly superior to Linux libc,
+and all major Linux distributions that had been using Linux libc
+soon switched back to glibc.
+To avoid any confusion with Linux libc versions,
+glibc 2.0 and later used the shared library soname
+.IR libc.so.6 .
+.PP
+Since the switch from Linux libc to glibc 2.0 occurred long ago,
+.I man-pages
+no longer takes care to document Linux libc details.
+Nevertheless,
+the history is visible in vestiges of information
+about Linux libc that remain in a few manual pages,
+in particular,
+references to
+.I libc4
+and
+.IR libc5 .
+.SS Other C libraries
+There are various other less widely used C libraries for Linux.
+These libraries are generally smaller than glibc,
+both in terms of features and memory footprint,
+and often intended for building small binaries,
+perhaps targeted at development for embedded Linux systems.
+Among such libraries are
+.UR http://www\:.uclibc\:.org/
+.I uClibc
+.UE ,
+.UR http://www\:.fefe\:.de/\:dietlibc/
+.I dietlibc
+.UE ,
+and
+.UR http://www\:.musl\-libc\:.org/
+.I "musl libc"
+.UE .
+Details of these libraries are covered by the
+.I man-pages
+project,
+where they are known.
+.SH SEE ALSO
+.BR syscalls (2),
+.BR getauxval (3),
+.BR proc (5),
+.BR feature_test_macros (7),
+.BR man\-pages (7),
+.BR standards (7),
+.BR vdso (7)
diff --git a/man7/locale.7 b/man7/locale.7
new file mode 100644
index 0000000..49aa367
--- /dev/null
+++ b/man7/locale.7
@@ -0,0 +1,379 @@
+.\" Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
+.\" and Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified Sat Jul 24 17:28:34 1993 by Rik Faith <faith@cs.unc.edu>
+.\" Modified Sun Jun 01 17:16:34 1997 by Jochen Hein
+.\" <jochen.hein@delphi.central.de>
+.\" Modified Thu Apr 25 00:43:19 2002 by Bruno Haible <bruno@clisp.org>
+.\"
+.TH locale 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+locale \- description of multilanguage support
+.SH SYNOPSIS
+.nf
+.B #include <locale.h>
+.fi
+.SH DESCRIPTION
+A locale is a set of language and cultural rules.
+These cover aspects
+such as language for messages, different character sets, lexicographic
+conventions, and so on.
+A program needs to be able to determine its locale
+and act accordingly to be portable to different cultures.
+.PP
+The header
+.I <locale.h>
+declares data types, functions, and macros which are useful in this
+task.
+.PP
+The functions it declares are
+.BR setlocale (3)
+to set the current locale, and
+.BR localeconv (3)
+to get information about number formatting.
+.PP
+There are different categories for locale information a program might
+need; they are declared as macros.
+Using them as the first argument
+to the
+.BR setlocale (3)
+function, it is possible to set one of these to the desired locale:
+.TP
+.BR LC_ADDRESS " (GNU extension, since glibc 2.2)"
+.\" See ISO/IEC Technical Report 14652
+Change settings that describe the formats (e.g., postal addresses)
+used to describe locations and geography-related items.
+Applications that need this information can use
+.BR nl_langinfo (3)
+to retrieve nonstandard elements, such as
+.B _NL_ADDRESS_COUNTRY_NAME
+(country name, in the language of the locale)
+and
+.B _NL_ADDRESS_LANG_NAME
+(language name, in the language of the locale),
+which return strings such as "Deutschland" and "Deutsch"
+(for German-language locales).
+(Other element names are listed in
+.IR <langinfo.h> .)
+.TP
+.B LC_COLLATE
+This category governs the collation rules used for
+sorting and regular expressions,
+including character equivalence classes and
+multicharacter collating elements.
+This locale category changes the behavior of the functions
+.BR strcoll (3)
+and
+.BR strxfrm (3),
+which are used to compare strings in the local alphabet.
+For example,
+the German sharp s is sorted as "ss".
+.TP
+.B LC_CTYPE
+This category determines the interpretation of byte sequences as characters
+(e.g., single versus multibyte characters), character classifications
+(e.g., alphabetic or digit), and the behavior of character classes.
+On glibc systems, this category also determines
+the character transliteration rules for
+.BR iconv (1)
+and
+.BR iconv (3).
+It changes the behavior of the character handling and
+classification functions, such as
+.BR isupper (3)
+and
+.BR toupper (3),
+and the multibyte character functions such as
+.BR mblen (3)
+or
+.BR wctomb (3).
+.TP
+.BR LC_IDENTIFICATION " (GNU extension, since glibc 2.2)"
+.\" See ISO/IEC Technical Report 14652
+Change settings that relate to the metadata for the locale.
+Applications that need this information can use
+.BR nl_langinfo (3)
+to retrieve nonstandard elements, such as
+.B _NL_IDENTIFICATION_TITLE
+(title of this locale document)
+and
+.B _NL_IDENTIFICATION_TERRITORY
+(geographical territory to which this locale document applies),
+which might return strings such as "English locale for the USA"
+and "USA".
+(Other element names are listed in
+.IR <langinfo.h> .)
+.TP
+.B LC_MONETARY
+This category determines the formatting used for
+monetary-related numeric values.
+This changes the information returned by
+.BR localeconv (3),
+which describes the way numbers are usually printed, with details such
+as decimal point versus decimal comma.
+This information is internally
+used by the function
+.BR strfmon (3).
+.TP
+.B LC_MESSAGES
+This category affects the language in which messages are displayed
+and what an affirmative or negative answer looks like.
+The GNU C library contains the
+.BR gettext (3),
+.BR ngettext (3),
+and
+.BR rpmatch (3)
+functions to ease the use of this information.
+The GNU gettext family of
+functions also obey the environment variable
+.B LANGUAGE
+(containing a colon-separated list of locales)
+if the category is set to a valid locale other than
+.BR """C""" .
+This category also affects the behavior of
+.BR catopen (3).
+.TP
+.BR LC_MEASUREMENT " (GNU extension, since glibc 2.2)"
+Change the settings relating to the measurement system in the locale
+(i.e., metric versus US customary units).
+Applications can use
+.BR nl_langinfo (3)
+to retrieve the nonstandard
+.B _NL_MEASUREMENT_MEASUREMENT
+element, which returns a pointer to a character
+that has the value 1 (metric) or 2 (US customary units).
+.TP
+.BR LC_NAME " (GNU extension, since glibc 2.2)"
+.\" See ISO/IEC Technical Report 14652
+Change settings that describe the formats used to address persons.
+Applications that need this information can use
+.BR nl_langinfo (3)
+to retrieve nonstandard elements, such as
+.B _NL_NAME_NAME_MR
+(general salutation for men)
+and
+.B _NL_NAME_NAME_MS
+(general salutation for women)
+elements, which return strings such as "Herr" and "Frau"
+(for German-language locales).
+(Other element names are listed in
+.IR <langinfo.h> .)
+.TP
+.B LC_NUMERIC
+This category determines the formatting rules used for nonmonetary
+numeric values\[em]for example,
+the thousands separator and the radix character
+(a period in most English-speaking countries,
+but a comma in many other regions).
+It affects functions such as
+.BR printf (3),
+.BR scanf (3),
+and
+.BR strtod (3).
+This information can also be read with the
+.BR localeconv (3)
+function.
+.TP
+.BR LC_PAPER " (GNU extension, since glibc 2.2)"
+.\" See ISO/IEC Technical Report 14652
+Change the settings relating to the dimensions of the standard paper size
+(e.g., US letter versus A4).
+Applications that need the dimensions can obtain them by using
+.BR nl_langinfo (3)
+to retrieve the nonstandard
+.B _NL_PAPER_WIDTH
+and
+.B _NL_PAPER_HEIGHT
+elements, which return
+.I int
+values specifying the dimensions in millimeters.
+.TP
+.BR LC_TELEPHONE " (GNU extension, since glibc 2.2)"
+.\" See ISO/IEC Technical Report 14652
+Change settings that describe the formats to be used with telephone services.
+Applications that need this information can use
+.BR nl_langinfo (3)
+to retrieve nonstandard elements, such as
+.B _NL_TELEPHONE_INT_PREFIX
+(international prefix used to call numbers in this locale),
+which returns a string such as "49" (for Germany).
+(Other element names are listed in
+.IR <langinfo.h> .)
+.TP
+.B LC_TIME
+This category governs the formatting used for date and time values.
+For example, most of Europe uses a 24-hour clock versus the
+12-hour clock used in the United States.
+The setting of this category affects the behavior of functions such as
+.BR strftime (3)
+and
+.BR strptime (3).
+.TP
+.B LC_ALL
+All of the above.
+.PP
+If the second argument to
+.BR setlocale (3)
+is an empty string,
+.IR \[dq]\[dq] ,
+for the default locale, it is determined using the following steps:
+.IP (1) 5
+If there is a non-null environment variable
+.BR LC_ALL ,
+the value of
+.B LC_ALL
+is used.
+.IP (2)
+If an environment variable with the same name as one of the categories
+above exists and is non-null, its value is used for that category.
+.IP (3)
+If there is a non-null environment variable
+.BR LANG ,
+the value of
+.B LANG
+is used.
+.PP
+Information about local numeric formatting is made available in a
+.I struct lconv
+returned by the
+.BR localeconv (3)
+function, which has the following declaration:
+.PP
+.in +4n
+.EX
+struct lconv {
+\&
+ /* Numeric (nonmonetary) information */
+\&
+ char *decimal_point; /* Radix character */
+ char *thousands_sep; /* Separator for digit groups to left
+ of radix character */
+ char *grouping; /* Each element is the number of digits in
+ a group; elements with higher indices
+ are further left. An element with value
+ CHAR_MAX means that no further grouping
+ is done. An element with value 0 means
+ that the previous element is used for
+ all groups further left. */
+\&
+ /* Remaining fields are for monetary information */
+\&
+ char *int_curr_symbol; /* First three chars are a currency
+ symbol from ISO 4217. Fourth char
+ is the separator. Fifth char
+ is \[aq]\e0\[aq]. */
+ char *currency_symbol; /* Local currency symbol */
+ char *mon_decimal_point; /* Radix character */
+ char *mon_thousands_sep; /* Like \fIthousands_sep\fP above */
+ char *mon_grouping; /* Like \fIgrouping\fP above */
+ char *positive_sign; /* Sign for positive values */
+ char *negative_sign; /* Sign for negative values */
+ char int_frac_digits; /* International fractional digits */
+ char frac_digits; /* Local fractional digits */
+ char p_cs_precedes; /* 1 if currency_symbol precedes a
+ positive value, 0 if succeeds */
+ char p_sep_by_space; /* 1 if a space separates
+ currency_symbol from a positive
+ value */
+ char n_cs_precedes; /* 1 if currency_symbol precedes a
+ negative value, 0 if succeeds */
+ char n_sep_by_space; /* 1 if a space separates
+ currency_symbol from a negative
+ value */
+ /* Positive and negative sign positions:
+ 0 Parentheses surround the quantity and currency_symbol.
+ 1 The sign string precedes the quantity and currency_symbol.
+ 2 The sign string succeeds the quantity and currency_symbol.
+ 3 The sign string immediately precedes the currency_symbol.
+ 4 The sign string immediately succeeds the currency_symbol. */
+ char p_sign_posn;
+ char n_sign_posn;
+};
+.EE
+.in
+.SS POSIX.1-2008 extensions to the locale API
+POSIX.1-2008 standardized a number of extensions to the locale API,
+based on implementations that first appeared in glibc 2.3.
+These extensions are designed to address the problem that
+the traditional locale APIs do not mix well with multithreaded applications
+and with applications that must deal with multiple locales.
+.PP
+The extensions take the form of new functions for creating and
+manipulating locale objects
+.RB ( newlocale (3),
+.BR freelocale (3),
+.BR duplocale (3),
+and
+.BR uselocale (3))
+and various new library functions with the suffix "_l" (e.g.,
+.BR toupper_l (3))
+that extend the traditional locale-dependent APIs (e.g.,
+.BR toupper (3))
+to allow the specification of a locale object that should apply when
+executing the function.
+.SH ENVIRONMENT
+The following environment variable is used by
+.BR newlocale (3)
+and
+.BR setlocale (3),
+and thus affects all unprivileged localized programs:
+.TP
+.B LOCPATH
+A list of pathnames, separated by colons (\[aq]:\[aq]),
+that should be used to find locale data.
+If this variable is set,
+only the individual compiled locale data files from
+.B LOCPATH
+and the system default locale data path are used;
+any available locale archives are not used (see
+.BR localedef (1)).
+The individual compiled locale data files are searched for under
+subdirectories which depend on the currently used locale.
+For example, when
+.I en_GB.UTF\-8
+is used for a category, the following subdirectories are searched for,
+in this order:
+.IR en_GB.UTF\-8 ,
+.IR en_GB.utf8 ,
+.IR en_GB ,
+.IR en.UTF\-8 ,
+.IR en.utf8 ,
+and
+.IR en .
+.SH FILES
+.TP
+.I /usr/lib/locale/locale\-archive
+Usual default locale archive location.
+.TP
+.I /usr/lib/locale
+Usual default path for compiled individual locale files.
+.SH STANDARDS
+POSIX.1-2001.
+.\"
+.\" The GNU gettext functions are specified in LI18NUX2000.
+.SH SEE ALSO
+.BR iconv (1),
+.BR locale (1),
+.BR localedef (1),
+.BR catopen (3),
+.BR gettext (3),
+.BR iconv (3),
+.BR localeconv (3),
+.BR mbstowcs (3),
+.BR newlocale (3),
+.BR ngettext (3),
+.BR nl_langinfo (3),
+.BR rpmatch (3),
+.BR setlocale (3),
+.BR strcoll (3),
+.BR strfmon (3),
+.BR strftime (3),
+.BR strxfrm (3),
+.BR uselocale (3),
+.BR wcstombs (3),
+.BR locale (5),
+.BR charsets (7),
+.BR unicode (7),
+.BR utf\-8 (7)
diff --git a/man7/mailaddr.7 b/man7/mailaddr.7
new file mode 100644
index 0000000..8218daa
--- /dev/null
+++ b/man7/mailaddr.7
@@ -0,0 +1,134 @@
+.\" Copyright (c) 1983, 1987 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" @(#)mailaddr.7 6.5 (Berkeley) 2/14/89
+.\"
+.\" Extensively rewritten by Arnt Gulbrandsen <agulbra@troll.no>. My
+.\" changes are placed under the same copyright as the original BSD page.
+.\"
+.\" Adjusted by Arnt Gulbrandsen <arnt@gulbrandsen.priv.no> in 2004 to
+.\" account for changes since 1995. Route-addrs are now even less
+.\" common, etc. Some minor wording improvements. Same copyright.
+.\"
+.\" %%%LICENSE_START(PERMISSIVE_MISC)
+.\" Redistribution and use in source and binary forms are permitted
+.\" provided that the above copyright notice and this paragraph are
+.\" duplicated in all such forms and that any documentation,
+.\" advertising materials, and other materials related to such
+.\" distribution and use acknowledge that the software was developed
+.\" by the University of California, Berkeley. The name of the
+.\" University may not be used to endorse or promote products derived
+.\" from this software without specific prior written permission.
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+.\" %%%LICENSE_END
+.\"
+.TH mailaddr 7 2023-02-05 "Linux man-pages 6.05.01"
+.UC 5
+.SH NAME
+mailaddr \- mail addressing description
+.SH DESCRIPTION
+.nh
+This manual page gives a brief introduction to SMTP mail addresses,
+as used on the Internet.
+These addresses are in the general format
+.PP
+.in +4n
+.EX
+user@domain
+.EE
+.in
+.PP
+where a domain is a hierarchical dot-separated list of subdomains.
+These examples are valid forms of the same address:
+.PP
+.in +4n
+.EX
+john.doe@monet.example.com
+John Doe <john.doe@monet.example.com>
+john.doe@monet.example.com (John Doe)
+.EE
+.in
+.PP
+The domain part ("monet.example.com") is a mail-accepting domain.
+It can be a host and in the past it usually was, but it doesn't have to be.
+The domain part is not case sensitive.
+.PP
+The local part ("john.doe") is often a username,
+but its meaning is defined by the local software.
+Sometimes it is case sensitive,
+although that is unusual.
+If you see a local-part that looks like garbage,
+it is usually because of a gateway between an internal e-mail
+system and the net; here are some examples:
+.PP
+.in +4n
+.EX
+"surname/admd=telemail/c=us/o=hp/prmd=hp"@some.where
+USER%SOMETHING@some.where
+machine!machine!name@some.where
+I2461572@some.where
+.EE
+.in
+.PP
+(These are, respectively, an X.400 gateway, a gateway to an arbitrary
+internal mail system that lacks proper internet support, a UUCP
+gateway, and the last one is just boring username policy.)
+.PP
+The real-name part ("John Doe") can either be placed before
+<>, or in () at the end.
+(Strictly speaking the two aren't the same,
+but the difference is beyond the scope of this page.)
+The name may have to be quoted using "", for example, if it contains ".":
+.PP
+.in +4n
+.EX
+"John Q. Doe" <john.doe@monet.example.com>
+.EE
+.in
+.SS Abbreviation
+Some mail systems let users abbreviate the domain name.
+For instance,
+users at example.com may get away with "john.doe@monet" to
+send mail to John Doe.
+.I This behavior is deprecated.
+Sometimes it works, but you should not depend on it.
+.SS Route-addrs
+In the past, sometimes one had to route a message through
+several hosts to get it to its final destination.
+Addresses which show these relays are termed "route-addrs".
+These use the syntax:
+.PP
+.in +4n
+.EX
+<@hosta,@hostb:user@hostc>
+.EE
+.in
+.PP
+This specifies that the message should be sent to hosta,
+from there to hostb, and finally to hostc.
+Many hosts disregard route-addrs and send directly to hostc.
+.PP
+Route-addrs are very unusual now.
+They occur sometimes in old mail archives.
+It is generally possible to ignore all but the "user@hostc"
+part of the address to determine the actual address.
+.SS Postmaster
+Every site is required to have a user or user alias designated
+"postmaster" to which problems with the mail system may be
+addressed.
+The "postmaster" address is not case sensitive.
+.SH FILES
+.I /etc/aliases
+.br
+.I \[ti]/.forward
+.SH SEE ALSO
+.BR mail (1),
+.BR aliases (5),
+.BR forward (5),
+.BR sendmail (8)
+.PP
+.UR http://www.ietf.org\:/rfc\:/rfc5322.txt
+IETF RFC\ 5322
+.UE
diff --git a/man7/man-pages.7 b/man7/man-pages.7
new file mode 100644
index 0000000..6d8b0ea
--- /dev/null
+++ b/man7/man-pages.7
@@ -0,0 +1,1227 @@
+'\" t
+.\" (C) Copyright 1992-1999 Rickard E. Faith and David A. Wheeler
+.\" (faith@cs.unc.edu and dwheeler@ida.org)
+.\" and (C) Copyright 2007 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 2007-05-30 created by mtk, using text from old man.7 plus
+.\" rewrites and additional text.
+.\"
+.TH man-pages 7 2023-03-30 "Linux man-pages 6.05.01"
+.SH NAME
+man-pages \- conventions for writing Linux man pages
+.SH SYNOPSIS
+.B man
+.RI [ section ]
+.I title
+.SH DESCRIPTION
+This page describes the conventions that should be employed
+when writing man pages for the Linux \fIman-pages\fP project,
+which documents the user-space API provided by the Linux kernel
+and the GNU C library.
+The project thus provides most of the pages in Section 2,
+many of the pages that appear in Sections 3, 4, and 7,
+and a few of the pages that appear in Sections 1, 5, and 8
+of the man pages on a Linux system.
+The conventions described on this page may also be useful
+for authors writing man pages for other projects.
+.SS Sections of the manual pages
+The manual Sections are traditionally defined as follows:
+.TP
+.B 1 User commands (Programs)
+Commands that can be executed by the user from within
+a shell.
+.TP
+.B 2 System calls
+Functions which wrap operations performed by the kernel.
+.TP
+.B 3 Library calls
+All library functions excluding the system call wrappers
+(Most of the
+.I libc
+functions).
+.TP
+.B 4 Special files (devices)
+Files found in
+.I /dev
+which allow access to devices through the kernel.
+.TP
+.B 5 File formats and configuration files
+Describes various human-readable file formats and configuration files.
+.TP
+.B 6 Games
+Games and funny little programs available on the system.
+.TP
+.B 7 Overview, conventions, and miscellaneous
+Overviews or descriptions of various topics, conventions, and protocols,
+character set standards, the standard filesystem layout, and miscellaneous
+other things.
+.TP
+.B 8 System management commands
+Commands like
+.BR mount (8),
+many of which only root can execute.
+.\" .TP
+.\" .B 9 Kernel routines
+.\" This is an obsolete manual section.
+.\" Once it was thought a good idea to document the Linux kernel here,
+.\" but in fact very little has been documented, and the documentation
+.\" that exists is outdated already.
+.\" There are better sources of
+.\" information for kernel developers.
+.SS Macro package
+New manual pages should be marked up using the
+.B groff an.tmac
+package described in
+.BR man (7).
+This choice is mainly for consistency: the vast majority of
+existing Linux manual pages are marked up using these macros.
+.SS Conventions for source file layout
+Please limit source code line length to no more than about 75 characters
+wherever possible.
+This helps avoid line-wrapping in some mail clients when patches are
+submitted inline.
+.SS Title line
+The first command in a man page should be a
+.B TH
+command:
+.PP
+.RS
+.B \&.TH
+.I "title section date source manual-section"
+.RE
+.PP
+The arguments of the command are as follows:
+.TP
+.I title
+The title of the man page, written in all caps (e.g.,
+.IR MAN-PAGES ).
+.TP
+.I section
+The section number in which the man page should be placed (e.g.,
+.IR 7 ).
+.TP
+.I date
+The date of the last nontrivial change that was made to the man page.
+(Within the
+.I man-pages
+project, the necessary updates to these timestamps are handled
+automatically by scripts, so there is no need to manually update
+them as part of a patch.)
+Dates should be written in the form YYYY-MM-DD.
+.TP
+.I source
+The name and version of the project that provides the manual page
+(not necessarily the package that provides the functionality).
+.TP
+.I manual-section
+Normally, this should be empty,
+since the default value will be good.
+.\"
+.SS Sections within a manual page
+The list below shows conventional or suggested sections.
+Most manual pages should include at least the
+.B highlighted
+sections.
+Arrange a new manual page so that sections
+are placed in the order shown in the list.
+.PP
+.RS
+.TS
+l l.
+\fBNAME\fP
+LIBRARY [Normally only in Sections 2, 3]
+\fBSYNOPSIS\fP
+CONFIGURATION [Normally only in Section 4]
+\fBDESCRIPTION\fP
+OPTIONS [Normally only in Sections 1, 8]
+EXIT STATUS [Normally only in Sections 1, 8]
+RETURN VALUE [Normally only in Sections 2, 3]
+.\" May 07: Few current man pages have an ERROR HANDLING section,,,
+.\" ERROR HANDLING,
+ERRORS [Typically only in Sections 2, 3]
+.\" May 07: Almost no current man pages have a USAGE section,,,
+.\" USAGE,
+.\" DIAGNOSTICS,
+.\" May 07: Almost no current man pages have a SECURITY section,,,
+.\" SECURITY,
+ENVIRONMENT
+FILES
+ATTRIBUTES [Normally only in Sections 2, 3]
+VERSIONS [Normally only in Sections 2, 3]
+STANDARDS
+HISTORY
+NOTES
+CAVEATS
+BUGS
+EXAMPLES
+.\" AUTHORS sections are discouraged
+AUTHORS [Discouraged]
+REPORTING BUGS [Not used in man-pages]
+COPYRIGHT [Not used in man-pages]
+\fBSEE ALSO\fP
+.TE
+.RE
+.PP
+.IR "Where a traditional heading would apply" ", " "please use it" ;
+this kind of consistency can make the information easier to understand.
+If you must, you can create your own
+headings if they make things easier to understand (this can
+be especially useful for pages in Sections 4 and 5).
+However, before doing this, consider whether you could use the
+traditional headings, with some subsections (\fI.SS\fP) within
+those sections.
+.PP
+The following list elaborates on the contents of each of
+the above sections.
+.TP
+.B NAME
+The name of this manual page.
+.IP
+See
+.BR man (7)
+for important details of the line(s) that should follow the
+\fB.SH NAME\fP command.
+All words in this line (including the word immediately
+following the "\e\-") should be in lowercase,
+except where English or technical terminological convention
+dictates otherwise.
+.TP
+.B LIBRARY
+The library providing a symbol.
+.IP
+It shows the common name of the library,
+and in parentheses,
+the name of the library file
+and, if needed, the linker flag needed to link a program against it:
+.RI ( libfoo "[, " \-lfoo ]).
+.TP
+.B SYNOPSIS
+A brief summary of the command or function's interface.
+.IP
+For commands, this shows the syntax of the command and its arguments
+(including options);
+boldface is used for as-is text and italics are used to
+indicate replaceable arguments.
+Brackets ([]) surround optional arguments, vertical bars (|)
+separate choices, and ellipses (\&...) indicate items that can be repeated.
+For functions, it shows any required data declarations or
+.B #include
+directives, followed by the function declaration.
+.IP
+Where a feature test macro must be defined in order to obtain
+the declaration of a function (or a variable) from a header file,
+then the SYNOPSIS should indicate this, as described in
+.BR feature_test_macros (7).
+.\" FIXME . Say something here about compiler options
+.TP
+.B CONFIGURATION
+Configuration details for a device.
+.IP
+This section normally appears only in Section 4 pages.
+.TP
+.B DESCRIPTION
+An explanation of what the program, function, or format does.
+.IP
+Discuss how it interacts with files and standard input, and what it
+produces on standard output or standard error.
+Omit internals and implementation details unless they're critical for
+understanding the interface.
+Describe the usual case;
+for information on command-line options of a program use the
+.B OPTIONS
+section.
+.\" If there is some kind of input grammar or complex set of subcommands,
+.\" consider describing them in a separate
+.\" .B USAGE
+.\" section (and just place an overview in the
+.\" .B DESCRIPTION
+.\" section).
+.IP
+When describing new behavior or new flags for
+a system call or library function,
+be careful to note the kernel or C library version
+that introduced the change.
+The preferred method of noting this information for flags is as part of a
+.B .TP
+list, in the following form (here, for a new system call flag):
+.RS 16
+.TP
+.BR XYZ_FLAG " (since Linux 3.7)"
+Description of flag...
+.RE
+.IP
+Including version information is especially useful to users
+who are constrained to using older kernel or C library versions
+(which is typical in embedded systems, for example).
+.TP
+.B OPTIONS
+A description of the command-line options accepted by a
+program and how they change its behavior.
+.IP
+This section should appear only for Section 1 and 8 manual pages.
+.\" .TP
+.\" .B USAGE
+.\" describes the grammar of any sublanguage this implements.
+.TP
+.B EXIT STATUS
+A list of the possible exit status values of a program and
+the conditions that cause these values to be returned.
+.IP
+This section should appear only for Section 1 and 8 manual pages.
+.TP
+.B RETURN VALUE
+For Section 2 and 3 pages, this section gives a
+list of the values the library routine will return to the caller
+and the conditions that cause these values to be returned.
+.TP
+.B ERRORS
+For Section 2 and 3 manual pages, this is a list of the
+values that may be placed in
+.I errno
+in the event of an error, along with information about the cause
+of the errors.
+.IP
+Where several different conditions produce the same error,
+the preferred approach is to create separate list entries
+(with duplicate error names) for each of the conditions.
+This makes the separate conditions clear, may make the list easier to read,
+and allows metainformation
+(e.g., kernel version number where the condition first became applicable)
+to be more easily marked for each condition.
+.IP
+.IR "The error list should be in alphabetical order" .
+.TP
+.B ENVIRONMENT
+A list of all environment variables that affect the program or function
+and how they affect it.
+.TP
+.B FILES
+A list of the files the program or function uses, such as
+configuration files, startup files,
+and files the program directly operates on.
+.IP
+Give the full pathname of these files, and use the installation
+process to modify the directory part to match user preferences.
+For many programs, the default installation location is in
+.IR /usr/local ,
+so your base manual page should use
+.I /usr/local
+as the base.
+.\" May 07: Almost no current man pages have a DIAGNOSTICS section;
+.\" "RETURN VALUE" or "EXIT STATUS" is preferred.
+.\" .TP
+.\" .B DIAGNOSTICS
+.\" gives an overview of the most common error messages and how to
+.\" cope with them.
+.\" You don't need to explain system error messages
+.\" or fatal signals that can appear during execution of any program
+.\" unless they're special in some way to the program.
+.\"
+.\" May 07: Almost no current man pages have a SECURITY section.
+.\".TP
+.\".B SECURITY
+.\"discusses security issues and implications.
+.\"Warn about configurations or environments that should be avoided,
+.\"commands that may have security implications, and so on, especially
+.\"if they aren't obvious.
+.\"Discussing security in a separate section isn't necessary;
+.\"if it's easier to understand, place security information in the
+.\"other sections (such as the
+.\" .B DESCRIPTION
+.\" or
+.\" .B USAGE
+.\" section).
+.\" However, please include security information somewhere!
+.TP
+.B ATTRIBUTES
+A summary of various attributes of the function(s) documented on this page.
+See
+.BR attributes (7)
+for further details.
+.TP
+.B VERSIONS
+A summary of systems where the API performs differently,
+or where there's a similar API.
+.TP
+.B STANDARDS
+A description of any standards or conventions that relate to the function
+or command described by the manual page.
+.IP
+The preferred terms to use for the various standards are listed as
+headings in
+.BR standards (7).
+.IP
+This section should note the current standards to which the API conforms.
+.IP
+If the API is not governed by any standards but commonly
+exists on other systems, note them.
+If the call is Linux-specific or GNU-specific, note this.
+If it's available in the BSDs, note that.
+.IP
+If this section consists of just a list of standards
+(which it commonly does),
+terminate the list with a period (\[aq].\[aq]).
+.TP
+.B HISTORY
+A brief summary of the Linux kernel or glibc versions where a
+system call or library function appeared,
+or changed significantly in its operation.
+.IP
+As a general rule, every new interface should
+include a HISTORY section in its manual page.
+Unfortunately,
+many existing manual pages don't include this information
+(since there was no policy to do so when they were written).
+Patches to remedy this are welcome,
+but, from the perspective of programmers writing new code,
+this information probably matters only in the case of kernel
+interfaces that have been added in Linux 2.4 or later
+(i.e., changes since Linux 2.2),
+and library functions that have been added to glibc since glibc 2.1
+(i.e., changes since glibc 2.0).
+.IP
+The
+.BR syscalls (2)
+manual page also provides information about kernel versions
+in which various system calls first appeared.
+.IP
+Old versions of standards should be mentioned here,
+rather than in STANDARDS,
+for example,
+SUS, SUSv2, and XPG, or the SVr4 and 4.xBSD implementation standards.
+.TP
+.B NOTES
+Miscellaneous notes.
+.IP
+For Section 2 and 3 man pages you may find it useful to include
+subsections (\fBSS\fP) named \fILinux Notes\fP and \fIglibc Notes\fP.
+.IP
+In Section 2, use the heading
+.I "C library/kernel differences"
+to mark off notes that describe the differences (if any) between
+the C library wrapper function for a system call and
+the raw system call interface provided by the kernel.
+.TP
+.B CAVEATS
+Warnings about typical user misuse of an API,
+that don't constitute an API bug or design defect.
+.TP
+.B BUGS
+A list of limitations, known defects or inconveniences,
+and other questionable activities.
+.TP
+.B EXAMPLES
+One or more examples demonstrating how this function, file, or
+command is used.
+.IP
+For details on writing example programs,
+see \fIExample programs\fP below.
+.TP
+.B AUTHORS
+A list of authors of the documentation or program.
+.IP
+\fBUse of an AUTHORS section is strongly discouraged\fP.
+Generally, it is better not to clutter every page with a list
+of (over time potentially numerous) authors;
+if you write or significantly amend a page,
+add a copyright notice as a comment in the source file.
+If you are the author of a device driver and want to include
+an address for reporting bugs, place this under the BUGS section.
+.TP
+.B REPORTING BUGS
+The
+.I man-pages
+project doesn't use a REPORTING BUGS section in manual pages.
+Information on reporting bugs is instead supplied in the
+script-generated COLOPHON section.
+However, various projects do use a REPORTING BUGS section.
+It is recommended to place it near the foot of the page.
+.TP
+.B COPYRIGHT
+The
+.I man-pages
+project doesn't use a COPYRIGHT section in manual pages.
+Copyright information is instead maintained in the page source.
+In pages where this section is present,
+it is recommended to place it near the foot of the page, just above SEE ALSO.
+.TP
+.B SEE ALSO
+A comma-separated list of related man pages, possibly followed by
+other related pages or documents.
+.IP
+The list should be ordered by section number and
+then alphabetically by name.
+Do not terminate this list with a period.
+.IP
+Where the SEE ALSO list contains many long manual page names,
+to improve the visual result of the output, it may be useful to employ the
+.I .ad l
+(don't right justify)
+and
+.I .nh
+(don't hyphenate)
+directives.
+Hyphenation of individual page names can be prevented
+by preceding words with the string "\e%".
+.IP
+Given the distributed, autonomous nature of FOSS projects
+and their documentation, it is sometimes necessary\[em]and in many cases
+desirable\[em]that the SEE ALSO section includes references to
+manual pages provided by other projects.
+.SH FORMATTING AND WORDING CONVENTIONS
+The following subsections note some details for preferred formatting and
+wording conventions in various sections of the pages in the
+.I man-pages
+project.
+.SS SYNOPSIS
+Wrap the function prototype(s) in a
+.IR .nf / .fi
+pair to prevent filling.
+.PP
+In general, where more than one function prototype is shown in the SYNOPSIS,
+the prototypes should
+.I not
+be separated by blank lines.
+However, blank lines (achieved using
+.IR .PP )
+may be added in the following cases:
+.IP \[bu] 3
+to separate long lists of function prototypes into related groups
+(see for example
+.BR list (3));
+.IP \[bu]
+in other cases that may improve readability.
+.PP
+In the SYNOPSIS, a long function prototype may need to be
+continued over to the next line.
+The continuation line is indented according to the following rules:
+.IP (1) 5
+If there is a single such prototype that needs to be continued,
+then align the continuation line so that when the page is
+rendered on a fixed-width font device (e.g., on an xterm) the
+continuation line starts just below the start of the argument
+list in the line above.
+(Exception: the indentation may be
+adjusted if necessary to prevent a very long continuation line
+or a further continuation line where the function prototype is
+very long.)
+As an example:
+.IP
+.in +4n
+.nf
+.BI "int tcsetattr(int " fd ", int " optional_actions ,
+.BI " const struct termios *" termios_p );
+.fi
+.in
+.IP (2)
+But, where multiple functions in the SYNOPSIS require
+continuation lines, and the function names have different
+lengths, then align all continuation lines to start in the
+same column.
+This provides a nicer rendering in PDF output
+(because the SYNOPSIS uses a variable width font where
+spaces render narrower than most characters).
+As an example:
+.IP
+.in +4n
+.nf
+.BI "int getopt(int " argc ", char * const " argv[] ,
+.BI " const char *" optstring );
+.BI "int getopt_long(int " argc ", char * const " argv[] ,
+.BI " const char *" optstring ,
+.BI " const struct option *" longopts ", int *" longindex );
+.fi
+.in
+.SS RETURN VALUE
+The preferred wording to describe how
+.I errno
+is set is
+.RI \[dq] errno
+is set to indicate the error"
+or similar.
+.\" Before man-pages 5.11, many different wordings were used, which
+.\" was confusing, and potentially made scripted edits more difficult.
+This wording is consistent with the wording used in both POSIX.1 and FreeBSD.
+.SS ATTRIBUTES
+.\" See man-pages commit c466875ecd64ed3d3cd3e578406851b7dfb397bf
+Note the following:
+.IP \[bu] 3
+Wrap the table in this section in a
+.IR ".ad\ l" / .ad
+pair to disable text justification and a
+.IR .nh / .hy
+pair to disable hyphenation.
+.IP \[bu]
+Ensure that the table occupies the full page width through the use of an
+.I lbx
+description for one of the columns
+(usually the first column,
+though in some cases the last column if it contains a lot of text).
+.IP \[bu]
+Make free use of
+.IR T{ / T}
+macro pairs to allow table cells to be broken over multiple lines
+(also bearing in mind that pages may sometimes be rendered to a
+width of less than 80 columns).
+.PP
+For examples of all of the above, see the source code of various pages.
+.SH STYLE GUIDE
+The following subsections describe the preferred style for the
+.I man-pages
+project.
+For details not covered below, the Chicago Manual of Style
+is usually a good source;
+try also grepping for preexisting usage in the project source tree.
+.SS Use of gender-neutral language
+As far as possible, use gender-neutral language in the text of man
+pages.
+Use of "they" ("them", "themself", "their") as a gender-neutral singular
+pronoun is acceptable.
+.\"
+.SS Formatting conventions for manual pages describing commands
+For manual pages that describe a command (typically in Sections 1 and 8),
+the arguments are always specified using italics,
+.IR "even in the SYNOPSIS section" .
+.PP
+The name of the command, and its options, should
+always be formatted in bold.
+.\"
+.SS Formatting conventions for manual pages describing functions
+For manual pages that describe functions (typically in Sections 2 and 3),
+the arguments are always specified using italics,
+.IR "even in the SYNOPSIS section" ,
+where the rest of the function is specified in bold:
+.PP
+.BI " int myfunction(int " argc ", char **" argv );
+.PP
+Variable names should, like argument names, be specified in italics.
+.PP
+Any reference to the subject of the current manual page
+should be written with the name in bold followed by
+a pair of parentheses in Roman (normal) font.
+For example, in the
+.BR fcntl (2)
+man page, references to the subject of the page would be written as:
+.BR fcntl ().
+The preferred way to write this in the source file is:
+.PP
+.EX
+ .BR fcntl ()
+.EE
+.PP
+(Using this format, rather than the use of "\efB...\efP()"
+makes it easier to write tools that parse man page source files.)
+.\"
+.SS Use semantic newlines
+In the source of a manual page,
+new sentences should be started on new lines,
+long sentences should be split into lines at clause breaks
+(commas, semicolons, colons, and so on),
+and long clauses should be split at phrase boundaries.
+This convention, sometimes known as "semantic newlines",
+makes it easier to see the effect of patches,
+which often operate at the level of
+individual sentences, clauses, or phrases.
+.\"
+.SS Lists
+There are different kinds of lists:
+.TP
+Tagged paragraphs
+These are used for a list of tags and their descriptions.
+When the tags are constants (either macros or numbers)
+they are in bold.
+Use the
+.B .TP
+macro.
+.IP
+An example is this "Tagged paragraphs" subsection itself.
+.TP
+Ordered lists
+Elements are preceded by a number in parentheses (1), (2).
+These represent a set of steps that have an order.
+.IP
+When there are substeps,
+they will be numbered like (4.2).
+.TP
+Positional lists
+Elements are preceded by a number (index) in square brackets [4], [5].
+These represent fields in a set.
+The first index will be:
+.RS
+.TP
+.B 0
+When it represents fields of a C data structure,
+to be consistent with arrays.
+.PD 0
+.TP
+.B 1
+When it represents fields of a file,
+to be consistent with tools like
+.BR cut (1).
+.PD
+.RE
+.TP
+Alternatives list
+Elements are preceded by a letter in parentheses (a), (b).
+These represent a set of (normally) exclusive alternatives.
+.TP
+Bullet lists
+Elements are preceded by bullet symbols
+.RB ( \e[bu] ).
+Anything that doesn't fit elsewhere is
+usually covered by this type of list.
+.TP
+Numbered notes
+Not really a list,
+but the syntax is identical to "positional lists".
+.PP
+There should always be exactly
+2 spaces between the list symbol and the elements.
+This doesn't apply to "tagged paragraphs",
+which use the default indentation rules.
+.\"
+.SS Formatting conventions (general)
+Paragraphs should be separated by suitable markers (usually either
+.I .PP
+or
+.IR .IP ).
+Do
+.I not
+separate paragraphs using blank lines, as this results in poor rendering
+in some output formats (such as PostScript and PDF).
+.PP
+Filenames (whether pathnames, or references to header files)
+are always in italics (e.g.,
+.IR <stdio.h> ),
+except in the SYNOPSIS section, where included files are in bold (e.g.,
+.BR "#include <stdio.h>" ).
+When referring to a standard header file include,
+specify the header file surrounded by angle brackets,
+in the usual C way (e.g.,
+.IR <stdio.h> ).
+.PP
+Special macros, which are usually in uppercase, are in bold (e.g.,
+.BR MAXINT ).
+Exception: don't boldface NULL.
+.PP
+When enumerating a list of error codes, the codes are in bold (this list
+usually uses the
+.B \&.TP
+macro).
+.PP
+Complete commands should, if long,
+be written as an indented line on their own,
+with a blank line before and after the command, for example
+.PP
+.in +4n
+.EX
+man 7 man\-pages
+.EE
+.in
+.PP
+If the command is short, then it can be included inline in the text,
+in italic format, for example,
+.IR "man 7 man-pages" .
+In this case, it may be worth using nonbreaking spaces
+(\e[ti]) at suitable places in the command.
+Command options should be written in italics (e.g.,
+.IR \-l ).
+.PP
+Expressions, if not written on a separate indented line, should
+be specified in italics.
+Again, the use of nonbreaking spaces may be appropriate
+if the expression is inlined with normal text.
+.PP
+When showing example shell sessions,
+user input should be formatted in bold,
+for example
+.PP
+.in +4n
+.EX
+$ \fBdate\fP
+Thu Jul 7 13:01:27 CEST 2016
+.EE
+.in
+.PP
+Any reference to another man page
+should be written with the name in bold,
+.I always
+followed by the section number,
+formatted in Roman (normal) font, without any
+separating spaces (e.g.,
+.BR intro (2)).
+The preferred way to write this in the source file is:
+.PP
+.EX
+ .BR intro (2)
+.EE
+.PP
+(Including the section number in cross references lets tools like
+.BR man2html (1)
+create properly hyperlinked pages.)
+.PP
+Control characters should be written in bold face,
+with no quotes; for example,
+.BR \[ha]X .
+.SS Spelling
+Starting with release 2.59,
+.I man-pages
+follows American spelling conventions
+(previously, there was a random mix of British and American spellings);
+please write all new pages and patches according to these conventions.
+.PP
+Aside from the well-known spelling differences,
+there are a few other subtleties to watch for:
+.IP \[bu] 3
+American English tends to use the forms "backward", "upward", "toward",
+and so on
+rather than the British forms "backwards", "upwards", "towards", and so on.
+.IP \[bu]
+Opinions are divided on "acknowledgement" vs "acknowledgment".
+The latter is predominant, but not universal usage in American English.
+POSIX and the BSD license use the former spelling.
+In the Linux man-pages project, we use "acknowledgement".
+.SS BSD version numbers
+The classical scheme for writing BSD version numbers is
+.IR x.yBSD ,
+where
+.I x.y
+is the version number (e.g., 4.2BSD).
+Avoid forms such as
+.IR "BSD 4.3" .
+.SS Capitalization
+In subsection ("SS") headings,
+capitalize the first word in the heading, but otherwise use lowercase,
+except where English usage (e.g., proper nouns) or programming
+language requirements (e.g., identifier names) dictate otherwise.
+For example:
+.PP
+.in +4n
+.EX
+\&.SS Unicode under Linux
+.EE
+.in
+.\"
+.SS Indentation of structure definitions, shell session logs, and so on
+When structure definitions, shell session logs, and so on are included
+in running text, indent them by 4 spaces (i.e., a block enclosed by
+.I ".in\ +4n"
+and
+.IR ".in" ),
+format them using the
+.I .EX
+and
+.I .EE
+macros, and surround them with suitable paragraph markers (either
+.I .PP
+or
+.IR .IP ).
+For example:
+.PP
+.in +4n
+.EX
+\&.PP
+\&.in +4n
+\&.EX
+int
+main(int argc, char *argv[])
+{
+ return 0;
+}
+\&.EE
+\&.in
+\&.PP
+.EE
+.in
+.SS Preferred terms
+The following table lists some preferred terms to use in man pages,
+mainly to ensure consistency across pages.
+.ad l
+.TS
+l l l
+---
+l l ll.
+Term Avoid using Notes
+
+bit mask bitmask
+built-in builtin
+Epoch epoch T{
+For the UNIX Epoch (00:00:00, 1 Jan 1970 UTC)
+T}
+filename file name
+filesystem file system
+hostname host name
+inode i-node
+lowercase lower case, lower-case
+nonzero non-zero
+pathname path name
+pseudoterminal pseudo-terminal
+privileged port T{
+reserved port,
+system port
+T}
+real-time T{
+realtime,
+real time
+T}
+run time runtime
+saved set-group-ID T{
+saved group ID,
+saved set-GID
+T}
+saved set-user-ID T{
+saved user ID,
+saved set-UID
+T}
+set-group-ID set-GID, setgid
+set-user-ID set-UID, setuid
+superuser T{
+super user,
+super-user
+T}
+superblock T{
+super block,
+super-block
+T}
+symbolic link symlink
+timestamp time stamp
+timezone time zone
+uppercase upper case, upper-case
+usable useable
+user space userspace
+username user name
+x86-64 x86_64 T{
+Except if referring to result of "uname\ \-m" or similar
+T}
+zeros zeroes
+.TE
+.PP
+See also the discussion
+.I Hyphenation of attributive compounds
+below.
+.SS Terms to avoid
+The following table lists some terms to avoid using in man pages,
+along with some suggested alternatives,
+mainly to ensure consistency across pages.
+.ad l
+.TS
+l l l
+---
+l l l.
+Avoid Use instead Notes
+
+32bit 32-bit T{
+same for 8-bit, 16-bit, etc.
+T}
+current process calling process T{
+A common mistake made by kernel programmers when writing man pages
+T}
+manpage T{
+man page, manual page
+T}
+minus infinity negative infinity
+non-root unprivileged user
+non-superuser unprivileged user
+nonprivileged unprivileged
+OS operating system
+plus infinity positive infinity
+pty pseudoterminal
+tty terminal
+Unices UNIX systems
+Unixes UNIX systems
+.TE
+.ad
+.\"
+.SS Trademarks
+Use the correct spelling and case for trademarks.
+The following is a list of the correct spellings of various
+relevant trademarks that are sometimes misspelled:
+.IP
+.TS
+l.
+DG/UX
+HP-UX
+UNIX
+UnixWare
+.TE
+.SS NULL, NUL, null pointer, and null byte
+A
+.I null pointer
+is a pointer that points to nothing,
+and is normally indicated by the constant
+.IR NULL .
+On the other hand,
+.I NUL
+is the
+.IR "null byte" ,
+a byte with the value 0, represented in C via the character constant
+.IR \[aq]\e0\[aq] .
+.PP
+The preferred term for the pointer is "null pointer" or simply "NULL";
+avoid writing "NULL pointer".
+.PP
+The preferred term for the byte is "null byte".
+Avoid writing "NUL", since it is too easily confused with "NULL".
+Avoid also the terms "zero byte" and "null character".
+The byte that terminates a C string should be described
+as "the terminating null byte";
+strings may be described as "null-terminated",
+but avoid the use of "NUL-terminated".
+.SS Hyperlinks
+For hyperlinks, use the
+.IR .UR / .UE
+macro pair
+(see
+.BR groff_man (7)).
+This produces proper hyperlinks that can be used in a web browser,
+when rendering a page with, say:
+.PP
+.in +4n
+.EX
+BROWSER=firefox man -H pagename
+.EE
+.in
+.SS Use of e.g., i.e., etc., a.k.a., and similar
+In general, the use of abbreviations such as "e.g.", "i.e.", "etc.",
+"cf.", and "a.k.a." should be avoided,
+in favor of suitable full wordings
+("for example", "that is", "and so on", "compare to", "also known as").
+.PP
+The only place where such abbreviations may be acceptable is in
+.I short
+parenthetical asides (e.g., like this one).
+.PP
+Always include periods in such abbreviations, as shown here.
+In addition, "e.g." and "i.e." should always be followed by a comma.
+.SS Em-dashes
+The way to write an em-dash\[em]the glyph that appears
+at either end of this subphrase\[em]in *roff is with the macro "\e[em]".
+(On an ASCII terminal, an em-dash typically renders as two hyphens,
+but in other typographical contexts it renders as a long dash.)
+Em-dashes should be written
+.I without
+surrounding spaces.
+.SS Hyphenation of attributive compounds
+Compound terms should be hyphenated when used attributively
+(i.e., to qualify a following noun). Some examples:
+.IP
+.TS
+l.
+32-bit value
+command-line argument
+floating-point number
+run-time check
+user-space function
+wide-character string
+.TE
+.SS Hyphenation with multi, non, pre, re, sub, and so on
+The general tendency in modern English is not to hyphenate
+after prefixes such as "multi", "non", "pre", "re", "sub", and so on.
+Manual pages should generally follow this rule when these prefixes are
+used in natural English constructions with simple suffixes.
+The following list gives some examples of the preferred forms:
+.IP
+.TS
+l.
+interprocess
+multithreaded
+multiprocess
+nonblocking
+nondefault
+nonempty
+noninteractive
+nonnegative
+nonportable
+nonzero
+preallocated
+precreate
+prerecorded
+reestablished
+reinitialize
+rearm
+reread
+subcomponent
+subdirectory
+subsystem
+.TE
+.PP
+Hyphens should be retained when the prefixes are used in nonstandard
+English words, with trademarks, proper nouns, acronyms, or compound terms.
+Some examples:
+.IP
+.TS
+l.
+non-ASCII
+non-English
+non-NULL
+non-real-time
+.TE
+.PP
+Finally, note that "re-create" and "recreate" are two different verbs,
+and the former is probably what you want.
+.\"
+.SS Generating optimal glyphs
+Where a real minus character is required (e.g., for numbers such as \-1,
+for man page cross references such as
+.BR utf\-8 (7),
+or when writing options that have a leading dash, such as in
+.IR "ls\ \-l"),
+use the following form in the man page source:
+.PP
+.in +4n
+.EX
+\e\-
+.EE
+.in
+.PP
+This guideline applies also to code examples.
+.PP
+The use of real minus signs serves the following purposes:
+.\" https://lore.kernel.org/linux-man/20210121061158.5ul7226fgbrmodbt@localhost.localdomain/
+.IP \[bu] 3
+To provide better renderings on various targets other than
+ASCII terminals,
+notably in PDF and on Unicode/UTF\-8-capable terminals.
+.IP \[bu]
+To generate glyphs that when copied from rendered pages will
+produce real minus signs when pasted into a terminal.
+.PP
+To produce unslanted single quotes that render well in ASCII, UTF-8, and PDF,
+use "\e[aq]" ("apostrophe quote"); for example
+.PP
+.in +4n
+.EX
+\e[aq]C\e[aq]
+.EE
+.in
+.PP
+where
+.I C
+is the quoted character.
+This guideline applies also to character constants used in code examples.
+.PP
+Where a proper caret (\[ha]) that renders well in both a terminal and PDF
+is required, use "\\[ha]".
+This is especially necessary in code samples,
+to get a nicely rendered caret when rendering to PDF.
+.PP
+Using a naked "\[ti]" character results in a poor rendering in PDF.
+Instead use "\\[ti]".
+This is especially necessary in code samples,
+to get a nicely rendered tilde when rendering to PDF.
+.\"
+.SS Example programs and shell sessions
+Manual pages may include example programs demonstrating how to
+use a system call or library function.
+However, note the following:
+.IP \[bu] 3
+Example programs should be written in C.
+.IP \[bu]
+An example program is necessary and useful only if it demonstrates
+something beyond what can easily be provided in a textual
+description of the interface.
+An example program that does nothing
+other than call an interface usually serves little purpose.
+.IP \[bu]
+Example programs should ideally be short
+(e.g., a good example can often be provided in less than 100 lines of code),
+though in some cases longer programs may be necessary
+to properly illustrate the use of an API.
+.IP \[bu]
+Expressive code is appreciated.
+.IP \[bu]
+Comments should be included where helpful.
+Complete sentences in free-standing comments should be
+terminated by a period.
+Periods should generally be omitted in "tag" comments
+(i.e., comments that are placed on the same line of code);
+such comments are in any case typically brief phrases
+rather than complete sentences.
+.IP \[bu]
+Example programs should do error checking after system calls and
+library function calls.
+.IP \[bu]
+Example programs should be complete, and compile without
+warnings when compiled with \fIcc\ \-Wall\fP.
+.IP \[bu]
+Where possible and appropriate, example programs should allow
+experimentation, by varying their behavior based on inputs
+(ideally from command-line arguments, or alternatively, via
+input read by the program).
+.IP \[bu]
+Example programs should be laid out according to Kernighan and
+Ritchie style, with 4-space indents.
+(Avoid the use of TAB characters in source code!)
+The following command can be used to format your source code to
+something close to the preferred style:
+.IP
+.in +4n
+.EX
+indent \-npro \-kr \-i4 \-ts4 \-sob \-l72 \-ss \-nut \-psl prog.c
+.EE
+.in
+.IP \[bu]
+For consistency, all example programs should terminate using either of:
+.IP
+.in +4n
+.EX
+exit(EXIT_SUCCESS);
+exit(EXIT_FAILURE);
+.EE
+.in
+.IP
+Avoid using the following forms to terminate a program:
+.IP
+.in +4n
+.EX
+exit(0);
+exit(1);
+return n;
+.EE
+.in
+.IP \[bu]
+If there is extensive explanatory text before the
+program source code, mark off the source code
+with a subsection heading
+.IR "Program source" ,
+as in:
+.IP
+.in +4n
+.EX
+\&.SS Program source
+.EE
+.in
+.IP
+Always do this if the explanatory text includes a shell session log.
+.PP
+If you include a shell session log demonstrating the use of a program
+or other system feature:
+.IP \[bu] 3
+Place the session log above the source code listing.
+.IP \[bu]
+Indent the session log by four spaces.
+.IP \[bu]
+Boldface the user input text,
+to distinguish it from output produced by the system.
+.PP
+For some examples of what example programs should look like, see
+.BR wait (2)
+and
+.BR pipe (2).
+.SH EXAMPLES
+For canonical examples of how man pages in the
+.I man-pages
+package should look, see
+.BR pipe (2)
+and
+.BR fcntl (2).
+.SH SEE ALSO
+.BR man (1),
+.BR man2html (1),
+.BR attributes (7),
+.BR groff (7),
+.BR groff_man (7),
+.BR man (7),
+.BR mdoc (7)
diff --git a/man7/man.7 b/man7/man.7
new file mode 100644
index 0000000..b0788f3
--- /dev/null
+++ b/man7/man.7
@@ -0,0 +1,507 @@
+.\" (C) Copyright 1992-1999 Rickard E. Faith and David A. Wheeler
+.\" (faith@cs.unc.edu and dwheeler@ida.org)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified Sun Jul 25 11:06:05 1993 by Rik Faith (faith@cs.unc.edu)
+.\" Modified Sat Jun 8 00:39:52 1996 by aeb
+.\" Modified Wed Jun 16 23:00:00 1999 by David A. Wheeler (dwheeler@ida.org)
+.\" Modified Thu Jul 15 12:43:28 1999 by aeb
+.\" Modified Sun Jan 6 18:26:25 2002 by Martin Schulze <joey@infodrom.org>
+.\" Modified Tue Jul 27 20:12:02 2004 by Colin Watson <cjwatson@debian.org>
+.\" 2007-05-30, mtk: various rewrites and moved much text to new man-pages.7.
+.\"
+.TH man 7 2023-07-29 "Linux man-pages 6.05.01"
+.SH NAME
+man \- macros to format man pages
+.SH SYNOPSIS
+.B groff \-Tascii \-man
+.I file
+\&...
+.br
+.B groff \-Tps \-man
+.I file
+\&...
+.PP
+.B man
+.RI [ section ]
+.I title
+.SH DESCRIPTION
+This manual page explains the
+.B "groff an.tmac"
+macro package (often called the
+.B man
+macro package).
+This macro package should be used by developers when
+writing or porting man pages for Linux.
+It is fairly compatible with other
+versions of this macro package, so porting man pages should not be a major
+problem (exceptions include the NET-2 BSD release, which uses a totally
+different macro package called mdoc; see
+.BR mdoc (7)).
+.PP
+Note that NET-2 BSD mdoc man pages can be used with
+.B groff
+simply by specifying the
+.B \-mdoc
+option instead of the
+.B \-man
+option.
+Using the
+.B \-mandoc
+option is, however, recommended, since this will automatically detect which
+macro package is in use.
+.PP
+For conventions that should be employed when writing man pages
+for the Linux \fIman-pages\fP package, see
+.BR man\-pages (7).
+.SS Title line
+The first command in a man page (after comment lines,
+that is, lines that start with \fB.\e"\fP) should be
+.PP
+.RS
+.B .TH
+.I "title section date source manual"
+.RE
+.PP
+For details of the arguments that should be supplied to the
+.B TH
+command, see
+.BR man\-pages (7).
+.PP
+Note that BSD mdoc-formatted pages begin with the
+.B Dd
+command, not the
+.B TH
+command.
+.SS Sections
+Sections are started with
+.B .SH
+followed by the heading name.
+.\" The following doesn't seem to be required (see Debian bug 411303),
+.\" If the name contains spaces and appears
+.\" on the same line as
+.\" .BR .SH ,
+.\" then place the heading in double quotes.
+.PP
+The only mandatory heading is NAME, which should be the first section and
+be followed on the next line by a one-line description of the program:
+.PP
+.RS
+\&.SH NAME
+.br
+item \e- description
+.RE
+.PP
+It is extremely important that this format is followed, and that there is a
+backslash before the single dash which follows the item name.
+This syntax is used by the
+.BR mandb (8)
+program to create a database of short descriptions for the
+.BR whatis (1)
+and
+.BR apropos (1)
+commands.
+(See
+.BR lexgrog (1)
+for further details on the syntax of the NAME section.)
+.PP
+For a list of other sections that might appear in a manual page, see
+.BR man\-pages (7).
+.SS Fonts
+The commands to select the type face are:
+.TP 4
+.B .B
+Bold
+.TP
+.B .BI
+Bold alternating with italics
+(especially useful for function specifications)
+.TP
+.B .BR
+Bold alternating with Roman
+(especially useful for referring to other
+manual pages)
+.TP
+.B .I
+Italics
+.TP
+.B .IB
+Italics alternating with bold
+.TP
+.B .IR
+Italics alternating with Roman
+.TP
+.B .RB
+Roman alternating with bold
+.TP
+.B .RI
+Roman alternating with italics
+.TP
+.B .SB
+Small bold
+.TP
+.B .SM
+Small (useful for acronyms)
+.PP
+Traditionally, each command can have up to six arguments, but the GNU
+implementation removes this limitation (you might still want to limit
+yourself to 6 arguments for portability's sake).
+Arguments are delimited by spaces.
+Double quotes can be used to specify an argument which contains spaces.
+For the macros that produce alternating type faces,
+the arguments will be printed next to each other without
+intervening spaces, so that the
+.B .BR
+command can be used to specify a word in bold followed by a mark of
+punctuation in Roman.
+If no arguments are given, the command is applied to the following line
+of text.
+.SS Other macros and strings
+Below are other relevant macros and predefined strings.
+Unless noted otherwise, all macros
+cause a break (end the current line of text).
+Many of these macros set or use the "prevailing indent".
+The "prevailing indent" value is set by any macro with the parameter
+.I i
+below;
+macros may omit
+.I i
+in which case the current prevailing indent will be used.
+As a result, successive indented paragraphs can use the same indent without
+respecifying the indent value.
+A normal (nonindented) paragraph resets the prevailing indent value
+to its default value (0.5 inches).
+By default, a given indent is measured in ens;
+try to use ens or ems as units for
+indents, since these will automatically adjust to font size changes.
+The other key macro definitions are:
+.SS Normal paragraphs
+.TP 9m
+.B .LP
+Same as
+.B .PP
+(begin a new paragraph).
+.TP
+.B .P
+Same as
+.B .PP
+(begin a new paragraph).
+.TP
+.B .PP
+Begin a new paragraph and reset prevailing indent.
+.SS Relative margin indent
+.TP 9m
+.BI .RS " i"
+Start relative margin indent: moves the left margin
+.I i
+to the right (if
+.I i
+is omitted, the prevailing indent value is used).
+A new prevailing indent is set to 0.5 inches.
+As a result, all following paragraph(s) will be
+indented until the corresponding
+.BR .RE .
+.TP
+.B .RE
+End relative margin indent and
+restore the previous value of the prevailing indent.
+.SS Indented paragraph macros
+.TP 9m
+.BI .HP " i"
+Begin paragraph with a hanging indent
+(the first line of the paragraph is at the left margin of
+normal paragraphs, and the rest of the paragraph's lines are indented).
+.TP
+.BI .IP " x i"
+Indented paragraph with optional hanging tag.
+If the tag
+.I x
+is omitted, the entire following paragraph is indented by
+.IR i .
+If the tag
+.I x
+is provided, it is hung at the left margin
+before the following indented paragraph
+(this is just like
+.B .TP
+except the tag is included with the command instead of being on the
+following line).
+If the tag is too long, the text after the tag will be moved down to the
+next line (text will not be lost or garbled).
+For bulleted lists, use this macro with \e(bu (bullet) or \e(em (em dash)
+as the tag, and for numbered lists, use the number or letter followed by
+a period as the tag;
+this simplifies translation to other formats.
+.TP
+.BI .TP " i"
+Begin paragraph with hanging tag.
+The tag is given on the next line, but
+its results are like those of the
+.B .IP
+command.
+.SS Hypertext link macros
+.TP
+.BI .UR " url"
+Insert a hypertext link to the URI (URL)
+.IR url ,
+with all text up to the following
+.B .UE
+macro as the link text.
+.TP
+.BR .UE \~\c
+.RI [ trailer ]
+Terminate the link text of the preceding
+.B .UR
+macro, with the optional
+.I trailer
+(if present, usually a closing parenthesis and/or end-of-sentence
+punctuation) immediately following.
+For non-HTML output devices (e.g.,
+.BR "man \-Tutf8" ),
+the link text is followed by the URL in angle brackets; if there is no
+link text, the URL is printed as its own link text, surrounded by angle
+brackets.
+(Angle brackets may not be available on all output devices.)
+For the HTML output device, the link text is hyperlinked to the URL; if
+there is no link text, the URL is printed as its own link text.
+.PP
+These macros have been supported by GNU Troff since 1.20 (2009-01-05) and
+by Heirloom Doctools Troff since 160217 (2016-02-17).
+.SS Miscellaneous macros
+.TP 9m
+.B .DT
+Reset tabs to default tab values (every 0.5 inches);
+does not cause a break.
+.TP
+.BI .PD " d"
+Set inter-paragraph vertical distance to d
+(if omitted, d=0.4v);
+does not cause a break.
+.TP
+.BI .SS " t"
+Subheading
+.I t
+(like
+.BR .SH ,
+but used for a subsection inside a section).
+.SS Predefined strings
+The
+.B man
+package has the following predefined strings:
+.TP
+\e*R
+Registration Symbol: \*R
+.TP
+\e*S
+Change to default font size
+.TP
+\e*(Tm
+Trademark Symbol: \*(Tm
+.TP
+\e*(lq
+Left angled double quote: \*(lq
+.TP
+\e*(rq
+Right angled double quote: \*(rq
+.SS Safe subset
+Although technically
+.B man
+is a troff macro package, in reality a large number of other tools
+process man page files that don't implement all of troff's abilities.
+Thus, it's best to avoid some of troff's more exotic abilities
+where possible to permit these other tools to work correctly.
+Avoid using the various troff preprocessors
+(if you must, go ahead and use
+.BR tbl (1),
+but try to use the
+.B IP
+and
+.B TP
+commands instead for two-column tables).
+Avoid using computations; most other tools can't process them.
+Use simple commands that are easy to translate to other formats.
+The following troff macros are believed to be safe (though in many cases
+they will be ignored by translators):
+.BR \e" ,
+.BR . ,
+.BR ad ,
+.BR bp ,
+.BR br ,
+.BR ce ,
+.BR de ,
+.BR ds ,
+.BR el ,
+.BR ie ,
+.BR if ,
+.BR fi ,
+.BR ft ,
+.BR hy ,
+.BR ig ,
+.BR in ,
+.BR na ,
+.BR ne ,
+.BR nf ,
+.BR nh ,
+.BR ps ,
+.BR so ,
+.BR sp ,
+.BR ti ,
+.BR tr .
+.PP
+You may also use many troff escape sequences (those sequences beginning
+with \e).
+When you need to include the backslash character as normal text,
+use \ee.
+Other sequences you may use, where x or xx are any characters and N
+is any digit, include:
+.BR \e\[aq] ,
+.BR \e\[ga] ,
+.BR \e- ,
+.BR \e. ,
+.BR \e" ,
+.BR \e% ,
+.BR \e*x ,
+.BR \e*(xx ,
+.BR \e(xx ,
+.BR \e$N ,
+.BR \enx ,
+.BR \en(xx ,
+.BR \efx ,
+and
+.BR \ef(xx .
+Avoid using the escape sequences for drawing graphics.
+.PP
+Do not use the optional parameter for
+.B bp
+(break page).
+Use only positive values for
+.B sp
+(vertical space).
+Don't define a macro
+.RB ( de )
+with the same name as a macro in this or the
+mdoc macro package with a different meaning; it's likely that
+such redefinitions will be ignored.
+Every positive indent
+.RB ( in )
+should be paired with a matching negative indent
+(although you should be using the
+.B RS
+and
+.B RE
+macros instead).
+The condition test
+.RB ( if,ie )
+should only have \[aq]t\[aq] or \[aq]n\[aq] as the condition.
+Only translations
+.RB ( tr )
+that can be ignored should be used.
+Font changes
+.RB ( ft
+and the \fB\ef\fP escape sequence)
+should only have the values 1, 2, 3, 4, R, I, B, P, or CW
+(the ft command may also have no parameters).
+.PP
+If you use capabilities beyond these, check the
+results carefully on several tools.
+Once you've confirmed that the additional capability is safe,
+let the maintainer of this
+document know about the safe command or sequence
+that should be added to this list.
+.SH FILES
+.IR /usr/share/groff/ [*/] tmac/an.tmac
+.br
+.I /usr/man/whatis
+.SH NOTES
+By all means include full URLs (or URIs) in the text itself;
+some tools such as
+.BR man2html (1)
+can automatically turn them into hypertext links.
+You can also use the
+.B UR
+and
+.B UE
+macros to identify links to related information.
+If you include URLs, use the full URL
+(e.g.,
+.UR http://www.kernel.org
+.UE )
+to ensure that tools can automatically find the URLs.
+.PP
+Tools processing these files should open the file and examine the first
+nonwhitespace character.
+A period (.) or single quote (\[aq]) at the beginning
+of a line indicates a troff-based file (such as man or mdoc).
+A left angle bracket (<) indicates an SGML/XML-based
+file (such as HTML or DocBook).
+Anything else suggests simple ASCII
+text (e.g., a "catman" result).
+.PP
+Many man pages begin with \fB\[aq]\e"\fP followed by a
+space and a list of characters,
+indicating how the page is to be preprocessed.
+For portability's sake to non-troff translators we recommend
+that you avoid using anything other than
+.BR tbl (1),
+and Linux can detect that automatically.
+However, you might want to include this information so your man page
+can be handled by other (less capable) systems.
+Here are the definitions of the preprocessors invoked by these characters:
+.TP 3
+.B e
+eqn(1)
+.TP
+.B g
+grap(1)
+.TP
+.B p
+pic(1)
+.TP
+.B r
+refer(1)
+.TP
+.B t
+tbl(1)
+.TP
+.B v
+vgrind(1)
+.SH BUGS
+Most of the macros describe formatting (e.g., font type and spacing) instead
+of marking semantic content (e.g., this text is a reference to another page),
+compared to formats like mdoc and DocBook (even HTML has more semantic
+markings).
+This situation makes it harder to vary the
+.B man
+format for different media,
+to make the formatting consistent for a given medium, and to automatically
+insert cross-references.
+By sticking to the safe subset described above, it should be easier to
+automate transitioning to a different reference page format in the future.
+.PP
+The Sun macro
+.B TX
+is not implemented.
+.\" .SH AUTHORS
+.\" .IP \[em] 3m
+.\" James Clark (jjc@jclark.com) wrote the implementation of the macro package.
+.\" .IP \[em]
+.\" Rickard E. Faith (faith@cs.unc.edu) wrote the initial version of
+.\" this manual page.
+.\" .IP \[em]
+.\" Jens Schweikhardt (schweikh@noc.fdn.de) wrote the Linux Man-Page Mini-HOWTO
+.\" (which influenced this manual page).
+.\" .IP \[em]
+.\" David A. Wheeler (dwheeler@ida.org) heavily modified this
+.\" manual page, such as adding detailed information on sections and macros.
+.SH SEE ALSO
+.BR apropos (1),
+.BR groff (1),
+.BR lexgrog (1),
+.BR man (1),
+.BR man2html (1),
+.BR whatis (1),
+.BR groff_man (7),
+.BR groff_www (7),
+.BR man\-pages (7),
+.BR mdoc (7)
diff --git a/man7/math_error.7 b/man7/math_error.7
new file mode 100644
index 0000000..d3b3c1a
--- /dev/null
+++ b/man7/math_error.7
@@ -0,0 +1,246 @@
+.\" Copyright (c) 2008, Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH math_error 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+math_error \- detecting errors from mathematical functions
+.SH SYNOPSIS
+.nf
+.B #include <math.h>
+.B #include <errno.h>
+.B #include <fenv.h>
+.fi
+.SH DESCRIPTION
+When an error occurs,
+most library functions indicate this fact by returning a special value
+(e.g., \-1 or NULL).
+Because they typically return a floating-point number,
+the mathematical functions declared in
+.I <math.h>
+indicate an error using other mechanisms.
+There are two error-reporting mechanisms:
+the older one sets
+.IR errno ;
+the newer one uses the floating-point exception mechanism (the use of
+.BR feclearexcept (3)
+and
+.BR fetestexcept (3),
+as outlined below)
+described in
+.BR fenv (3).
+.PP
+A portable program that needs to check for an error from a mathematical
+function should set
+.I errno
+to zero, and make the following call
+.PP
+.in +4n
+.EX
+feclearexcept(FE_ALL_EXCEPT);
+.EE
+.in
+.PP
+before calling a mathematical function.
+.PP
+Upon return from the mathematical function, if
+.I errno
+is nonzero, or the following call (see
+.BR fenv (3))
+returns nonzero
+.PP
+.in +4n
+.EX
+fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW |
+ FE_UNDERFLOW);
+.EE
+.in
+.PP
+.\" enum
+.\" {
+.\" FE_INVALID = 0x01,
+.\" __FE_DENORM = 0x02,
+.\" FE_DIVBYZERO = 0x04,
+.\" FE_OVERFLOW = 0x08,
+.\" FE_UNDERFLOW = 0x10,
+.\" FE_INEXACT = 0x20
+.\" };
+then an error occurred in the mathematical function.
+.PP
+The error conditions that can occur for mathematical functions
+are described below.
+.SS Domain error
+A
+.I domain error
+occurs when a mathematical function is supplied with an argument whose
+value falls outside the domain for which the function
+is defined (e.g., giving a negative argument to
+.BR log (3)).
+When a domain error occurs,
+math functions commonly return a NaN
+(though some functions return a different value in this case);
+.I errno
+is set to
+.BR EDOM ,
+and an "invalid"
+.RB ( FE_INVALID )
+floating-point exception is raised.
+.SS Pole error
+A
+.I pole error
+occurs when the mathematical result of a function is an exact infinity
+(e.g., the logarithm of 0 is negative infinity).
+When a pole error occurs,
+the function returns the (signed) value
+.BR HUGE_VAL ,
+.BR HUGE_VALF ,
+or
+.BR HUGE_VALL ,
+depending on whether the function result type is
+.IR double ,
+.IR float ,
+or
+.IR "long double" .
+The sign of the result is that which is mathematically correct for
+the function.
+.I errno
+is set to
+.BR ERANGE ,
+and a "divide-by-zero"
+.RB ( FE_DIVBYZERO )
+floating-point exception is raised.
+.SS Range error
+A
+.I range error
+occurs when the magnitude of the function result means that it
+cannot be represented in the result type of the function.
+The return value of the function depends on whether the range error
+was an overflow or an underflow.
+.PP
+A floating result
+.I overflows
+if the result is finite,
+but is too large to be represented in the result type.
+When an overflow occurs,
+the function returns the value
+.BR HUGE_VAL ,
+.BR HUGE_VALF ,
+or
+.BR HUGE_VALL ,
+depending on whether the function result type is
+.IR double ,
+.IR float ,
+or
+.IR "long double" .
+.I errno
+is set to
+.BR ERANGE ,
+and an "overflow"
+.RB ( FE_OVERFLOW )
+floating-point exception is raised.
+.PP
+A floating result
+.I underflows
+if the result is too small to be represented in the result type.
+If an underflow occurs,
+a mathematical function typically returns 0.0
+(C99 says a function shall return "an implementation-defined value
+whose magnitude is no greater than the smallest normalized
+positive number in the specified type").
+.I errno
+may be set to
+.BR ERANGE ,
+and an "underflow"
+.RB ( FE_UNDERFLOW )
+floating-point exception may be raised.
+.PP
+Some functions deliver a range error if the supplied argument value,
+or the correct function result, would be
+.IR subnormal .
+A subnormal value is one that is nonzero,
+but with a magnitude that is so small that
+it can't be represented in normalized form
+(i.e., with a 1 in the most significant bit of the significand).
+The representation of a subnormal number will contain one
+or more leading zeros in the significand.
+.SH NOTES
+The
+.I math_errhandling
+identifier specified by C99 and POSIX.1 is not supported by glibc.
+.\" See CONFORMANCE in the glibc 2.8 (and earlier) source.
+This identifier is supposed to indicate which of the two
+error-notification mechanisms
+.RI ( errno ,
+exceptions retrievable via
+.BR fetestexcept (3))
+is in use.
+The standards require that at least one be in use,
+but permit both to be available.
+The current (glibc 2.8) situation under glibc is messy.
+Most (but not all) functions raise exceptions on errors.
+Some also set
+.IR errno .
+A few functions set
+.IR errno ,
+but don't raise an exception.
+A very few functions do neither.
+See the individual manual pages for details.
+.PP
+To avoid the complexities of using
+.I errno
+and
+.BR fetestexcept (3)
+for error checking,
+it is often advised that one should instead check for bad argument
+values before each call.
+.\" http://www.securecoding.cert.org/confluence/display/seccode/FLP32-C.+Prevent+or+detect+domain+and+range+errors+in+math+functions
+For example, the following code ensures that
+.BR log (3)'s
+argument is not a NaN and is not zero (a pole error) or
+less than zero (a domain error):
+.PP
+.in +4n
+.EX
+double x, r;
+\&
+if (isnan(x) || islessequal(x, 0)) {
+ /* Deal with NaN / pole error / domain error */
+}
+\&
+r = log(x);
+.EE
+.in
+.PP
+The discussion on this page does not apply to the complex
+mathematical functions (i.e., those declared by
+.IR <complex.h> ),
+which in general are not required to return errors by C99
+and POSIX.1.
+.PP
+The
+.BR gcc (1)
+.I "\-fno\-math\-errno"
+option causes the executable to employ implementations of some
+mathematical functions that are faster than the standard
+implementations, but do not set
+.I errno
+on error.
+(The
+.BR gcc (1)
+.I "\-ffast\-math"
+option also enables
+.IR "\-fno\-math\-errno" .)
+An error can still be tested for using
+.BR fetestexcept (3).
+.SH SEE ALSO
+.BR gcc (1),
+.BR errno (3),
+.BR fenv (3),
+.BR fpclassify (3),
+.BR INFINITY (3),
+.BR isgreater (3),
+.BR matherr (3),
+.BR nan (3)
+.PP
+.I "info libc"
diff --git a/man7/mount_namespaces.7 b/man7/mount_namespaces.7
new file mode 100644
index 0000000..0ce2fee
--- /dev/null
+++ b/man7/mount_namespaces.7
@@ -0,0 +1,1371 @@
+'\" t
+.\" Copyright (c) 2016, 2019, 2021 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH mount_namespaces 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+mount_namespaces \- overview of Linux mount namespaces
+.SH DESCRIPTION
+For an overview of namespaces, see
+.BR namespaces (7).
+.PP
+Mount namespaces provide isolation of the list of mounts seen
+by the processes in each namespace instance.
+Thus, the processes in each of the mount namespace instances
+will see distinct single-directory hierarchies.
+.PP
+The views provided by the
+.IR /proc/ pid /mounts ,
+.IR /proc/ pid /mountinfo ,
+and
+.IR /proc/ pid /mountstats
+files (all described in
+.BR proc (5))
+correspond to the mount namespace in which the process with the PID
+.I pid
+resides.
+(All of the processes that reside in the same mount namespace
+will see the same view in these files.)
+.PP
+A new mount namespace is created using either
+.BR clone (2)
+or
+.BR unshare (2)
+with the
+.B CLONE_NEWNS
+flag.
+When a new mount namespace is created,
+its mount list is initialized as follows:
+.IP \[bu] 3
+If the namespace is created using
+.BR clone (2),
+the mount list of the child's namespace is a copy
+of the mount list in the parent process's mount namespace.
+.IP \[bu]
+If the namespace is created using
+.BR unshare (2),
+the mount list of the new namespace is a copy of
+the mount list in the caller's previous mount namespace.
+.PP
+Subsequent modifications to the mount list
+.RB ( mount (2)
+and
+.BR umount (2))
+in either mount namespace will not (by default) affect the
+mount list seen in the other namespace
+(but see the following discussion of shared subtrees).
+.\"
+.SH SHARED SUBTREES
+After the implementation of mount namespaces was completed,
+experience showed that the isolation that they provided was,
+in some cases, too great.
+For example, in order to make a newly loaded optical disk
+available in all mount namespaces,
+a mount operation was required in each namespace.
+For this use case, and others,
+the shared subtree feature was introduced in Linux 2.6.15.
+This feature allows for automatic, controlled propagation of
+.BR mount (2)
+and
+.BR umount (2)
+.I events
+between namespaces
+(or, more precisely, between the mounts that are members of a
+.I peer group
+that are propagating events to one another).
+.PP
+Each mount is marked (via
+.BR mount (2))
+as having one of the following
+.IR "propagation types" :
+.TP
+.B MS_SHARED
+This mount shares events with members of a peer group.
+.BR mount (2)
+and
+.BR umount (2)
+events immediately under this mount will propagate
+to the other mounts that are members of the peer group.
+.I Propagation
+here means that the same
+.BR mount (2)
+or
+.BR umount (2)
+will automatically occur
+under all of the other mounts in the peer group.
+Conversely,
+.BR mount (2)
+and
+.BR umount (2)
+events that take place under
+peer mounts will propagate to this mount.
+.TP
+.B MS_PRIVATE
+This mount is private; it does not have a peer group.
+.BR mount (2)
+and
+.BR umount (2)
+events do not propagate into or out of this mount.
+.TP
+.B MS_SLAVE
+.BR mount (2)
+and
+.BR umount (2)
+events propagate into this mount from
+a (master) shared peer group.
+.BR mount (2)
+and
+.BR umount (2)
+events under this mount do not propagate to any peer.
+.IP
+Note that a mount can be the slave of another peer group
+while at the same time sharing
+.BR mount (2)
+and
+.BR umount (2)
+events
+with a peer group of which it is a member.
+(More precisely, one peer group can be the slave of another peer group.)
+.TP
+.B MS_UNBINDABLE
+This is like a private mount,
+and in addition this mount can't be bind mounted.
+Attempts to bind mount this mount
+.RB ( mount (2)
+with the
+.B MS_BIND
+flag) will fail.
+.IP
+When a recursive bind mount
+.RB ( mount (2)
+with the
+.B MS_BIND
+and
+.B MS_REC
+flags) is performed on a directory subtree,
+any unbindable mounts within the subtree are automatically pruned
+(i.e., not replicated)
+when replicating that subtree to produce the target subtree.
+.PP
+For a discussion of the propagation type assigned to a new mount,
+see NOTES.
+.PP
+The propagation type is a per-mount-point setting;
+some mounts may be marked as shared
+(with each shared mount being a member of a distinct peer group),
+while others are private
+(or slaved or unbindable).
+.PP
+Note that a mount's propagation type determines whether
+.BR mount (2)
+and
+.BR umount (2)
+of mounts
+.I immediately under
+the mount are propagated.
+Thus, the propagation type does not affect propagation of events for
+grandchildren and further removed descendant mounts.
+What happens if the mount itself is unmounted is determined by
+the propagation type that is in effect for the
+.I parent
+of the mount.
+.PP
+Members are added to a
+.I peer group
+when a mount is marked as shared and either:
+.IP (a) 5
+the mount is replicated during the creation of a new mount namespace; or
+.IP (b)
+a new bind mount is created from the mount.
+.PP
+In both of these cases, the new mount joins the peer group
+of which the existing mount is a member.
+.PP
+A new peer group is also created when a child mount is created under
+an existing mount that is marked as shared.
+In this case, the new child mount is also marked as shared and
+the resulting peer group consists of all the mounts
+that are replicated under the peers of parent mounts.
+.PP
+A mount ceases to be a member of a peer group when either
+the mount is explicitly unmounted,
+or when the mount is implicitly unmounted because a mount namespace is removed
+(because it has no more member processes).
+.PP
+The propagation type of the mounts in a mount namespace
+can be discovered via the "optional fields" exposed in
+.IR /proc/ pid /mountinfo .
+(See
+.BR proc (5)
+for details of this file.)
+The following tags can appear in the optional fields
+for a record in that file:
+.TP
+.I shared:X
+This mount is shared in peer group
+.IR X .
+Each peer group has a unique ID that is automatically
+generated by the kernel,
+and all mounts in the same peer group will show the same ID.
+(These IDs are assigned starting from the value 1,
+and may be recycled when a peer group ceases to have any members.)
+.TP
+.I master:X
+This mount is a slave to shared peer group
+.IR X .
+.TP
+.IR propagate_from:X " (since Linux 2.6.26)"
+.\" commit 97e7e0f71d6d948c25f11f0a33878d9356d9579e
+This mount is a slave and receives propagation from shared peer group
+.IR X .
+This tag will always appear in conjunction with a
+.I master:X
+tag.
+Here,
+.I X
+is the closest dominant peer group under the process's root directory.
+If
+.I X
+is the immediate master of the mount,
+or if there is no dominant peer group under the same root,
+then only the
+.I master:X
+field is present and not the
+.I propagate_from:X
+field.
+For further details, see below.
+.TP
+.I unbindable
+This is an unbindable mount.
+.PP
+If none of the above tags is present, then this is a private mount.
+.SS MS_SHARED and MS_PRIVATE example
+Suppose that on a terminal in the initial mount namespace,
+we mark one mount as shared and another as private,
+and then view the mounts in
+.IR /proc/self/mountinfo :
+.PP
+.in +4n
+.EX
+sh1# \fBmount \-\-make\-shared /mntS\fP
+sh1# \fBmount \-\-make\-private /mntP\fP
+sh1# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+77 61 8:17 / /mntS rw,relatime shared:1
+83 61 8:15 / /mntP rw,relatime
+.EE
+.in
+.PP
+From the
+.I /proc/self/mountinfo
+output, we see that
+.I /mntS
+is a shared mount in peer group 1, and that
+.I /mntP
+has no optional tags, indicating that it is a private mount.
+The first two fields in each record in this file are the unique
+ID for this mount, and the mount ID of the parent mount.
+We can further inspect this file to see that the parent mount of
+.I /mntS
+and
+.I /mntP
+is the root directory,
+.IR / ,
+which is mounted as private:
+.PP
+.in +4n
+.EX
+sh1# \fBcat /proc/self/mountinfo | awk \[aq]$1 == 61\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+61 0 8:2 / / rw,relatime
+.EE
+.in
+.PP
+On a second terminal,
+we create a new mount namespace where we run a second shell
+and inspect the mounts:
+.PP
+.in +4n
+.EX
+$ \fBPS1=\[aq]sh2# \[aq] sudo unshare \-m \-\-propagation unchanged sh\fP
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+222 145 8:17 / /mntS rw,relatime shared:1
+225 145 8:15 / /mntP rw,relatime
+.EE
+.in
+.PP
+The new mount namespace received a copy of the initial mount namespace's
+mounts.
+These new mounts maintain the same propagation types,
+but have unique mount IDs.
+(The
+.I \-\-propagation\~unchanged
+option prevents
+.BR unshare (1)
+from marking all mounts as private when creating a new mount namespace,
+.\" Since util-linux 2.27
+which it does by default.)
+.PP
+In the second terminal, we then create submounts under each of
+.I /mntS
+and
+.I /mntP
+and inspect the set-up:
+.PP
+.in +4n
+.EX
+sh2# \fBmkdir /mntS/a\fP
+sh2# \fBmount /dev/sdb6 /mntS/a\fP
+sh2# \fBmkdir /mntP/b\fP
+sh2# \fBmount /dev/sdb7 /mntP/b\fP
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+222 145 8:17 / /mntS rw,relatime shared:1
+225 145 8:15 / /mntP rw,relatime
+178 222 8:22 / /mntS/a rw,relatime shared:2
+230 225 8:23 / /mntP/b rw,relatime
+.EE
+.in
+.PP
+From the above, it can be seen that
+.I /mntS/a
+was created as shared (inheriting this setting from its parent mount) and
+.I /mntP/b
+was created as a private mount.
+.PP
+Returning to the first terminal and inspecting the set-up,
+we see that the new mount created under the shared mount
+.I /mntS
+propagated to its peer mount (in the initial mount namespace),
+but the new mount created under the private mount
+.I /mntP
+did not propagate:
+.PP
+.in +4n
+.EX
+sh1# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+77 61 8:17 / /mntS rw,relatime shared:1
+83 61 8:15 / /mntP rw,relatime
+179 77 8:22 / /mntS/a rw,relatime shared:2
+.EE
+.in
+.\"
+.SS MS_SLAVE example
+Making a mount a slave allows it to receive propagated
+.BR mount (2)
+and
+.BR umount (2)
+events from a master shared peer group,
+while preventing it from propagating events to that master.
+This is useful if we want to (say) receive a mount event when
+an optical disk is mounted in the master shared peer group
+(in another mount namespace),
+but want to prevent
+.BR mount (2)
+and
+.BR umount (2)
+events under the slave mount
+from having side effects in other namespaces.
+.PP
+We can demonstrate the effect of slaving by first marking
+two mounts as shared in the initial mount namespace:
+.PP
+.in +4n
+.EX
+sh1# \fBmount \-\-make\-shared /mntX\fP
+sh1# \fBmount \-\-make\-shared /mntY\fP
+sh1# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+132 83 8:23 / /mntX rw,relatime shared:1
+133 83 8:22 / /mntY rw,relatime shared:2
+.EE
+.in
+.PP
+On a second terminal,
+we create a new mount namespace and inspect the mounts:
+.PP
+.in +4n
+.EX
+sh2# \fBunshare \-m \-\-propagation unchanged sh\fP
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+168 167 8:23 / /mntX rw,relatime shared:1
+169 167 8:22 / /mntY rw,relatime shared:2
+.EE
+.in
+.PP
+In the new mount namespace, we then mark one of the mounts as a slave:
+.PP
+.in +4n
+.EX
+sh2# \fBmount \-\-make\-slave /mntY\fP
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+168 167 8:23 / /mntX rw,relatime shared:1
+169 167 8:22 / /mntY rw,relatime master:2
+.EE
+.in
+.PP
+From the above output, we see that
+.I /mntY
+is now a slave mount that is receiving propagation events from
+the shared peer group with the ID 2.
+.PP
+Continuing in the new namespace, we create submounts under each of
+.I /mntX
+and
+.IR /mntY :
+.PP
+.in +4n
+.EX
+sh2# \fBmkdir /mntX/a\fP
+sh2# \fBmount /dev/sda3 /mntX/a\fP
+sh2# \fBmkdir /mntY/b\fP
+sh2# \fBmount /dev/sda5 /mntY/b\fP
+.EE
+.in
+.PP
+When we inspect the state of the mounts in the new mount namespace,
+we see that
+.I /mntX/a
+was created as a new shared mount
+(inheriting the "shared" setting from its parent mount) and
+.I /mntY/b
+was created as a private mount:
+.PP
+.in +4n
+.EX
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+168 167 8:23 / /mntX rw,relatime shared:1
+169 167 8:22 / /mntY rw,relatime master:2
+173 168 8:3 / /mntX/a rw,relatime shared:3
+175 169 8:5 / /mntY/b rw,relatime
+.EE
+.in
+.PP
+Returning to the first terminal (in the initial mount namespace),
+we see that the mount
+.I /mntX/a
+propagated to the peer (the shared
+.IR /mntX ),
+but the mount
+.I /mntY/b
+was not propagated:
+.PP
+.in +4n
+.EX
+sh1# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+132 83 8:23 / /mntX rw,relatime shared:1
+133 83 8:22 / /mntY rw,relatime shared:2
+174 132 8:3 / /mntX/a rw,relatime shared:3
+.EE
+.in
+.PP
+Now we create a new mount under
+.I /mntY
+in the first shell:
+.PP
+.in +4n
+.EX
+sh1# \fBmkdir /mntY/c\fP
+sh1# \fBmount /dev/sda1 /mntY/c\fP
+sh1# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+132 83 8:23 / /mntX rw,relatime shared:1
+133 83 8:22 / /mntY rw,relatime shared:2
+174 132 8:3 / /mntX/a rw,relatime shared:3
+178 133 8:1 / /mntY/c rw,relatime shared:4
+.EE
+.in
+.PP
+When we examine the mounts in the second mount namespace,
+we see that in this case the new mount has been propagated
+to the slave mount,
+and that the new mount is itself a slave mount (to peer group 4):
+.PP
+.in +4n
+.EX
+sh2# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+168 167 8:23 / /mntX rw,relatime shared:1
+169 167 8:22 / /mntY rw,relatime master:2
+173 168 8:3 / /mntX/a rw,relatime shared:3
+175 169 8:5 / /mntY/b rw,relatime
+179 169 8:1 / /mntY/c rw,relatime master:4
+.EE
+.in
+.\"
+.SS MS_UNBINDABLE example
+One of the primary purposes of unbindable mounts is to avoid
+the "mount explosion" problem when repeatedly performing bind mounts
+of a higher-level subtree at a lower-level mount.
+The problem is illustrated by the following shell session.
+.PP
+Suppose we have a system with the following mounts:
+.PP
+.in +4n
+.EX
+# \fBmount | awk \[aq]{print $1, $2, $3}\[aq]\fP
+/dev/sda1 on /
+/dev/sdb6 on /mntX
+/dev/sdb7 on /mntY
+.EE
+.in
+.PP
+Suppose furthermore that we wish to recursively bind mount
+the root directory under several users' home directories.
+We do this for the first user, and inspect the mounts:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-rbind / /home/cecilia/\fP
+# \fBmount | awk \[aq]{print $1, $2, $3}\[aq]\fP
+/dev/sda1 on /
+/dev/sdb6 on /mntX
+/dev/sdb7 on /mntY
+/dev/sda1 on /home/cecilia
+/dev/sdb6 on /home/cecilia/mntX
+/dev/sdb7 on /home/cecilia/mntY
+.EE
+.in
+.PP
+When we repeat this operation for the second user,
+we start to see the explosion problem:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-rbind / /home/henry\fP
+# \fBmount | awk \[aq]{print $1, $2, $3}\[aq]\fP
+/dev/sda1 on /
+/dev/sdb6 on /mntX
+/dev/sdb7 on /mntY
+/dev/sda1 on /home/cecilia
+/dev/sdb6 on /home/cecilia/mntX
+/dev/sdb7 on /home/cecilia/mntY
+/dev/sda1 on /home/henry
+/dev/sdb6 on /home/henry/mntX
+/dev/sdb7 on /home/henry/mntY
+/dev/sda1 on /home/henry/home/cecilia
+/dev/sdb6 on /home/henry/home/cecilia/mntX
+/dev/sdb7 on /home/henry/home/cecilia/mntY
+.EE
+.in
+.PP
+Under
+.IR /home/henry ,
+we have not only recursively added the
+.I /mntX
+and
+.I /mntY
+mounts, but also the recursive mounts of those directories under
+.I /home/cecilia
+that were created in the previous step.
+Upon repeating the step for a third user,
+it becomes obvious that the explosion is exponential in nature:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-rbind / /home/otto\fP
+# \fBmount | awk \[aq]{print $1, $2, $3}\[aq]\fP
+/dev/sda1 on /
+/dev/sdb6 on /mntX
+/dev/sdb7 on /mntY
+/dev/sda1 on /home/cecilia
+/dev/sdb6 on /home/cecilia/mntX
+/dev/sdb7 on /home/cecilia/mntY
+/dev/sda1 on /home/henry
+/dev/sdb6 on /home/henry/mntX
+/dev/sdb7 on /home/henry/mntY
+/dev/sda1 on /home/henry/home/cecilia
+/dev/sdb6 on /home/henry/home/cecilia/mntX
+/dev/sdb7 on /home/henry/home/cecilia/mntY
+/dev/sda1 on /home/otto
+/dev/sdb6 on /home/otto/mntX
+/dev/sdb7 on /home/otto/mntY
+/dev/sda1 on /home/otto/home/cecilia
+/dev/sdb6 on /home/otto/home/cecilia/mntX
+/dev/sdb7 on /home/otto/home/cecilia/mntY
+/dev/sda1 on /home/otto/home/henry
+/dev/sdb6 on /home/otto/home/henry/mntX
+/dev/sdb7 on /home/otto/home/henry/mntY
+/dev/sda1 on /home/otto/home/henry/home/cecilia
+/dev/sdb6 on /home/otto/home/henry/home/cecilia/mntX
+/dev/sdb7 on /home/otto/home/henry/home/cecilia/mntY
+.EE
+.in
+.PP
+The mount explosion problem in the above scenario can be avoided
+by making each of the new mounts unbindable.
+The effect of doing this is that recursive mounts of the root
+directory will not replicate the unbindable mounts.
+We make such a mount for the first user:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-rbind \-\-make\-unbindable / /home/cecilia\fP
+.EE
+.in
+.PP
+Before going further, we show that unbindable mounts are indeed unbindable:
+.PP
+.in +4n
+.EX
+# \fBmkdir /mntZ\fP
+# \fBmount \-\-bind /home/cecilia /mntZ\fP
+mount: wrong fs type, bad option, bad superblock on /home/cecilia,
+ missing codepage or helper program, or other error
+\&
+ In some cases useful info is found in syslog \- try
+ dmesg | tail or so.
+.EE
+.in
+.PP
+Now we create unbindable recursive bind mounts for the other two users:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-rbind \-\-make\-unbindable / /home/henry\fP
+# \fBmount \-\-rbind \-\-make\-unbindable / /home/otto\fP
+.EE
+.in
+.PP
+Upon examining the list of mounts,
+we see there has been no explosion of mounts,
+because the unbindable mounts were not replicated
+under each user's directory:
+.PP
+.in +4n
+.EX
+# \fBmount | awk \[aq]{print $1, $2, $3}\[aq]\fP
+/dev/sda1 on /
+/dev/sdb6 on /mntX
+/dev/sdb7 on /mntY
+/dev/sda1 on /home/cecilia
+/dev/sdb6 on /home/cecilia/mntX
+/dev/sdb7 on /home/cecilia/mntY
+/dev/sda1 on /home/henry
+/dev/sdb6 on /home/henry/mntX
+/dev/sdb7 on /home/henry/mntY
+/dev/sda1 on /home/otto
+/dev/sdb6 on /home/otto/mntX
+/dev/sdb7 on /home/otto/mntY
+.EE
+.in
+.\"
+.SS Propagation type transitions
+The following table shows the effect that applying a new propagation type
+(i.e.,
+.IR mount\~\-\-make\-xxxx )
+has on the existing propagation type of a mount.
+The rows correspond to existing propagation types,
+and the columns are the new propagation settings.
+For reasons of space, "private" is abbreviated as "priv" and
+"unbindable" as "unbind".
+.TS
+lb2 lb2 lb2 lb2 lb1
+lb | l l l l l.
+ make-shared make-slave make-priv make-unbind
+_
+shared shared slave/priv [1] priv unbind
+slave slave+shared slave [2] priv unbind
+slave+shared slave+shared slave priv unbind
+private shared priv [2] priv unbind
+unbindable shared unbind [2] priv unbind
+.TE
+.sp 1
+Note the following details to the table:
+.IP [1] 4
+If a shared mount is the only mount in its peer group,
+making it a slave automatically makes it private.
+.IP [2]
+Slaving a nonshared mount has no effect on the mount.
+.\"
+.SS Bind (MS_BIND) semantics
+Suppose that the following command is performed:
+.PP
+.in +4n
+.EX
+mount \-\-bind A/a B/b
+.EE
+.in
+.PP
+Here,
+.I A
+is the source mount,
+.I B
+is the destination mount,
+.I a
+is a subdirectory path under the mount point
+.IR A ,
+and
+.I b
+is a subdirectory path under the mount point
+.IR B .
+The propagation type of the resulting mount,
+.IR B/b ,
+depends on the propagation types of the mounts
+.I A
+and
+.IR B ,
+and is summarized in the following table.
+.PP
+.TS
+lb2 lb1 lb2 lb2 lb2 lb0
+lb2 lb1 lb2 lb2 lb2 lb0
+lb lb | l l l l l.
+ source(A)
+ shared private slave unbind
+_
+dest(B) shared shared shared slave+shared invalid
+ nonshared shared private slave invalid
+.TE
+.sp 1
+Note that a recursive bind of a subtree follows the same semantics
+as for a bind operation on each mount in the subtree.
+(Unbindable mounts are automatically pruned at the target mount point.)
+.PP
+For further details, see
+.I Documentation/filesystems/sharedsubtree.rst
+in the kernel source tree.
+.\"
+.SS Move (MS_MOVE) semantics
+Suppose that the following command is performed:
+.PP
+.in +4n
+.EX
+mount \-\-move A B/b
+.EE
+.in
+.PP
+Here,
+.I A
+is the source mount,
+.I B
+is the destination mount, and
+.I b
+is a subdirectory path under the mount point
+.IR B .
+The propagation type of the resulting mount,
+.IR B/b ,
+depends on the propagation types of the mounts
+.I A
+and
+.IR B ,
+and is summarized in the following table.
+.PP
+.TS
+lb2 lb1 lb2 lb2 lb2 lb0
+lb2 lb1 lb2 lb2 lb2 lb0
+lb lb | l l l l l.
+ source(A)
+ shared private slave unbind
+_
+dest(B) shared shared shared slave+shared invalid
+ nonshared shared private slave unbindable
+.TE
+.sp 1
+Note: moving a mount that resides under a shared mount is invalid.
+.PP
+For further details, see
+.I Documentation/filesystems/sharedsubtree.rst
+in the kernel source tree.
+.\"
+.SS Mount semantics
+Suppose that we use the following command to create a mount:
+.PP
+.in +4n
+.EX
+mount device B/b
+.EE
+.in
+.PP
+Here,
+.I B
+is the destination mount, and
+.I b
+is a subdirectory path under the mount point
+.IR B .
+The propagation type of the resulting mount,
+.IR B/b ,
+follows the same rules as for a bind mount,
+where the propagation type of the source mount
+is considered always to be private.
+.\"
+.SS Unmount semantics
+Suppose that we use the following command to tear down a mount:
+.PP
+.in +4n
+.EX
+umount A
+.EE
+.in
+.PP
+Here,
+.I A
+is a mount on
+.IR B/b ,
+where
+.I B
+is the parent mount and
+.I b
+is a subdirectory path under the mount point
+.IR B .
+If
+.I B
+is shared, then all most-recently-mounted mounts at
+.I b
+on mounts that receive propagation from mount
+.I B
+and do not have submounts under them are unmounted.
+.\"
+.SS The /proc/ pid /mountinfo "propagate_from" tag
+The
+.I propagate_from:X
+tag is shown in the optional fields of a
+.IR /proc/ pid /mountinfo
+record in cases where a process can't see a slave's immediate master
+(i.e., the pathname of the master is not reachable from
+the filesystem root directory)
+and so cannot determine the
+chain of propagation between the mounts it can see.
+.PP
+In the following example, we first create a two-link master-slave chain
+between the mounts
+.IR /mnt ,
+.IR /tmp/etc ,
+and
+.IR /mnt/tmp/etc .
+Then the
+.BR chroot (1)
+command is used to make the
+.I /tmp/etc
+mount point unreachable from the root directory,
+creating a situation where the master of
+.I /mnt/tmp/etc
+is not reachable from the (new) root directory of the process.
+.PP
+First, we bind mount the root directory onto
+.I /mnt
+and then bind mount
+.I /proc
+at
+.I /mnt/proc
+so that after the later
+.BR chroot (1)
+the
+.BR proc (5)
+filesystem remains visible at the correct location
+in the chroot-ed environment.
+.PP
+.in +4n
+.EX
+# \fBmkdir \-p /mnt/proc\fP
+# \fBmount \-\-bind / /mnt\fP
+# \fBmount \-\-bind /proc /mnt/proc\fP
+.EE
+.in
+.PP
+Next, we ensure that the
+.I /mnt
+mount is a shared mount in a new peer group (with no peers):
+.PP
+.in +4n
+.EX
+# \fBmount \-\-make\-private /mnt\fP # Isolate from any previous peer group
+# \fBmount \-\-make\-shared /mnt\fP
+# \fBcat /proc/self/mountinfo | grep \[aq]/mnt\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+239 61 8:2 / /mnt ... shared:102
+248 239 0:4 / /mnt/proc ... shared:5
+.EE
+.in
+.PP
+Next, we bind mount
+.I /mnt/etc
+onto
+.IR /tmp/etc :
+.PP
+.in +4n
+.EX
+# \fBmkdir \-p /tmp/etc\fP
+# \fBmount \-\-bind /mnt/etc /tmp/etc\fP
+# \fBcat /proc/self/mountinfo | egrep \[aq]/mnt|/tmp/\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+239 61 8:2 / /mnt ... shared:102
+248 239 0:4 / /mnt/proc ... shared:5
+267 40 8:2 /etc /tmp/etc ... shared:102
+.EE
+.in
+.PP
+Initially, these two mounts are in the same peer group,
+but we then make
+.I /tmp/etc
+a slave of
+.IR /mnt/etc ,
+and then make
+.I /tmp/etc
+shared as well,
+so that it can propagate events to the next slave in the chain:
+.PP
+.in +4n
+.EX
+# \fBmount \-\-make\-slave /tmp/etc\fP
+# \fBmount \-\-make\-shared /tmp/etc\fP
+# \fBcat /proc/self/mountinfo | egrep \[aq]/mnt|/tmp/\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+239 61 8:2 / /mnt ... shared:102
+248 239 0:4 / /mnt/proc ... shared:5
+267 40 8:2 /etc /tmp/etc ... shared:105 master:102
+.EE
+.in
+.PP
+Then we bind mount
+.I /tmp/etc
+onto
+.IR /mnt/tmp/etc .
+Again, the two mounts are initially in the same peer group,
+but we then make
+.I /mnt/tmp/etc
+a slave of
+.IR /tmp/etc :
+.PP
+.in +4n
+.EX
+# \fBmkdir \-p /mnt/tmp/etc\fP
+# \fBmount \-\-bind /tmp/etc /mnt/tmp/etc\fP
+# \fBmount \-\-make\-slave /mnt/tmp/etc\fP
+# \fBcat /proc/self/mountinfo | egrep \[aq]/mnt|/tmp/\[aq] | sed \[aq]s/ \- .*//\[aq]\fP
+239 61 8:2 / /mnt ... shared:102
+248 239 0:4 / /mnt/proc ... shared:5
+267 40 8:2 /etc /tmp/etc ... shared:105 master:102
+273 239 8:2 /etc /mnt/tmp/etc ... master:105
+.EE
+.in
+.PP
+From the above, we see that
+.I /mnt
+is the master of the slave
+.IR /tmp/etc ,
+which in turn is the master of the slave
+.IR /mnt/tmp/etc .
+.PP
+We then
+.BR chroot (1)
+to the
+.I /mnt
+directory, which renders the mount with ID 267 unreachable
+from the (new) root directory:
+.PP
+.in +4n
+.EX
+# \fBchroot /mnt\fP
+.EE
+.in
+.PP
+When we examine the state of the mounts inside the chroot-ed environment,
+we see the following:
+.PP
+.in +4n
+.EX
+# \fBcat /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+239 61 8:2 / / ... shared:102
+248 239 0:4 / /proc ... shared:5
+273 239 8:2 /etc /tmp/etc ... master:105 propagate_from:102
+.EE
+.in
+.PP
+Above, we see that the mount with ID 273
+is a slave whose master is the peer group 105.
+The mount point for that master is unreachable, and so a
+.I propagate_from
+tag is displayed, indicating that the closest dominant peer group
+(i.e., the nearest reachable mount in the slave chain)
+is the peer group with the ID 102 (corresponding to the
+.I /mnt
+mount point before the
+.BR chroot (1)
+was performed).
+.\"
+.SH STANDARDS
+Linux.
+.SH HISTORY
+Linux 2.4.19.
+.\"
+.SH NOTES
+The propagation type assigned to a new mount depends
+on the propagation type of the parent mount.
+If the mount has a parent (i.e., it is a non-root mount
+point) and the propagation type of the parent is
+.BR MS_SHARED ,
+then the propagation type of the new mount is also
+.BR MS_SHARED .
+Otherwise, the propagation type of the new mount is
+.BR MS_PRIVATE .
+.PP
+Notwithstanding the fact that the default propagation type
+for a new mount is in many cases
+.BR MS_PRIVATE ,
+.B MS_SHARED
+is typically more useful.
+For this reason,
+.BR systemd (1)
+automatically remounts all mounts as
+.B MS_SHARED
+on system startup.
+Thus, on most modern systems, the default propagation type is in practice
+.BR MS_SHARED .
+.PP
+Since, when one uses
+.BR unshare (1)
+to create a mount namespace,
+the goal is commonly to provide full isolation of the mounts
+in the new namespace,
+.BR unshare (1)
+(since
+.I util\-linux
+2.27) in turn reverses the step performed by
+.BR systemd (1),
+by making all mounts private in the new namespace.
+That is,
+.BR unshare (1)
+performs the equivalent of the following in the new mount namespace:
+.PP
+.in +4n
+.EX
+mount \-\-make\-rprivate /
+.EE
+.in
+.PP
+To prevent this, one can use the
+.I \-\-propagation\~unchanged
+option to
+.BR unshare (1).
+.PP
+An application that creates a new mount namespace directly using
+.BR clone (2)
+or
+.BR unshare (2)
+may desire to prevent propagation of mount events to other mount namespaces
+(as is done by
+.BR unshare (1)).
+This can be done by changing the propagation type of
+mounts in the new namespace to either
+.B MS_SLAVE
+or
+.BR MS_PRIVATE ,
+using a call such as the following:
+.PP
+.in +4n
+.EX
+mount(NULL, "/", MS_SLAVE | MS_REC, NULL);
+.EE
+.in
+.PP
+For a discussion of propagation types when moving mounts
+.RB ( MS_MOVE )
+and creating bind mounts
+.RB ( MS_BIND ),
+see
+.IR Documentation/filesystems/sharedsubtree.rst .
+.\"
+.\" ============================================================
+.\"
+.SS Restrictions on mount namespaces
+Note the following points with respect to mount namespaces:
+.IP [1] 4
+Each mount namespace has an owner user namespace.
+As explained above, when a new mount namespace is created,
+its mount list is initialized as a copy of the mount list
+of another mount namespace.
+If the new namespace and the namespace from which the mount list
+was copied are owned by different user namespaces,
+then the new mount namespace is considered
+.IR "less privileged" .
+.IP [2]
+When creating a less privileged mount namespace,
+shared mounts are reduced to slave mounts.
+This ensures that mappings performed in less
+privileged mount namespaces will not propagate to more privileged
+mount namespaces.
+.IP [3]
+Mounts that come as a single unit from a more privileged mount namespace are
+locked together and may not be separated in a less privileged mount
+namespace.
+(The
+.BR unshare (2)
+.B CLONE_NEWNS
+operation brings across all of the mounts from the original
+mount namespace as a single unit,
+and recursive mounts that propagate between
+mount namespaces propagate as a single unit.)
+.IP
+In this context, "may not be separated" means that the mounts
+are locked so that they may not be individually unmounted.
+Consider the following example:
+.IP
+.in +4n
+.EX
+$ \fBsudo sh\fP
+# \fBmount \-\-bind /dev/null /etc/shadow\fP
+# \fBcat /etc/shadow\fP # Produces no output
+.EE
+.in
+.IP
+The above steps, performed in a more privileged mount namespace,
+have created a bind mount that
+obscures the contents of the shadow password file,
+.IR /etc/shadow .
+For security reasons, it should not be possible to
+.BR umount (2)
+that mount in a less privileged mount namespace,
+since that would reveal the contents of
+.IR /etc/shadow .
+.IP
+Suppose we now create a new mount namespace
+owned by a new user namespace.
+The new mount namespace will inherit copies of all of the mounts
+from the previous mount namespace.
+However, those mounts will be locked because the new mount namespace
+is less privileged.
+Consequently, an attempt to
+.BR umount (2)
+the mount fails as shown
+in the following step:
+.IP
+.in +4n
+.EX
+# \fBunshare \-\-user \-\-map\-root\-user \-\-mount \e\fP
+ \fBstrace \-o /tmp/log \e\fP
+ \fBumount /etc/shadow\fP
+umount: /etc/shadow: not mounted.
+# \fBgrep \[aq]\[ha]umount\[aq] /tmp/log\fP
+umount2("/etc/shadow", 0) = \-1 EINVAL (Invalid argument)
+.EE
+.in
+.IP
+The error message from
+.BR umount (8)
+is a little confusing, but the
+.BR strace (1)
+output reveals that the underlying
+.BR umount2 (2)
+system call failed with the error
+.BR EINVAL ,
+which is the error that the kernel returns to indicate that
+the mount is locked.
+.IP
+Note, however, that it is possible to stack (and unstack) a
+mount on top of one of the inherited locked mounts in a
+less privileged mount namespace:
+.IP
+.in +4n
+.EX
+# \fBecho \[aq]aaaaa\[aq] > /tmp/a\fP # File to mount onto /etc/shadow
+# \fBunshare \-\-user \-\-map\-root\-user \-\-mount \e\fP
+ \fBsh \-c \[aq]mount \-\-bind /tmp/a /etc/shadow; cat /etc/shadow\[aq]\fP
+aaaaa
+# \fBumount /etc/shadow\fP
+.EE
+.in
+.IP
+The final
+.BR umount (8)
+command above, which is performed in the initial mount namespace,
+makes the original
+.I /etc/shadow
+file once more visible in that namespace.
+.IP [4]
+Following on from point [3],
+note that it is possible to
+.BR umount (2)
+an entire subtree of mounts that
+propagated as a unit into a less privileged mount namespace,
+as illustrated in the following example.
+.IP
+First, we create new user and mount namespaces using
+.BR unshare (1).
+In the new mount namespace,
+the propagation type of all mounts is set to private.
+We then create a shared bind mount at
+.IR /mnt ,
+and a small hierarchy of mounts underneath that mount.
+.IP
+.in +4n
+.EX
+$ \fBPS1=\[aq]ns1# \[aq] sudo unshare \-\-user \-\-map\-root\-user \e\fP
+ \fB\-\-mount \-\-propagation private bash\fP
+ns1# \fBecho $$\fP # We need the PID of this shell later
+778501
+ns1# \fBmount \-\-make\-shared \-\-bind /mnt /mnt\fP
+ns1# \fBmkdir /mnt/x\fP
+ns1# \fBmount \-\-make\-private \-t tmpfs none /mnt/x\fP
+ns1# \fBmkdir /mnt/x/y\fP
+ns1# \fBmount \-\-make\-private \-t tmpfs none /mnt/x/y\fP
+ns1# \fBgrep /mnt /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+986 83 8:5 /mnt /mnt rw,relatime shared:344
+989 986 0:56 / /mnt/x rw,relatime
+990 989 0:57 / /mnt/x/y rw,relatime
+.EE
+.in
+.IP
+Continuing in the same shell session,
+we then create a second shell in a new user namespace and a new
+(less privileged) mount namespace and
+check the state of the propagated mounts rooted at
+.IR /mnt .
+.IP
+.in +4n
+.EX
+ns1# \fBPS1=\[aq]ns2# \[aq] unshare \-\-user \-\-map\-root\-user \e\fP
+ \fB\-\-mount \-\-propagation unchanged bash\fP
+ns2# \fBgrep /mnt /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+1239 1204 8:5 /mnt /mnt rw,relatime master:344
+1240 1239 0:56 / /mnt/x rw,relatime
+1241 1240 0:57 / /mnt/x/y rw,relatime
+.EE
+.in
+.IP
+Of note in the above output is that the propagation type of the mount
+.I /mnt
+has been reduced to slave, as explained in point [2].
+This means that submount events will propagate from the master
+.I /mnt
+in "ns1", but propagation will not occur in the opposite direction.
+.IP
+From a separate terminal window, we then use
+.BR nsenter (1)
+to enter the mount and user namespaces corresponding to "ns1".
+In that terminal window, we then recursively bind mount
+.I /mnt/x
+at the location
+.IR /mnt/ppp .
+.IP
+.in +4n
+.EX
+$ \fBPS1=\[aq]ns3# \[aq] sudo nsenter \-t 778501 \-\-user \-\-mount\fP
+ns3# \fBmount \-\-rbind \-\-make\-private /mnt/x /mnt/ppp\fP
+ns3# \fBgrep /mnt /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+986 83 8:5 /mnt /mnt rw,relatime shared:344
+989 986 0:56 / /mnt/x rw,relatime
+990 989 0:57 / /mnt/x/y rw,relatime
+1242 986 0:56 / /mnt/ppp rw,relatime
+1243 1242 0:57 / /mnt/ppp/y rw,relatime shared:518
+.EE
+.in
+.IP
+Because the propagation type of the parent mount,
+.IR /mnt ,
+was shared, the recursive bind mount propagated a small subtree of
+mounts under the slave mount
+.I /mnt
+into "ns2",
+as can be verified by executing the following command in that shell session:
+.IP
+.in +4n
+.EX
+ns2# \fBgrep /mnt /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+1239 1204 8:5 /mnt /mnt rw,relatime master:344
+1240 1239 0:56 / /mnt/x rw,relatime
+1241 1240 0:57 / /mnt/x/y rw,relatime
+1244 1239 0:56 / /mnt/ppp rw,relatime
+1245 1244 0:57 / /mnt/ppp/y rw,relatime master:518
+.EE
+.in
+.IP
+While it is not possible to
+.BR umount (2)
+a part of the propagated subtree
+.RI ( /mnt/ppp/y )
+in "ns2",
+it is possible to
+.BR umount (2)
+the entire subtree,
+as shown by the following commands:
+.IP
+.in +4n
+.EX
+ns2# \fBumount /mnt/ppp/y\fP
+umount: /mnt/ppp/y: not mounted.
+ns2# \fBumount \-l /mnt/ppp\fP # Succeeds...
+ns2# \fBgrep /mnt /proc/self/mountinfo | sed \[aq]s/ \- .*//\[aq]\fP
+1239 1204 8:5 /mnt /mnt rw,relatime master:344
+1240 1239 0:56 / /mnt/x rw,relatime
+1241 1240 0:57 / /mnt/x/y rw,relatime
+.EE
+.in
+.IP [5]
+The
+.BR mount (2)
+flags
+.BR MS_RDONLY ,
+.BR MS_NOSUID ,
+.BR MS_NOEXEC ,
+and the "atime" flags
+.RB ( MS_NOATIME ,
+.BR MS_NODIRATIME ,
+.BR MS_RELATIME )
+settings become locked
+.\" commit 9566d6742852c527bf5af38af5cbb878dad75705
+.\" Author: Eric W. Biederman <ebiederm@xmission.com>
+.\" Date: Mon Jul 28 17:26:07 2014 -0700
+.\"
+.\" mnt: Correct permission checks in do_remount
+.\"
+when propagated from a more privileged to
+a less privileged mount namespace,
+and may not be changed in the less privileged mount namespace.
+.IP
+This point is illustrated in the following example where,
+in a more privileged mount namespace,
+we create a bind mount that is marked as read-only.
+For security reasons,
+it should not be possible to make the mount writable in
+a less privileged mount namespace, and indeed the kernel prevents this:
+.IP
+.in +4n
+.EX
+$ \fBsudo mkdir /mnt/dir\fP
+$ \fBsudo mount \-\-bind \-o ro /some/path /mnt/dir\fP
+$ \fBsudo unshare \-\-user \-\-map\-root\-user \-\-mount \e\fP
+ \fBmount \-o remount,rw /mnt/dir\fP
+mount: /mnt/dir: permission denied.
+.EE
+.in
+.IP [6]
+.\" (As of 3.18-rc1 (in Al Viro's 2014-08-30 vfs.git#for-next tree))
+A file or directory that is a mount point in one namespace but is not
+a mount point in another namespace may be renamed, unlinked, or removed
+.RB ( rmdir (2))
+in the mount namespace in which it is not a mount point
+(subject to the usual permission checks).
+Consequently, the mount point is removed in the mount namespace
+where it was a mount point.
+.IP
+Previously (before Linux 3.18),
+.\" mtk: The change was in Linux 3.18, I think, with this commit:
+.\" commit 8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe
+.\" Author: Eric W. Biederman <ebiederman@twitter.com>
+.\" Date: Tue Oct 1 18:33:48 2013 -0700
+.\"
+.\" vfs: Lazily remove mounts on unlinked files and directories.
+attempting to unlink, rename, or remove a file or directory
+that was a mount point in another mount namespace would result in the error
+.BR EBUSY .
+That behavior had technical problems of enforcement (e.g., for NFS)
+and permitted denial-of-service attacks against more privileged users
+(i.e., preventing individual files from being updated
+by bind mounting on top of them).
+.SH EXAMPLES
+See
+.BR pivot_root (2).
+.SH SEE ALSO
+.BR unshare (1),
+.BR clone (2),
+.BR mount (2),
+.BR mount_setattr (2),
+.BR pivot_root (2),
+.BR setns (2),
+.BR umount (2),
+.BR unshare (2),
+.BR proc (5),
+.BR namespaces (7),
+.BR user_namespaces (7),
+.BR findmnt (8),
+.BR mount (8),
+.BR pam_namespace (8),
+.BR pivot_root (8),
+.BR umount (8)
+.PP
+.I Documentation/filesystems/sharedsubtree.rst
+in the kernel source tree.
diff --git a/man7/mq_overview.7 b/man7/mq_overview.7
new file mode 100644
index 0000000..b022ea0
--- /dev/null
+++ b/man7/mq_overview.7
@@ -0,0 +1,389 @@
+'\" t
+.\" Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH mq_overview 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+mq_overview \- overview of POSIX message queues
+.SH DESCRIPTION
+POSIX message queues allow processes to exchange data in
+the form of messages.
+This API is distinct from that provided by System V message queues
+.RB ( msgget (2),
+.BR msgsnd (2),
+.BR msgrcv (2),
+etc.), but provides similar functionality.
+.PP
+Message queues are created and opened using
+.BR mq_open (3);
+this function returns a
+.I message queue descriptor
+.RI ( mqd_t ),
+which is used to refer to the open message queue in later calls.
+Each message queue is identified by a name of the form
+.IR /somename ;
+that is, a null-terminated string of up to
+.B NAME_MAX
+(i.e., 255) characters consisting of an initial slash,
+followed by one or more characters, none of which are slashes.
+Two processes can operate on the same queue by passing the same name to
+.BR mq_open (3).
+.PP
+Messages are transferred to and from a queue using
+.BR mq_send (3)
+and
+.BR mq_receive (3).
+When a process has finished using the queue, it closes it using
+.BR mq_close (3),
+and when the queue is no longer required, it can be deleted using
+.BR mq_unlink (3).
+Queue attributes can be retrieved and (in some cases) modified using
+.BR mq_getattr (3)
+and
+.BR mq_setattr (3).
+A process can request asynchronous notification
+of the arrival of a message on a previously empty queue using
+.BR mq_notify (3).
+.PP
+A message queue descriptor is a reference to an
+.I "open message queue description"
+(see
+.BR open (2)).
+After a
+.BR fork (2),
+a child inherits copies of its parent's message queue descriptors,
+and these descriptors refer to the same open message queue descriptions
+as the corresponding message queue descriptors in the parent.
+Corresponding message queue descriptors in the two processes share the flags
+.RI ( mq_flags )
+that are associated with the open message queue description.
+.PP
+Each message has an associated
+.IR priority ,
+and messages are always delivered to the receiving process
+highest priority first.
+Message priorities range from 0 (low) to
+.I sysconf(_SC_MQ_PRIO_MAX)\ \-\ 1
+(high).
+On Linux,
+.I sysconf(_SC_MQ_PRIO_MAX)
+returns 32768, but POSIX.1 requires only that
+an implementation support at least priorities in the range 0 to 31;
+some implementations provide only this range.
+.PP
+The remainder of this section describes some specific details
+of the Linux implementation of POSIX message queues.
+.SS Library interfaces and system calls
+In most cases the
+.BR mq_* ()
+library interfaces listed above are implemented
+on top of underlying system calls of the same name.
+Deviations from this scheme are indicated in the following table:
+.RS
+.TS
+lB lB
+l l.
+Library interface System call
+mq_close(3) close(2)
+mq_getattr(3) mq_getsetattr(2)
+mq_notify(3) mq_notify(2)
+mq_open(3) mq_open(2)
+mq_receive(3) mq_timedreceive(2)
+mq_send(3) mq_timedsend(2)
+mq_setattr(3) mq_getsetattr(2)
+mq_timedreceive(3) mq_timedreceive(2)
+mq_timedsend(3) mq_timedsend(2)
+mq_unlink(3) mq_unlink(2)
+.TE
+.RE
+.SS Versions
+POSIX message queues have been supported since Linux 2.6.6.
+glibc support has been provided since glibc 2.3.4.
+.SS Kernel configuration
+Support for POSIX message queues is configurable via the
+.B CONFIG_POSIX_MQUEUE
+kernel configuration option.
+This option is enabled by default.
+.SS Persistence
+POSIX message queues have kernel persistence:
+if not removed by
+.BR mq_unlink (3),
+a message queue will exist until the system is shut down.
+.SS Linking
+Programs using the POSIX message queue API must be compiled with
+.I cc \-lrt
+to link against the real-time library,
+.IR librt .
+.SS /proc interfaces
+The following interfaces can be used to limit the amount of
+kernel memory consumed by POSIX message queues and to set
+the default attributes for new message queues:
+.TP
+.IR /proc/sys/fs/mqueue/msg_default " (since Linux 3.5)"
+This file defines the value used for a new queue's
+.I mq_maxmsg
+setting when the queue is created with a call to
+.BR mq_open (3)
+where
+.I attr
+is specified as NULL.
+The default value for this file is 10.
+The minimum and maximum are as for
+.IR /proc/sys/fs/mqueue/msg_max .
+A new queue's default
+.I mq_maxmsg
+value will be the smaller of
+.I msg_default
+and
+.IR msg_max .
+Before Linux 2.6.28, the default
+.I mq_maxmsg
+was 10;
+from Linux 2.6.28 to Linux 3.4, the default was the value defined for the
+.I msg_max
+limit.
+.TP
+.I /proc/sys/fs/mqueue/msg_max
+This file can be used to view and change the ceiling value for the
+maximum number of messages in a queue.
+This value acts as a ceiling on the
+.I attr\->mq_maxmsg
+argument given to
+.BR mq_open (3).
+The default value for
+.I msg_max
+is 10.
+The minimum value is 1 (10 before Linux 2.6.28).
+The upper limit is
+.BR HARD_MSGMAX .
+The
+.I msg_max
+limit is ignored for privileged processes
+.RB ( CAP_SYS_RESOURCE ),
+but the
+.B HARD_MSGMAX
+ceiling is nevertheless imposed.
+.IP
+The definition of
+.B HARD_MSGMAX
+has changed across kernel versions:
+.RS
+.IP \[bu] 3
+Up to Linux 2.6.32:
+.I 131072\~/\~sizeof(void\~*)
+.IP \[bu]
+Linux 2.6.33 to Linux 3.4:
+.I (32768\~*\~sizeof(void\~*) / 4)
+.IP \[bu]
+Since Linux 3.5:
+.\" commit 5b5c4d1a1440e94994c73dddbad7be0676cd8b9a
+65,536
+.RE
+.TP
+.IR /proc/sys/fs/mqueue/msgsize_default " (since Linux 3.5)"
+This file defines the value used for a new queue's
+.I mq_msgsize
+setting when the queue is created with a call to
+.BR mq_open (3)
+where
+.I attr
+is specified as NULL.
+The default value for this file is 8192 (bytes).
+The minimum and maximum are as for
+.IR /proc/sys/fs/mqueue/msgsize_max .
+If
+.I msgsize_default
+exceeds
+.IR msgsize_max ,
+a new queue's default
+.I mq_msgsize
+value is capped to the
+.I msgsize_max
+limit.
+Before Linux 2.6.28, the default
+.I mq_msgsize
+was 8192;
+from Linux 2.6.28 to Linux 3.4, the default was the value defined for the
+.I msgsize_max
+limit.
+.TP
+.I /proc/sys/fs/mqueue/msgsize_max
+This file can be used to view and change the ceiling on the
+maximum message size.
+This value acts as a ceiling on the
+.I attr\->mq_msgsize
+argument given to
+.BR mq_open (3).
+The default value for
+.I msgsize_max
+is 8192 bytes.
+The minimum value is 128 (8192 before Linux 2.6.28).
+The upper limit for
+.I msgsize_max
+has varied across kernel versions:
+.RS
+.IP \[bu] 3
+Before Linux 2.6.28, the upper limit is
+.BR INT_MAX .
+.IP \[bu]
+From Linux 2.6.28 to Linux 3.4, the limit is 1,048,576.
+.IP \[bu]
+Since Linux 3.5, the limit is 16,777,216
+.RB ( HARD_MSGSIZEMAX ).
+.RE
+.IP
+The
+.I msgsize_max
+limit is ignored for privileged processes
+.RB ( CAP_SYS_RESOURCE ),
+but, since Linux 3.5, the
+.B HARD_MSGSIZEMAX
+ceiling is enforced for privileged processes.
+.TP
+.I /proc/sys/fs/mqueue/queues_max
+This file can be used to view and change the system-wide limit on the
+number of message queues that can be created.
+The default value for
+.I queues_max
+is 256.
+No ceiling is imposed on the
+.I queues_max
+limit; privileged processes
+.RB ( CAP_SYS_RESOURCE )
+can exceed the limit (but see BUGS).
+.SS Resource limit
+The
+.B RLIMIT_MSGQUEUE
+resource limit, which places a limit on the amount of space
+that can be consumed by all of the message queues
+belonging to a process's real user ID, is described in
+.BR getrlimit (2).
+.SS Mounting the message queue filesystem
+On Linux, message queues are created in a virtual filesystem.
+(Other implementations may also provide such a feature,
+but the details are likely to differ.)
+This filesystem can be mounted (by the superuser) using the following
+commands:
+.PP
+.in +4n
+.EX
+.RB "#" " mkdir /dev/mqueue"
+.RB "#" " mount \-t mqueue none /dev/mqueue"
+.EE
+.in
+.PP
+The sticky bit is automatically enabled on the mount directory.
+.PP
+After the filesystem has been mounted, the message queues on the system
+can be viewed and manipulated using the commands usually used for files
+(e.g.,
+.BR ls (1)
+and
+.BR rm (1)).
+.PP
+The contents of each file in the directory consist of a single line
+containing information about the queue:
+.PP
+.in +4n
+.EX
+.RB "$" " cat /dev/mqueue/mymq"
+QSIZE:129 NOTIFY:2 SIGNO:0 NOTIFY_PID:8260
+.EE
+.in
+.PP
+These fields are as follows:
+.TP
+.B QSIZE
+Number of bytes of data in all messages in the queue (but see BUGS).
+.TP
+.B NOTIFY_PID
+If this is nonzero, then the process with this PID has used
+.BR mq_notify (3)
+to register for asynchronous message notification,
+and the remaining fields describe how notification occurs.
+.TP
+.B NOTIFY
+Notification method:
+0 is
+.BR SIGEV_SIGNAL ;
+1 is
+.BR SIGEV_NONE ;
+and
+2 is
+.BR SIGEV_THREAD .
+.TP
+.B SIGNO
+Signal number to be used for
+.BR SIGEV_SIGNAL .
+.SS Linux implementation of message queue descriptors
+On Linux, a message queue descriptor is actually a file descriptor.
+(POSIX does not require such an implementation.)
+This means that a message queue descriptor can be monitored using
+.BR select (2),
+.BR poll (2),
+or
+.BR epoll (7).
+This is not portable.
+.PP
+The close-on-exec flag (see
+.BR open (2))
+is automatically set on the file descriptor returned by
+.BR mq_open (2).
+.SS IPC namespaces
+For a discussion of the interaction of POSIX message queue objects and
+IPC namespaces, see
+.BR ipc_namespaces (7).
+.SH NOTES
+System V message queues
+.RB ( msgget (2),
+.BR msgsnd (2),
+.BR msgrcv (2),
+etc.) are an older API for exchanging messages between processes.
+POSIX message queues provide a better designed interface than
+System V message queues;
+on the other hand POSIX message queues are less widely available
+(especially on older systems) than System V message queues.
+.PP
+Linux does not currently (Linux 2.6.26) support the use of access control
+lists (ACLs) for POSIX message queues.
+.SH BUGS
+From Linux 3.5 to Linux 3.14, the kernel imposed a ceiling of 1024
+.RB ( HARD_QUEUESMAX )
+on the value to which the
+.I queues_max
+limit could be raised,
+and the ceiling was enforced even for privileged processes.
+This ceiling value was removed in Linux 3.14,
+and patches to stable Linux 3.5.x to Linux 3.13.x also removed the ceiling.
+.PP
+As originally implemented (and documented),
+the QSIZE field displayed the total number of (user-supplied)
+bytes in all messages in the message queue.
+Some changes in Linux 3.5
+.\" commit d6629859b36d
+inadvertently changed the behavior,
+so that this field also included a count of kernel overhead bytes
+used to store the messages in the queue.
+This behavioral regression was rectified in Linux 4.2
+.\" commit de54b9ac253787c366bbfb28d901a31954eb3511
+(and earlier stable kernel series),
+so that the count once more included just the bytes of user data
+in messages in the queue.
+.SH EXAMPLES
+An example of the use of various message queue functions is shown in
+.BR mq_notify (3).
+.SH SEE ALSO
+.BR getrlimit (2),
+.BR mq_getsetattr (2),
+.BR poll (2),
+.BR select (2),
+.BR mq_close (3),
+.BR mq_getattr (3),
+.BR mq_notify (3),
+.BR mq_open (3),
+.BR mq_receive (3),
+.BR mq_send (3),
+.BR mq_unlink (3),
+.BR epoll (7),
+.BR namespaces (7)
diff --git a/man7/namespaces.7 b/man7/namespaces.7
new file mode 100644
index 0000000..6ff11af
--- /dev/null
+++ b/man7/namespaces.7
@@ -0,0 +1,417 @@
+'\" t
+.\" Copyright (c) 2013, 2016, 2017 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" and Copyright (c) 2012 by Eric W. Biederman <ebiederm@xmission.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH namespaces 7 2023-07-20 "Linux man-pages 6.05.01"
+.SH NAME
+namespaces \- overview of Linux namespaces
+.SH DESCRIPTION
+A namespace wraps a global system resource in an abstraction that
+makes it appear to the processes within the namespace that they
+have their own isolated instance of the global resource.
+Changes to the global resource are visible to other processes
+that are members of the namespace, but are invisible to other processes.
+One use of namespaces is to implement containers.
+.PP
+This page provides pointers to information on the various namespace types,
+describes the associated
+.I /proc
+files, and summarizes the APIs for working with namespaces.
+.\"
+.SS Namespace types
+The following table shows the namespace types available on Linux.
+The second column of the table shows the flag value that is used to specify
+the namespace type in various APIs.
+The third column identifies the manual page that provides details
+on the namespace type.
+The last column is a summary of the resources that are isolated by
+the namespace type.
+.TS
+lB lB lB lB
+l1 lB1 l1 l.
+Namespace Flag Page Isolates
+Cgroup CLONE_NEWCGROUP \fBcgroup_namespaces\fP(7) T{
+Cgroup root directory
+T}
+IPC CLONE_NEWIPC \fBipc_namespaces\fP(7) T{
+System V IPC,
+POSIX message queues
+T}
+Network CLONE_NEWNET \fBnetwork_namespaces\fP(7) T{
+Network devices,
+stacks, ports, etc.
+T}
+Mount CLONE_NEWNS \fBmount_namespaces\fP(7) Mount points
+PID CLONE_NEWPID \fBpid_namespaces\fP(7) Process IDs
+Time CLONE_NEWTIME \fBtime_namespaces\fP(7) T{
+Boot and monotonic
+clocks
+T}
+User CLONE_NEWUSER \fBuser_namespaces\fP(7) T{
+User and group IDs
+T}
+UTS CLONE_NEWUTS \fButs_namespaces\fP(7) T{
+Hostname and NIS
+domain name
+T}
+.TE
+.\"
+.\" ==================== The namespaces API ====================
+.\"
+.SS The namespaces API
+As well as various
+.I /proc
+files described below,
+the namespaces API includes the following system calls:
+.TP
+.BR clone (2)
+The
+.BR clone (2)
+system call creates a new process.
+If the
+.I flags
+argument of the call specifies one or more of the
+.B CLONE_NEW*
+flags listed above, then new namespaces are created for each flag,
+and the child process is made a member of those namespaces.
+(This system call also implements a number of features
+unrelated to namespaces.)
+.TP
+.BR setns (2)
+The
+.BR setns (2)
+system call allows the calling process to join an existing namespace.
+The namespace to join is specified via a file descriptor that refers to
+one of the
+.IR /proc/ pid /ns
+files described below.
+.TP
+.BR unshare (2)
+The
+.BR unshare (2)
+system call moves the calling process to a new namespace.
+If the
+.I flags
+argument of the call specifies one or more of the
+.B CLONE_NEW*
+flags listed above, then new namespaces are created for each flag,
+and the calling process is made a member of those namespaces.
+(This system call also implements a number of features
+unrelated to namespaces.)
+.TP
+.BR ioctl (2)
+Various
+.BR ioctl (2)
+operations can be used to discover information about namespaces.
+These operations are described in
+.BR ioctl_ns (2).
+.PP
+Creation of new namespaces using
+.BR clone (2)
+and
+.BR unshare (2)
+in most cases requires the
+.B CAP_SYS_ADMIN
+capability, since, in the new namespace,
+the creator will have the power to change global resources
+that are visible to other processes that are subsequently created in,
+or join, the namespace.
+User namespaces are the exception: since Linux 3.8,
+no privilege is required to create a user namespace.
+.\"
+.\" ==================== The /proc/[pid]/ns/ directory ====================
+.\"
+.SS The \fI/proc/\fPpid\fI/ns/\fP directory
+Each process has a
+.IR /proc/ pid /ns/
+.\" See commit 6b4e306aa3dc94a0545eb9279475b1ab6209a31f
+subdirectory containing one entry for each namespace that
+supports being manipulated by
+.BR setns (2):
+.PP
+.in +4n
+.EX
+$ \fBls \-l /proc/$$/ns | awk \[aq]{print $1, $9, $10, $11}\[aq]\fP
+total 0
+lrwxrwxrwx. cgroup \-> cgroup:[4026531835]
+lrwxrwxrwx. ipc \-> ipc:[4026531839]
+lrwxrwxrwx. mnt \-> mnt:[4026531840]
+lrwxrwxrwx. net \-> net:[4026531969]
+lrwxrwxrwx. pid \-> pid:[4026531836]
+lrwxrwxrwx. pid_for_children \-> pid:[4026531834]
+lrwxrwxrwx. time \-> time:[4026531834]
+lrwxrwxrwx. time_for_children \-> time:[4026531834]
+lrwxrwxrwx. user \-> user:[4026531837]
+lrwxrwxrwx. uts \-> uts:[4026531838]
+.EE
+.in
+.PP
+Bind mounting (see
+.BR mount (2))
+one of the files in this directory
+to somewhere else in the filesystem keeps
+the corresponding namespace of the process specified by
+.I pid
+alive even if all processes currently in the namespace terminate.
+.PP
+Opening one of the files in this directory
+(or a file that is bind mounted to one of these files)
+returns a file handle for
+the corresponding namespace of the process specified by
+.IR pid .
+As long as this file descriptor remains open,
+the namespace will remain alive,
+even if all processes in the namespace terminate.
+The file descriptor can be passed to
+.BR setns (2).
+.PP
+In Linux 3.7 and earlier, these files were visible as hard links.
+Since Linux 3.8,
+.\" commit bf056bfa80596a5d14b26b17276a56a0dcb080e5
+they appear as symbolic links.
+If two processes are in the same namespace,
+then the device IDs and inode numbers of their
+.IR /proc/ pid /ns/ xxx
+symbolic links will be the same; an application can check this using the
+.I stat.st_dev
+.\" Eric Biederman: "I reserve the right for st_dev to be significant
+.\" when comparing namespaces."
+.\" https://lore.kernel.org/lkml/87poky5ca9.fsf@xmission.com/
+.\" Re: Documenting the ioctl interfaces to discover relationships...
+.\" Date: Mon, 12 Dec 2016 11:30:38 +1300
+and
+.I stat.st_ino
+fields returned by
+.BR stat (2).
+The content of this symbolic link is a string containing
+the namespace type and inode number as in the following example:
+.PP
+.in +4n
+.EX
+$ \fBreadlink /proc/$$/ns/uts\fP
+uts:[4026531838]
+.EE
+.in
+.PP
+The symbolic links in this subdirectory are as follows:
+.TP
+.IR /proc/ pid /ns/cgroup " (since Linux 4.6)"
+This file is a handle for the cgroup namespace of the process.
+.TP
+.IR /proc/ pid /ns/ipc " (since Linux 3.0)"
+This file is a handle for the IPC namespace of the process.
+.TP
+.IR /proc/ pid /ns/mnt " (since Linux 3.8)"
+.\" commit 8823c079ba7136dc1948d6f6dcb5f8022bde438e
+This file is a handle for the mount namespace of the process.
+.TP
+.IR /proc/ pid /ns/net " (since Linux 3.0)"
+This file is a handle for the network namespace of the process.
+.TP
+.IR /proc/ pid /ns/pid " (since Linux 3.8)"
+.\" commit 57e8391d327609cbf12d843259c968b9e5c1838f
+This file is a handle for the PID namespace of the process.
+This handle is permanent for the lifetime of the process
+(i.e., a process's PID namespace membership never changes).
+.TP
+.IR /proc/ pid /ns/pid_for_children " (since Linux 4.12)"
+.\" commit eaa0d190bfe1ed891b814a52712dcd852554cb08
+This file is a handle for the PID namespace of
+child processes created by this process.
+This can change as a consequence of calls to
+.BR unshare (2)
+and
+.BR setns (2)
+(see
+.BR pid_namespaces (7)),
+so the file may differ from
+.IR /proc/ pid /ns/pid .
+The symbolic link gains a value only after the first child process
+is created in the namespace.
+(Beforehand,
+.BR readlink (2)
+of the symbolic link will return an empty buffer.)
+.TP
+.IR /proc/ pid /ns/time " (since Linux 5.6)"
+This file is a handle for the time namespace of the process.
+.TP
+.IR /proc/ pid /ns/time_for_children " (since Linux 5.6)"
+This file is a handle for the time namespace of
+child processes created by this process.
+This can change as a consequence of calls to
+.BR unshare (2)
+and
+.BR setns (2)
+(see
+.BR time_namespaces (7)),
+so the file may differ from
+.IR /proc/ pid /ns/time .
+.TP
+.IR /proc/ pid /ns/user " (since Linux 3.8)"
+.\" commit cde1975bc242f3e1072bde623ef378e547b73f91
+This file is a handle for the user namespace of the process.
+.TP
+.IR /proc/ pid /ns/uts " (since Linux 3.0)"
+This file is a handle for the UTS namespace of the process.
+.PP
+Permission to dereference or read
+.RB ( readlink (2))
+these symbolic links is governed by a ptrace access mode
+.B PTRACE_MODE_READ_FSCREDS
+check; see
+.BR ptrace (2).
+.\"
+.\" ==================== The /proc/sys/user directory ====================
+.\"
+.SS The \fI/proc/sys/user\fP directory
+The files in the
+.I /proc/sys/user
+directory (which is present since Linux 4.9) expose limits
+on the number of namespaces of various types that can be created.
+The files are as follows:
+.TP
+.I max_cgroup_namespaces
+The value in this file defines a per-user limit on the number of
+cgroup namespaces that may be created in the user namespace.
+.TP
+.I max_ipc_namespaces
+The value in this file defines a per-user limit on the number of
+IPC namespaces that may be created in the user namespace.
+.TP
+.I max_mnt_namespaces
+The value in this file defines a per-user limit on the number of
+mount namespaces that may be created in the user namespace.
+.TP
+.I max_net_namespaces
+The value in this file defines a per-user limit on the number of
+network namespaces that may be created in the user namespace.
+.TP
+.I max_pid_namespaces
+The value in this file defines a per-user limit on the number of
+PID namespaces that may be created in the user namespace.
+.TP
+.IR max_time_namespaces " (since Linux 5.7)"
+.\" commit eeec26d5da8248ea4e240b8795bb4364213d3247
+The value in this file defines a per-user limit on the number of
+time namespaces that may be created in the user namespace.
+.TP
+.I max_user_namespaces
+The value in this file defines a per-user limit on the number of
+user namespaces that may be created in the user namespace.
+.TP
+.I max_uts_namespaces
+The value in this file defines a per-user limit on the number of
+UTS namespaces that may be created in the user namespace.
+.PP
+Note the following details about these files:
+.IP \[bu] 3
+The values in these files are modifiable by privileged processes.
+.IP \[bu]
+The values exposed by these files are the limits for the user namespace
+in which the opening process resides.
+.IP \[bu]
+The limits are per-user.
+Each user in the same user namespace
+can create namespaces up to the defined limit.
+.IP \[bu]
+The limits apply to all users, including UID 0.
+.IP \[bu]
+These limits apply in addition to any other per-namespace
+limits (such as those for PID and user namespaces) that may be enforced.
+.IP \[bu]
+Upon encountering these limits,
+.BR clone (2)
+and
+.BR unshare (2)
+fail with the error
+.BR ENOSPC .
+.IP \[bu]
+For the initial user namespace,
+the default value in each of these files is half the limit on the number
+of threads that may be created
+.RI ( /proc/sys/kernel/threads\-max ).
+In all descendant user namespaces, the default value in each file is
+.BR MAXINT .
+.IP \[bu]
+When a namespace is created, the object is also accounted
+against ancestor namespaces.
+More precisely:
+.RS
+.IP \[bu] 3
+Each user namespace has a creator UID.
+.IP \[bu]
+When a namespace is created,
+it is accounted against the creator UIDs in each of the
+ancestor user namespaces,
+and the kernel ensures that the corresponding namespace limit
+for the creator UID in the ancestor namespace is not exceeded.
+.IP \[bu]
+The aforementioned point ensures that creating a new user namespace
+cannot be used as a means to escape the limits in force
+in the current user namespace.
+.RE
+.\"
+.SS Namespace lifetime
+Absent any other factors,
+a namespace is automatically torn down when the last process in
+the namespace terminates or leaves the namespace.
+However, there are a number of other factors that may pin
+a namespace into existence even though it has no member processes.
+These factors include the following:
+.IP \[bu] 3
+An open file descriptor or a bind mount exists for the corresponding
+.IR /proc/ pid /ns/*
+file.
+.IP \[bu]
+The namespace is hierarchical (i.e., a PID or user namespace),
+and has a child namespace.
+.IP \[bu]
+It is a user namespace that owns one or more nonuser namespaces.
+.IP \[bu]
+It is a PID namespace,
+and there is a process that refers to the namespace via a
+.IR /proc/ pid /ns/pid_for_children
+symbolic link.
+.IP \[bu]
+It is a time namespace,
+and there is a process that refers to the namespace via a
+.IR /proc/ pid /ns/time_for_children
+symbolic link.
+.IP \[bu]
+It is an IPC namespace, and a corresponding mount of an
+.I mqueue
+filesystem (see
+.BR mq_overview (7))
+refers to this namespace.
+.IP \[bu]
+It is a PID namespace, and a corresponding mount of a
+.BR proc (5)
+filesystem refers to this namespace.
+.SH EXAMPLES
+See
+.BR clone (2)
+and
+.BR user_namespaces (7).
+.SH SEE ALSO
+.BR nsenter (1),
+.BR readlink (1),
+.BR unshare (1),
+.BR clone (2),
+.BR ioctl_ns (2),
+.BR setns (2),
+.BR unshare (2),
+.BR proc (5),
+.BR capabilities (7),
+.BR cgroup_namespaces (7),
+.BR cgroups (7),
+.BR credentials (7),
+.BR ipc_namespaces (7),
+.BR network_namespaces (7),
+.BR pid_namespaces (7),
+.BR user_namespaces (7),
+.BR uts_namespaces (7),
+.BR lsns (8),
+.BR switch_root (8)
diff --git a/man7/netdevice.7 b/man7/netdevice.7
new file mode 100644
index 0000000..a0f0049
--- /dev/null
+++ b/man7/netdevice.7
@@ -0,0 +1,421 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: netdevice.7,v 1.10 2000/08/17 10:09:54 ak Exp $
+.\"
+.\" Modified, 2004-11-25, mtk, formatting and a few wording fixes
+.\"
+.\" Modified, 2011-11-02, <bidulock@openss7.org>, added many basic
+.\" but missing ioctls, such as SIOCGIFADDR.
+.\"
+.TH netdevice 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+netdevice \- low-level access to Linux network devices
+.SH SYNOPSIS
+.nf
+.B "#include <sys/ioctl.h>"
+.B "#include <net/if.h>"
+.fi
+.SH DESCRIPTION
+This man page describes the sockets interface which is used to configure
+network devices.
+.PP
+Linux supports some standard ioctls to configure network devices.
+They can be used on any socket's file descriptor regardless of the
+family or type.
+Most of them pass an
+.I ifreq
+structure:
+.PP
+.in +4n
+.EX
+struct ifreq {
+ char ifr_name[IFNAMSIZ]; /* Interface name */
+ union {
+ struct sockaddr ifr_addr;
+ struct sockaddr ifr_dstaddr;
+ struct sockaddr ifr_broadaddr;
+ struct sockaddr ifr_netmask;
+ struct sockaddr ifr_hwaddr;
+ short ifr_flags;
+ int ifr_ifindex;
+ int ifr_metric;
+ int ifr_mtu;
+ struct ifmap ifr_map;
+ char ifr_slave[IFNAMSIZ];
+ char ifr_newname[IFNAMSIZ];
+ char *ifr_data;
+ };
+};
+.EE
+.in
+.PP
+.B AF_INET6
+is an exception.
+It passes an
+.I in6_ifreq
+structure:
+.PP
+.in +4n
+.EX
+struct in6_ifreq {
+ struct in6_addr ifr6_addr;
+ u32 ifr6_prefixlen;
+ int ifr6_ifindex; /* Interface index */
+};
+.EE
+.in
+.PP
+Normally, the user specifies which device to affect by setting
+.I ifr_name
+to the name of the interface or
+.I ifr6_ifindex
+to the index of the interface.
+All other members of the structure may
+share memory.
+.SS Ioctls
+If an ioctl is marked as privileged, then using it requires an effective
+user ID of 0 or the
+.B CAP_NET_ADMIN
+capability.
+If this is not the case,
+.B EPERM
+will be returned.
+.TP
+.B SIOCGIFNAME
+Given the
+.IR ifr_ifindex ,
+return the name of the interface in
+.IR ifr_name .
+This is the only ioctl which returns its result in
+.IR ifr_name .
+.TP
+.B SIOCGIFINDEX
+Retrieve the interface index of the interface into
+.IR ifr_ifindex .
+.TP
+.BR SIOCGIFFLAGS ", " SIOCSIFFLAGS
+Get or set the active flag word of the device.
+.I ifr_flags
+contains a bit mask of the following values:
+.\" Do not right adjust text blocks in tables
+.na
+.TS
+tab(:);
+c s
+l l.
+Device flags
+IFF_UP:Interface is running.
+IFF_BROADCAST:Valid broadcast address set.
+IFF_DEBUG:Internal debugging flag.
+IFF_LOOPBACK:Interface is a loopback interface.
+IFF_POINTOPOINT:Interface is a point-to-point link.
+IFF_RUNNING:Resources allocated.
+IFF_NOARP:T{
+No ARP protocol, L2 destination address not set.
+T}
+IFF_PROMISC:Interface is in promiscuous mode.
+IFF_NOTRAILERS:Avoid use of trailers.
+IFF_ALLMULTI:Receive all multicast packets.
+IFF_MASTER:Master of a load balancing bundle.
+IFF_SLAVE:Slave of a load balancing bundle.
+IFF_MULTICAST:Supports multicast
+IFF_PORTSEL:Is able to select media type via ifmap.
+IFF_AUTOMEDIA:Auto media selection active.
+IFF_DYNAMIC:T{
+The addresses are lost when the interface goes down.
+T}
+IFF_LOWER_UP:Driver signals L1 up (since Linux 2.6.17)
+IFF_DORMANT:Driver signals dormant (since Linux 2.6.17)
+IFF_ECHO:Echo sent packets (since Linux 2.6.25)
+.TE
+.ad
+.PP
+Setting the active flag word is a privileged operation, but any
+process may read it.
+.TP
+.BR SIOCGIFPFLAGS ", " SIOCSIFPFLAGS
+Get or set extended (private) flags for the device.
+.I ifr_flags
+contains a bit mask of the following values:
+.TS
+tab(:);
+c s
+l l.
+Private flags
+IFF_802_1Q_VLAN:Interface is 802.1Q VLAN device.
+IFF_EBRIDGE:Interface is Ethernet bridging device.
+IFF_SLAVE_INACTIVE:Interface is inactive bonding slave.
+IFF_MASTER_8023AD:Interface is 802.3ad bonding master.
+IFF_MASTER_ALB:Interface is balanced-alb bonding master.
+IFF_BONDING:Interface is a bonding master or slave.
+IFF_SLAVE_NEEDARP:Interface needs ARPs for validation.
+IFF_ISATAP:Interface is RFC4214 ISATAP interface.
+.TE
+.PP
+Setting the extended (private) interface flags is a privileged operation.
+.TP
+.BR SIOCGIFADDR ", " SIOCSIFADDR ", " SIOCDIFADDR
+Get, set, or delete the address of the device using
+.IR ifr_addr ,
+or
+.I ifr6_addr
+with
+.IR ifr6_prefixlen .
+Setting or deleting the interface address is a privileged operation.
+For compatibility,
+.B SIOCGIFADDR
+returns only
+.B AF_INET
+addresses,
+.B SIOCSIFADDR
+accepts
+.B AF_INET
+and
+.B AF_INET6
+addresses, and
+.B SIOCDIFADDR
+deletes only
+.B AF_INET6
+addresses.
+An
+.B AF_INET
+address can be deleted by setting it to zero via
+.BR SIOCSIFADDR .
+.TP
+.BR SIOCGIFDSTADDR ", " SIOCSIFDSTADDR
+Get or set the destination address of a point-to-point device using
+.IR ifr_dstaddr .
+For compatibility, only
+.B AF_INET
+addresses are accepted or returned.
+Setting the destination address is a privileged operation.
+.TP
+.BR SIOCGIFBRDADDR ", " SIOCSIFBRDADDR
+Get or set the broadcast address for a device using
+.IR ifr_broadaddr .
+For compatibility, only
+.B AF_INET
+addresses are accepted or returned.
+Setting the broadcast address is a privileged operation.
+.TP
+.BR SIOCGIFNETMASK ", " SIOCSIFNETMASK
+Get or set the network mask for a device using
+.IR ifr_netmask .
+For compatibility, only
+.B AF_INET
+addresses are accepted or returned.
+Setting the network mask is a privileged operation.
+.TP
+.BR SIOCGIFMETRIC ", " SIOCSIFMETRIC
+Get or set the metric of the device using
+.IR ifr_metric .
+This is currently not implemented; it sets
+.I ifr_metric
+to 0 if you attempt to read it and returns
+.B EOPNOTSUPP
+if you attempt to set it.
+.TP
+.BR SIOCGIFMTU ", " SIOCSIFMTU
+Get or set the MTU (Maximum Transmission Unit) of a device using
+.IR ifr_mtu .
+Setting the MTU is a privileged operation.
+Setting the MTU to
+a value that is too small may cause kernel crashes.
+.TP
+.BR SIOCGIFHWADDR ", " SIOCSIFHWADDR
+Get or set the hardware address of a device using
+.IR ifr_hwaddr .
+The hardware address is specified in a struct
+.IR sockaddr .
+.I sa_family
+contains the ARPHRD_* device type,
+.I sa_data
+the L2 hardware address starting from byte 0.
+Setting the hardware address is a privileged operation.
+.TP
+.B SIOCSIFHWBROADCAST
+Set the hardware broadcast address of a device from
+.IR ifr_hwaddr .
+This is a privileged operation.
+.TP
+.BR SIOCGIFMAP ", " SIOCSIFMAP
+Get or set the interface's hardware parameters using
+.IR ifr_map .
+Setting the parameters is a privileged operation.
+.IP
+.in +4n
+.EX
+struct ifmap {
+ unsigned long mem_start;
+ unsigned long mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+};
+.EE
+.in
+.IP
+The interpretation of the ifmap structure depends on the device driver
+and the architecture.
+.TP
+.BR SIOCADDMULTI ", " SIOCDELMULTI
+Add an address to or delete an address from the device's link layer
+multicast filters using
+.IR ifr_hwaddr .
+These are privileged operations.
+See also
+.BR packet (7)
+for an alternative.
+.TP
+.BR SIOCGIFTXQLEN ", " SIOCSIFTXQLEN
+Get or set the transmit queue length of a device using
+.IR ifr_qlen .
+Setting the transmit queue length is a privileged operation.
+.TP
+.B SIOCSIFNAME
+Changes the name of the interface specified in
+.I ifr_name
+to
+.IR ifr_newname .
+This is a privileged operation.
+It is allowed only when the interface
+is not up.
+.TP
+.B SIOCGIFCONF
+Return a list of interface (network layer) addresses.
+This currently
+means only addresses of the
+.B AF_INET
+(IPv4) family for compatibility.
+Unlike the others, this ioctl passes an
+.I ifconf
+structure:
+.IP
+.in +4n
+.EX
+struct ifconf {
+ int ifc_len; /* size of buffer */
+ union {
+ char *ifc_buf; /* buffer address */
+ struct ifreq *ifc_req; /* array of structures */
+ };
+};
+.EE
+.in
+.IP
+If
+.I ifc_req
+is NULL,
+.B SIOCGIFCONF
+returns the necessary buffer size in bytes
+for receiving all available addresses in
+.IR ifc_len .
+Otherwise,
+.I ifc_req
+contains a pointer to an array of
+.I ifreq
+structures to be filled with all currently active L3 interface addresses.
+.I ifc_len
+contains the size of the array in bytes.
+Within each
+.I ifreq
+structure,
+.I ifr_name
+will receive the interface name, and
+.I ifr_addr
+the address.
+The actual number of bytes transferred is returned in
+.IR ifc_len .
+.IP
+If the size specified by
+.I ifc_len
+is insufficient to store all the addresses,
+the kernel will skip the exceeding ones and return success.
+There is no reliable way of detecting this condition once it has occurred.
+It is therefore recommended to either determine the necessary buffer size
+beforehand by calling
+.B SIOCGIFCONF
+with
+.I ifc_req
+set to NULL, or to retry the call with a bigger buffer whenever
+.I ifc_len
+upon return differs by less than
+.I sizeof(struct ifreq)
+from its original value.
+.IP
+If an error occurs accessing the
+.I ifconf
+or
+.I ifreq
+structures,
+.B EFAULT
+will be returned.
+.\" Slaving isn't supported in Linux 2.2
+.\" .
+.\" .TP
+.\" .BR SIOCGIFSLAVE ", " SIOCSIFSLAVE
+.\" Get or set the slave device using
+.\" .IR ifr_slave .
+.\" Setting the slave device is a privileged operation.
+.\" .PP
+.\" FIXME . add amateur radio stuff.
+.PP
+Most protocols support their own ioctls to configure protocol-specific
+interface options.
+See the protocol man pages for a description.
+For configuring IP addresses, see
+.BR ip (7).
+.PP
+In addition, some devices support private ioctls.
+These are not described here.
+.SH NOTES
+.B SIOCGIFCONF
+and the other ioctls that accept or return only
+.B AF_INET
+socket addresses
+are IP-specific and perhaps should rather be documented in
+.BR ip (7).
+.PP
+The names of interfaces with no addresses or that don't have the
+.B IFF_RUNNING
+flag set can be found via
+.IR /proc/net/dev .
+.PP
+.B AF_INET6
+IPv6 addresses can be read from
+.I /proc/net/if_inet6
+or via
+.BR rtnetlink (7).
+Adding a new IPv6 address and deleting an existing IPv6 address
+can be done via
+.B SIOCSIFADDR
+and
+.B SIOCDIFADDR
+or via
+.BR rtnetlink (7).
+Retrieving or changing destination IPv6 addresses of a point-to-point
+interface is possible only via
+.BR rtnetlink (7).
+.SH BUGS
+glibc 2.1 is missing the
+.I ifr_newname
+macro in
+.IR <net/if.h> .
+Add the following to your program as a workaround:
+.PP
+.in +4n
+.EX
+#ifndef ifr_newname
+#define ifr_newname ifr_ifru.ifru_slave
+#endif
+.EE
+.in
+.SH SEE ALSO
+.BR proc (5),
+.BR capabilities (7),
+.BR ip (7),
+.BR rtnetlink (7)
diff --git a/man7/netlink.7 b/man7/netlink.7
new file mode 100644
index 0000000..4d6cdbc
--- /dev/null
+++ b/man7/netlink.7
@@ -0,0 +1,609 @@
+'\" t
+.\" This man page is Copyright (c) 1998 by Andi Kleen.
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.\" Based on the original comments from Alexey Kuznetsov
+.\" Modified 2005-12-27 by Hasso Tepper <hasso@estpak.ee>
+.\" $Id: netlink.7,v 1.8 2000/06/22 13:23:00 ak Exp $
+.TH netlink 7 2023-07-30 "Linux man-pages 6.05.01"
+.SH NAME
+netlink \- communication between kernel and user space (AF_NETLINK)
+.SH SYNOPSIS
+.nf
+.B #include <asm/types.h>
+.B #include <sys/socket.h>
+.B #include <linux/netlink.h>
+.PP
+.BI "netlink_socket = socket(AF_NETLINK, " socket_type ", " netlink_family );
+.fi
+.SH DESCRIPTION
+Netlink is used to transfer information between the kernel and
+user-space processes.
+It consists of a standard sockets-based interface for user space
+processes and an internal kernel API for kernel modules.
+The internal kernel interface is not documented in this manual page.
+There is also an obsolete netlink interface
+via netlink character devices; this interface is not documented here
+and is provided only for backward compatibility.
+.PP
+Netlink is a datagram-oriented service.
+Both
+.B SOCK_RAW
+and
+.B SOCK_DGRAM
+are valid values for
+.IR socket_type .
+However, the netlink protocol does not distinguish between datagram
+and raw sockets.
+.PP
+.I netlink_family
+selects the kernel module or netlink group to communicate with.
+The currently assigned netlink families are:
+.TP
+.B NETLINK_ROUTE
+Receives routing and link updates and may be used to modify the routing
+tables (both IPv4 and IPv6), IP addresses, link parameters,
+neighbor setups, queueing disciplines, traffic classes, and
+packet classifiers (see
+.BR rtnetlink (7)).
+.TP
+.BR NETLINK_W1 " (Linux 2.6.13 to Linux 2.6.17)"
+Messages from 1-wire subsystem.
+.TP
+.B NETLINK_USERSOCK
+Reserved for user-mode socket protocols.
+.TP
+.BR NETLINK_FIREWALL " (up to and including Linux 3.4)"
+.\" removed by commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84
+Transport IPv4 packets from netfilter to user space.
+Used by
+.I ip_queue
+kernel module.
+After a long period of being declared obsolete (in favor of the more advanced
+.I nfnetlink_queue
+feature),
+.B NETLINK_FIREWALL
+was removed in Linux 3.5.
+.TP
+.BR NETLINK_SOCK_DIAG " (since Linux 3.3)"
+.\" commit 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6
+Query information about sockets of various protocol families from the kernel
+(see
+.BR sock_diag (7)).
+.TP
+.BR NETLINK_INET_DIAG " (since Linux 2.6.14)"
+An obsolete synonym for
+.BR NETLINK_SOCK_DIAG .
+.TP
+.BR NETLINK_NFLOG " (up to and including Linux 3.16)"
+Netfilter/iptables ULOG.
+.TP
+.B NETLINK_XFRM
+.\" FIXME More details on NETLINK_XFRM needed.
+IPsec.
+.TP
+.BR NETLINK_SELINUX " (since Linux 2.6.4)"
+SELinux event notifications.
+.TP
+.BR NETLINK_ISCSI " (since Linux 2.6.15)"
+.\" FIXME More details on NETLINK_ISCSI needed.
+Open-iSCSI.
+.TP
+.BR NETLINK_AUDIT " (since Linux 2.6.6)"
+.\" FIXME More details on NETLINK_AUDIT needed.
+Auditing.
+.TP
+.BR NETLINK_FIB_LOOKUP " (since Linux 2.6.13)"
+.\" FIXME More details on NETLINK_FIB_LOOKUP needed.
+Access to FIB lookup from user space.
+.TP
+.BR NETLINK_CONNECTOR " (since Linux 2.6.14)"
+Kernel connector.
+See
+.I Documentation/driver\-api/connector.rst
+(or
+.I Documentation/connector/connector.*
+.\" commit baa293e9544bea71361950d071579f0e4d5713ed
+in Linux 5.2 and earlier)
+in the Linux kernel source tree for further information.
+.TP
+.BR NETLINK_NETFILTER " (since Linux 2.6.14)"
+.\" FIXME More details on NETLINK_NETFILTER needed.
+Netfilter subsystem.
+.TP
+.BR NETLINK_SCSITRANSPORT " (since Linux 2.6.19)"
+.\" commit 84314fd4740ad73550c76dee4a9578979d84af48
+.\" FIXME More details on NETLINK_SCSITRANSPORT needed.
+SCSI Transports.
+.TP
+.BR NETLINK_RDMA " (since Linux 3.0)"
+.\" commit b2cbae2c248776d81cc265ff7d48405b6a4cc463
+.\" FIXME More details on NETLINK_RDMA needed.
+Infiniband RDMA.
+.TP
+.BR NETLINK_IP6_FW " (up to and including Linux 3.4)"
+Transport IPv6 packets from netfilter to user space.
+Used by
+.I ip6_queue
+kernel module.
+.TP
+.B NETLINK_DNRTMSG
+DECnet routing messages.
+.TP
+.BR NETLINK_KOBJECT_UEVENT " (since Linux 2.6.10)"
+.\" FIXME More details on NETLINK_KOBJECT_UEVENT needed.
+Kernel messages to user space.
+.TP
+.BR NETLINK_GENERIC " (since Linux 2.6.15)"
+Generic netlink family for simplified netlink usage.
+.TP
+.BR NETLINK_CRYPTO " (since Linux 3.2)"
+.\" commit a38f7907b926e4c6c7d389ad96cc38cec2e5a9e9
+.\" Author: Steffen Klassert <steffen.klassert@secunet.com>
+Netlink interface to request information about ciphers registered
+with the kernel crypto API as well as allow configuration of the
+kernel crypto API.
+.PP
+Netlink messages consist of a byte stream with one or multiple
+.I nlmsghdr
+headers and associated payload.
+The byte stream should be accessed only with the standard
+.B NLMSG_*
+macros.
+See
+.BR netlink (3)
+for further information.
+.PP
+In multipart messages (multiple
+.I nlmsghdr
+headers with associated payload in one byte stream) the first and all
+following headers have the
+.B NLM_F_MULTI
+flag set, except for the last header which has the type
+.BR NLMSG_DONE .
+.PP
+After each
+.I nlmsghdr
+the payload follows.
+.PP
+.in +4n
+.EX
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Type of message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sender port ID */
+};
+.EE
+.in
+.PP
+.I nlmsg_type
+can be one of the standard message types:
+.B NLMSG_NOOP
+message is to be ignored,
+.B NLMSG_ERROR
+message signals an error and the payload contains an
+.I nlmsgerr
+structure,
+.B NLMSG_DONE
+message terminates a multipart message.
+Error messages get the
+original request appended, unless the user requests to cap the
+error message, and get extra error data if requested.
+.PP
+.in +4n
+.EX
+struct nlmsgerr {
+ int error; /* Negative errno or 0 for acknowledgements */
+ struct nlmsghdr msg; /* Message header that caused the error */
+ /*
+ * followed by the message contents
+ * unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0).
+ * For example Generic Netlink message with attributes.
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+.EE
+.in
+.PP
+A netlink family usually specifies more message types, see the
+appropriate manual pages for that, for example,
+.BR rtnetlink (7)
+for
+.BR NETLINK_ROUTE .
+.TS
+tab(:);
+l s
+lB lx.
+Standard flag bits in \fInlmsg_flags\fP
+_
+NLM_F_REQUEST:T{
+Must be set on all request messages.
+T}
+NLM_F_MULTI:T{
+The message is part of a multipart message terminated by
+.BR NLMSG_DONE .
+T}
+NLM_F_ACK:T{
+Request for an acknowledgement on success.
+T}
+NLM_F_ECHO:T{
+Echo this request.
+T}
+.TE
+.\" No right adjustment for text blocks in tables
+.TS
+tab(:);
+l s
+lB lx.
+Additional flag bits for GET requests
+_
+NLM_F_ROOT:T{
+Return the complete table instead of a single entry.
+T}
+NLM_F_MATCH:T{
+Return all entries matching criteria passed in message content.
+Not implemented yet.
+T}
+NLM_F_ATOMIC:T{
+Return an atomic snapshot of the table.
+T}
+NLM_F_DUMP:T{
+Convenience macro; equivalent to
+(NLM_F_ROOT|NLM_F_MATCH).
+T}
+.TE
+.\" FIXME NLM_F_ATOMIC is not used anymore?
+.PP
+Note that
+.B NLM_F_ATOMIC
+requires the
+.B CAP_NET_ADMIN
+capability or an effective UID of 0.
+.TS
+tab(:);
+l s
+lB lx.
+Additional flag bits for NEW requests
+_
+NLM_F_REPLACE:T{
+Replace existing matching object.
+T}
+NLM_F_EXCL:T{
+Don't replace if the object already exists.
+T}
+NLM_F_CREATE:T{
+Create object if it doesn't already exist.
+T}
+NLM_F_APPEND:T{
+Add to the end of the object list.
+T}
+.TE
+.PP
+.I nlmsg_seq
+and
+.I nlmsg_pid
+are used to track messages.
+.I nlmsg_pid
+shows the origin of the message.
+Note that there isn't a 1:1 relationship between
+.I nlmsg_pid
+and the PID of the process if the message originated from a netlink
+socket.
+See the
+.B ADDRESS FORMATS
+section for further information.
+.PP
+Both
+.I nlmsg_seq
+and
+.I nlmsg_pid
+.\" FIXME Explain more about nlmsg_seq and nlmsg_pid.
+are opaque to netlink core.
+.PP
+Netlink is not a reliable protocol.
+It tries its best to deliver a message to its destination(s),
+but may drop messages when an out-of-memory condition or
+other error occurs.
+For reliable transfer the sender can request an
+acknowledgement from the receiver by setting the
+.B NLM_F_ACK
+flag.
+An acknowledgement is an
+.B NLMSG_ERROR
+packet with the error field set to 0.
+The application must generate acknowledgements for
+received messages itself.
+The kernel tries to send an
+.B NLMSG_ERROR
+message for every failed packet.
+A user process should follow this convention too.
+.PP
+However, reliable transmissions from kernel to user are impossible
+in any case.
+The kernel can't send a netlink message if the socket buffer is full:
+the message will be dropped and the kernel and the user-space process will
+no longer have the same view of kernel state.
+It is up to the application to detect when this happens (via the
+.B ENOBUFS
+error returned by
+.BR recvmsg (2))
+and resynchronize.
+.SS Address formats
+The
+.I sockaddr_nl
+structure describes a netlink client in user space or in the kernel.
+A
+.I sockaddr_nl
+can be either unicast (only sent to one peer) or sent to
+netlink multicast groups
+.RI ( nl_groups
+not equal to 0).
+.PP
+.in +4n
+.EX
+struct sockaddr_nl {
+ sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* Zero */
+ pid_t nl_pid; /* Port ID */
+ __u32 nl_groups; /* Multicast groups mask */
+};
+.EE
+.in
+.PP
+.I nl_pid
+is the unicast address of the netlink socket.
+It's always 0 if the destination is in the kernel.
+For a user-space process,
+.I nl_pid
+is usually the PID of the process owning the destination socket.
+However,
+.I nl_pid
+identifies a netlink socket, not a process.
+If a process owns several netlink
+sockets, then
+.I nl_pid
+can be equal to the process ID only for at most one socket.
+There are two ways to assign
+.I nl_pid
+to a netlink socket.
+If the application sets
+.I nl_pid
+before calling
+.BR bind (2),
+then it is up to the application to make sure that
+.I nl_pid
+is unique.
+If the application sets it to 0, the kernel takes care of assigning it.
+The kernel assigns the process ID to the first netlink socket the process
+opens and assigns a unique
+.I nl_pid
+to every netlink socket that the process subsequently creates.
+.PP
+.I nl_groups
+is a bit mask with every bit representing a netlink group number.
+Each netlink family has a set of 32 multicast groups.
+When
+.BR bind (2)
+is called on the socket, the
+.I nl_groups
+field in the
+.I sockaddr_nl
+should be set to a bit mask of the groups which it wishes to listen to.
+The default value for this field is zero which means that no multicasts
+will be received.
+A socket may multicast messages to any of the multicast groups by setting
+.I nl_groups
+to a bit mask of the groups it wishes to send to when it calls
+.BR sendmsg (2)
+or does a
+.BR connect (2).
+Only processes with an effective UID of 0 or the
+.B CAP_NET_ADMIN
+capability may send or listen to a netlink multicast group.
+Since Linux 2.6.13,
+.\" commit d629b836d151d43332492651dd841d32e57ebe3b
+messages can't be broadcast to multiple groups.
+Any replies to a message received for a multicast group should be
+sent back to the sending PID and the multicast group.
+Some Linux kernel subsystems may additionally allow other users
+to send and/or receive messages.
+As at Linux 3.0, the
+.BR NETLINK_KOBJECT_UEVENT ,
+.BR NETLINK_GENERIC ,
+.BR NETLINK_ROUTE ,
+and
+.B NETLINK_SELINUX
+groups allow other users to receive messages.
+No groups allow other users to send messages.
+.SS Socket options
+To set or get a netlink socket option, call
+.BR getsockopt (2)
+to read or
+.BR setsockopt (2)
+to write the option with the option level argument set to
+.BR SOL_NETLINK .
+Unless otherwise noted,
+.I optval
+is a pointer to an
+.IR int .
+.TP
+.BR NETLINK_PKTINFO " (since Linux 2.6.14)"
+.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99
+.\" Author: Patrick McHardy <kaber@trash.net>
+Enable
+.B nl_pktinfo
+control messages for received packets to get the extended
+destination group number.
+.TP
+.BR NETLINK_ADD_MEMBERSHIP ,\ NETLINK_DROP_MEMBERSHIP " (since Linux 2.6.14)"
+.\" commit 9a4595bc7e67962f13232ee55a64e063062c3a99
+.\" Author: Patrick McHardy <kaber@trash.net>
+Join/leave a group specified by
+.IR optval .
+.TP
+.BR NETLINK_LIST_MEMBERSHIPS " (since Linux 4.2)"
+.\" commit b42be38b2778eda2237fc759e55e3b698b05b315
+.\" Author: David Herrmann <dh.herrmann@gmail.com>
+Retrieve all groups a socket is a member of.
+.I optval
+is a pointer to
+.B __u32
+and
+.I optlen
+is the size of the array.
+The array is filled with the full membership set of the
+socket, and the required array size is returned in
+.IR optlen .
+.TP
+.BR NETLINK_BROADCAST_ERROR " (since Linux 2.6.30)"
+.\" commit be0c22a46cfb79ab2342bb28fde99afa94ef868e
+.\" Author: Pablo Neira Ayuso <pablo@netfilter.org>
+When not set,
+.B netlink_broadcast()
+only reports
+.B ESRCH
+errors and silently ignores
+.B ENOBUFS
+errors.
+.TP
+.BR NETLINK_NO_ENOBUFS " (since Linux 2.6.30)"
+.\" commit 38938bfe3489394e2eed5e40c9bb8f66a2ce1405
+.\" Author: Pablo Neira Ayuso <pablo@netfilter.org>
+This flag can be used by unicast and broadcast listeners to avoid receiving
+.B ENOBUFS
+errors.
+.TP
+.BR NETLINK_LISTEN_ALL_NSID " (since Linux 4.2)"
+.\" commit 59324cf35aba5336b611074028777838a963d03b
+.\" Author: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+When set, this socket will receive netlink notifications from
+all network namespaces that have an
+.I nsid
+assigned into the network namespace where the socket has been opened.
+The
+.I nsid
+is sent to user space via ancillary data.
+.TP
+.BR NETLINK_CAP_ACK " (since Linux 4.3)"
+.\" commit 0a6a3a23ea6efde079a5b77688541a98bf202721
+.\" Author: Christophe Ricard <christophe.ricard@gmail.com>
+The kernel may fail to allocate the necessary room for the acknowledgement
+message back to user space.
+This option trims off the payload of the original netlink message.
+The netlink message header is still included, so the user can guess from the
+sequence number which message triggered the acknowledgement.
+.SH VERSIONS
+The socket interface to netlink first appeared in Linux 2.2.
+.PP
+Linux 2.0 supported a more primitive device-based netlink interface
+(which is still available as a compatibility option).
+This obsolete interface is not described here.
+.SH NOTES
+It is often better to use netlink via
+.I libnetlink
+or
+.I libnl
+than via the low-level kernel interface.
+.SH BUGS
+This manual page is not complete.
+.SH EXAMPLES
+The following example creates a
+.B NETLINK_ROUTE
+netlink socket which will listen to the
+.B RTMGRP_LINK
+(network interface create/delete/up/down events) and
+.B RTMGRP_IPV4_IFADDR
+(IPv4 addresses add/delete events) multicast groups.
+.PP
+.in +4n
+.EX
+struct sockaddr_nl sa;
+\&
+memset(&sa, 0, sizeof(sa));
+sa.nl_family = AF_NETLINK;
+sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR;
+\&
+fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+bind(fd, (struct sockaddr *) &sa, sizeof(sa));
+.EE
+.in
+.PP
+The next example demonstrates how to send a netlink message to the
+kernel (pid 0).
+Note that the application must take care of message sequence numbers
+in order to reliably track acknowledgements.
+.PP
+.in +4n
+.EX
+struct nlmsghdr *nh; /* The nlmsghdr with payload to send */
+struct sockaddr_nl sa;
+struct iovec iov = { nh, nh\->nlmsg_len };
+struct msghdr msg;
+\&
+msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
+memset(&sa, 0, sizeof(sa));
+sa.nl_family = AF_NETLINK;
+nh\->nlmsg_pid = 0;
+nh\->nlmsg_seq = ++sequence_number;
+/* Request an ack from kernel by setting NLM_F_ACK */
+nh\->nlmsg_flags |= NLM_F_ACK;
+\&
+sendmsg(fd, &msg, 0);
+.EE
+.in
+.PP
+And the last example is about reading a netlink message.
+.PP
+.in +4n
+.EX
+int len;
+/* 8192 to avoid message truncation on platforms with
+ page size > 4096 */
+struct nlmsghdr buf[8192/sizeof(struct nlmsghdr)];
+struct iovec iov = { buf, sizeof(buf) };
+struct sockaddr_nl sa;
+struct msghdr msg;
+struct nlmsghdr *nh;
+\&
+msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
+len = recvmsg(fd, &msg, 0);
+\&
+for (nh = (struct nlmsghdr *) buf; NLMSG_OK (nh, len);
+ nh = NLMSG_NEXT (nh, len)) {
+ /* The end of multipart message */
+ if (nh\->nlmsg_type == NLMSG_DONE)
+ return;
+\&
+ if (nh\->nlmsg_type == NLMSG_ERROR)
+ /* Do some error handling */
+ ...
+\&
+ /* Continue with parsing payload */
+ ...
+}
+.EE
+.in
+.SH SEE ALSO
+.BR cmsg (3),
+.BR netlink (3),
+.BR capabilities (7),
+.BR rtnetlink (7),
+.BR sock_diag (7)
+.PP
+.UR ftp://ftp.inr.ac.ru\:/ip\-routing\:/iproute2*
+information about libnetlink
+.UE
+.PP
+.UR http://www.infradead.org\:/\[ti]tgr\:/libnl/
+information about libnl
+.UE
+.PP
+RFC 3549 "Linux Netlink as an IP Services Protocol"
diff --git a/man7/network_namespaces.7 b/man7/network_namespaces.7
new file mode 100644
index 0000000..a9e6306
--- /dev/null
+++ b/man7/network_namespaces.7
@@ -0,0 +1,62 @@
+.\" Copyright (c) 2017 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH network_namespaces 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+network_namespaces \- overview of Linux network namespaces
+.SH DESCRIPTION
+Network namespaces provide isolation of the system resources associated
+with networking: network devices, IPv4 and IPv6 protocol stacks,
+IP routing tables, firewall rules, the
+.I /proc/net
+directory (which is a symbolic link to
+.IR /proc/ pid /net ),
+the
+.I /sys/class/net
+directory, various files under
+.IR /proc/sys/net ,
+port numbers (sockets), and so on.
+In addition,
+network namespaces isolate the UNIX domain abstract socket namespace (see
+.BR unix (7)).
+.PP
+A physical network device can live in exactly one
+network namespace.
+When a network namespace is freed
+(i.e., when the last process in the namespace terminates),
+its physical network devices are moved back to the
+initial network namespace
+(not to the namespace of the parent of the process).
+.PP
+A virtual network
+.RB ( veth (4))
+device pair provides a pipe-like abstraction
+that can be used to create tunnels between network namespaces,
+and can be used to create a bridge to a physical network device
+in another namespace.
+When a namespace is freed, the
+.BR veth (4)
+devices that it contains are destroyed.
+.PP
+Use of network namespaces requires a kernel that is configured with the
+.B CONFIG_NET_NS
+option.
+.\" FIXME .SH EXAMPLES
+.SH SEE ALSO
+.BR nsenter (1),
+.BR unshare (1),
+.BR clone (2),
+.BR veth (4),
+.BR proc (5),
+.BR sysfs (5),
+.BR namespaces (7),
+.BR user_namespaces (7),
+.BR brctl (8),
+.BR ip (8),
+.BR ip\-address (8),
+.BR ip\-link (8),
+.BR ip\-netns (8),
+.BR iptables (8),
+.BR ovs\-vsctl (8)
diff --git a/man7/nptl.7 b/man7/nptl.7
new file mode 100644
index 0000000..a00c845
--- /dev/null
+++ b/man7/nptl.7
@@ -0,0 +1,112 @@
+.\" Copyright (c) 2015 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH nptl 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+nptl \- Native POSIX Threads Library
+.SH DESCRIPTION
+NPTL (Native POSIX Threads Library)
+is the GNU C library POSIX threads implementation that is used on modern
+Linux systems.
+.\"
+.SS NPTL and signals
+NPTL makes internal use of the first two real-time signals
+(signal numbers 32 and 33).
+One of these signals is used to support thread cancelation and POSIX timers
+(see
+.BR timer_create (2));
+the other is used as part of a mechanism that ensures all threads in
+a process always have the same UIDs and GIDs, as required by POSIX.
+These signals cannot be used in applications.
+.PP
+To prevent accidental use of these signals in applications,
+which might interfere with the operation of the NPTL implementation,
+various glibc library functions and system call wrapper functions
+attempt to hide these signals from applications,
+as follows:
+.IP \[bu] 3
+.B SIGRTMIN
+is defined with the value 34 (rather than 32).
+.IP \[bu]
+The
+.BR sigwaitinfo (2),
+.BR sigtimedwait (2),
+and
+.BR sigwait (3)
+interfaces silently ignore requests to wait for these two signals
+if they are specified in the signal set argument of these calls.
+.IP \[bu]
+The
+.BR sigprocmask (2)
+and
+.BR pthread_sigmask (3)
+interfaces silently ignore attempts to block these two signals.
+.IP \[bu]
+The
+.BR sigaction (2),
+.BR pthread_kill (3),
+and
+.BR pthread_sigqueue (3)
+interfaces fail with the error
+.B EINVAL
+(indicating an invalid signal number) if these signals are specified.
+.IP \[bu]
+.BR sigfillset (3)
+does not include these two signals when it creates a full signal set.
+.\"
+.SS NPTL and process credential changes
+At the Linux kernel level,
+credentials (user and group IDs) are a per-thread attribute.
+However, POSIX requires that all of the POSIX threads in a process
+have the same credentials.
+To accommodate this requirement,
+the NPTL implementation wraps all of the system calls that
+change process credentials with functions that,
+in addition to invoking the underlying system call,
+arrange for all other threads in the process to also change their credentials.
+.PP
+The implementation of each of these system calls involves the use of
+a real-time signal that is sent (using
+.BR tgkill (2))
+to each of the other threads that must change its credentials.
+Before sending these signals, the thread that is changing credentials
+saves the new credential(s) and records the system call being employed
+in a global buffer.
+A signal handler in the receiving thread(s) fetches this information and
+then uses the same system call to change its credentials.
+.PP
+Wrapper functions employing this technique are provided for
+.BR setgid (2),
+.BR setuid (2),
+.BR setegid (2),
+.BR seteuid (2),
+.BR setregid (2),
+.BR setreuid (2),
+.BR setresgid (2),
+.BR setresuid (2),
+and
+.BR setgroups (2).
+.\" FIXME .
+.\" Maybe say something about vfork() not being serialized wrt set*id() APIs?
+.\" https://sourceware.org/bugzilla/show_bug.cgi?id=14749
+.SH STANDARDS
+For details of the conformance of NPTL to the POSIX standard, see
+.BR pthreads (7).
+.SH NOTES
+POSIX says
+.\" See POSIX.1-2008 specification of pthread_mutexattr_init()
+that any thread in any process with access to the memory
+containing a process-shared
+.RB ( PTHREAD_PROCESS_SHARED )
+mutex can operate on that mutex.
+However, on 64-bit x86 systems, the mutex definition for x86-64
+is incompatible with the mutex definition for i386,
+.\" See sysdeps/x86/bits/pthreadtypes.h
+meaning that 32-bit and 64-bit binaries can't share mutexes on x86-64 systems.
+.SH SEE ALSO
+.BR credentials (7),
+.BR pthreads (7),
+.BR signal (7),
+.BR standards (7)
diff --git a/man7/numa.7 b/man7/numa.7
new file mode 100644
index 0000000..9ac5097
--- /dev/null
+++ b/man7/numa.7
@@ -0,0 +1,170 @@
+.\" Copyright (c) 2008, Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\" and Copyright 2003,2004 Andi Kleen, SuSE Labs.
+.\" numa_maps material Copyright (c) 2005 Silicon Graphics Incorporated.
+.\" Christoph Lameter, <cl@linux-foundation.org>.
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH numa 7 2023-04-03 "Linux man-pages 6.05.01"
+.SH NAME
+numa \- overview of Non-Uniform Memory Architecture
+.SH DESCRIPTION
+Non-Uniform Memory Access (NUMA) refers to multiprocessor systems
+whose memory is divided into multiple memory nodes.
+The access time of a memory node depends on
+the relative locations of the accessing CPU and the accessed node.
+(This contrasts with a symmetric multiprocessor system,
+where the access time for all of the memory is the same for all CPUs.)
+Normally, each CPU on a NUMA system has a local memory node whose
+contents can be accessed faster than the memory in
+the node local to another CPU
+or the memory on a bus shared by all CPUs.
+.SS NUMA system calls
+The Linux kernel implements the following NUMA-related system calls:
+.BR get_mempolicy (2),
+.BR mbind (2),
+.BR migrate_pages (2),
+.BR move_pages (2),
+and
+.BR set_mempolicy (2).
+However, applications should normally use the interface provided by
+.IR libnuma ;
+see "Library support" below.
+.SS \fI/proc/\fPpid\fI/numa_maps\fP (since Linux 2.6.14)
+.\" See also Changelog-2.6.14
+This file displays information about a process's
+NUMA memory policy and allocation.
+.PP
+Each line contains information about a memory range used by the process,
+displaying\[em]among other information\[em]the effective memory policy for
+that memory range and on which nodes the pages have been allocated.
+.PP
+.I numa_maps
+is a read-only file.
+When
+.IR /proc/ pid /numa_maps
+is read, the kernel will scan the virtual address space of the
+process and report how memory is used.
+One line is displayed for each unique memory range of the process.
+.PP
+The first field of each line shows the starting address of the memory range.
+This field allows a correlation with the contents of the
+.IR /proc/ pid /maps
+file,
+which contains the end address of the range and other information,
+such as the access permissions and sharing.
+.PP
+The second field shows the memory policy currently in effect for the
+memory range.
+Note that the effective policy is not necessarily the policy
+installed by the process for that memory range.
+Specifically, if the process installed a "default" policy for that range,
+the effective policy for that range will be the process policy,
+which may or may not be "default".
+.PP
+The rest of the line contains information about the pages allocated in
+the memory range, as follows:
+.TP
+.I N<node>=<nr_pages>
+The number of pages allocated on
+.IR <node> .
+.I <nr_pages>
+includes only pages currently mapped by the process.
+Page migration and memory reclaim may have temporarily unmapped pages
+associated with this memory range.
+These pages may show up again only after the process has
+attempted to reference them.
+If the memory range represents a shared memory area or file mapping,
+other processes may currently have additional pages mapped in a
+corresponding memory range.
+.TP
+.I file=<filename>
+The file backing the memory range.
+If the file is mapped as private, write accesses may have generated
+COW (Copy-On-Write) pages in this memory range.
+These pages are displayed as anonymous pages.
+.TP
+.I heap
+Memory range is used for the heap.
+.TP
+.I stack
+Memory range is used for the stack.
+.TP
+.I huge
+Huge memory range.
+The page counts shown are huge pages and not regular sized pages.
+.TP
+.I anon=<pages>
+The number of anonymous pages in the range.
+.TP
+.I dirty=<pages>
+Number of dirty pages.
+.TP
+.I mapped=<pages>
+Total number of mapped pages, if different from
+.I dirty
+and
+.I anon
+pages.
+.TP
+.I mapmax=<count>
+Maximum mapcount (number of processes mapping a single page) encountered
+during the scan.
+This may be used as an indicator of the degree of sharing occurring in a
+given memory range.
+.TP
+.I swapcache=<count>
+Number of pages that have an associated entry on a swap device.
+.TP
+.I active=<pages>
+The number of pages on the active list.
+This field is shown only if different from the number of pages in this range.
+This means that some inactive pages exist in the memory range that may be
+removed from memory by the swapper soon.
+.TP
+.I writeback=<pages>
+Number of pages that are currently being written out to disk.
+.SH STANDARDS
+None.
+.SH NOTES
+The Linux NUMA system calls and
+.I /proc
+interface are available only
+if the kernel was configured and built with the
+.B CONFIG_NUMA
+option.
+.SS Library support
+Link with \fI\-lnuma\fP
+to get the system call definitions.
+.I libnuma
+and the required
+.I <numaif.h>
+header are available in the
+.I numactl
+package.
+.PP
+However, applications should not use these system calls directly.
+Instead, the higher level interface provided by the
+.BR numa (3)
+functions in the
+.I numactl
+package is recommended.
+The
+.I numactl
+package is available at
+.UR ftp://oss.sgi.com\:/www\:/projects\:/libnuma\:/download/
+.UE .
+The package is also included in some Linux distributions.
+Some distributions include the development library and header
+in the separate
+.I numactl\-devel
+package.
+.SH SEE ALSO
+.BR get_mempolicy (2),
+.BR mbind (2),
+.BR move_pages (2),
+.BR set_mempolicy (2),
+.BR numa (3),
+.BR cpuset (7),
+.BR numactl (8)
diff --git a/man7/operator.7 b/man7/operator.7
new file mode 100644
index 0000000..ec4652f
--- /dev/null
+++ b/man7/operator.7
@@ -0,0 +1,54 @@
+'\" t
+.\" Copyright (c) 1989, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" SPDX-License-Identifier: BSD-3-Clause
+.\"
+.\" @(#)operator.7 8.1 (Berkeley) 6/9/93
+.\"
+.\" Copied shamelessly from FreeBSD with minor changes. 2003-05-21
+.\" Brian M. Carlson <sandals@crustytoothpaste.ath.cx>
+.\"
+.\" Restored automatic formatting from FreeBSD. 2003-08-24
+.\" Martin Schulze <joey@infodrom.org>
+.\"
+.\" 2007-12-08, mtk, Converted from mdoc to man macros
+.\"
+.TH operator 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+operator \- C operator precedence and order of evaluation
+.SH DESCRIPTION
+This manual page lists C operators and their precedence in evaluation.
+.PP
+.TS
+lb lb lb
+l l l.
+Operator Associativity Notes
+[] () . \-> ++ \-\- left to right [1]
+++ \-\- & * + \- \[ti] ! sizeof right to left [2]
+(type) right to left
+* / % left to right
++ \- left to right
+<< >> left to right
+< > <= >= left to right
+== != left to right
+& left to right
+\[ha] left to right
+| left to right
+&& left to right
+|| left to right
+?: right to left
+= *= /= %= += \-= <<= >>= &= \[ha]= |= right to left
+, left to right
+.TE
+.PP
+The following notes provide further information to the above table:
+.PP
+.PD 0
+.IP [1] 4
+The ++ and \-\- operators at this precedence level are
+the postfix flavors of the operators.
+.IP [2]
+The ++ and \-\- operators at this precedence level are
+the prefix flavors of the operators.
+.PD
diff --git a/man7/packet.7 b/man7/packet.7
new file mode 100644
index 0000000..b2a264c
--- /dev/null
+++ b/man7/packet.7
@@ -0,0 +1,694 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: packet.7,v 1.13 2000/08/14 08:03:45 ak Exp $
+.\"
+.TH packet 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+packet \- packet interface on device level
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <linux/if_packet.h>
+.B #include <net/ethernet.h> /* the L2 protocols */
+.PP
+.BI "packet_socket = socket(AF_PACKET, int " socket_type ", int "protocol );
+.fi
+.SH DESCRIPTION
+Packet sockets are used to receive or send raw packets at the device driver
+(OSI Layer 2) level.
+They allow the user to implement protocol modules in user space
+on top of the physical layer.
+.PP
+The
+.I socket_type
+is either
+.B SOCK_RAW
+for raw packets including the link-level header or
+.B SOCK_DGRAM
+for cooked packets with the link-level header removed.
+The link-level header information is available in a common format in a
+.I sockaddr_ll
+structure.
+.I protocol
+is the IEEE 802.3 protocol number in network byte order.
+See the
+.I <linux/if_ether.h>
+include file for a list of allowed protocols.
+When protocol
+is set to
+.BR htons(ETH_P_ALL) ,
+then all protocols are received.
+All incoming packets of that protocol type will be passed to the packet
+socket before they are passed to the protocols implemented in the kernel.
+If
+.I protocol
+is set to zero,
+no packets are received.
+.BR bind (2)
+can optionally be called with a nonzero
+.I sll_protocol
+to start receiving packets for the protocols specified.
+.PP
+In order to create a packet socket, a process must have the
+.B CAP_NET_RAW
+capability in the user namespace that governs its network namespace.
+.PP
+.B SOCK_RAW
+packets are passed to and from the device driver without any changes in
+the packet data.
+When receiving a packet, the address is still parsed and
+passed in a standard
+.I sockaddr_ll
+address structure.
+When transmitting a packet, the user-supplied buffer
+should contain the physical-layer header.
+That packet is then
+queued unmodified to the network driver of the interface defined by the
+destination address.
+Some device drivers always add other headers.
+.B SOCK_RAW
+is similar to but not compatible with the obsolete
+.B AF_INET/SOCK_PACKET
+of Linux 2.0.
+.PP
+.B SOCK_DGRAM
+operates on a slightly higher level.
+The physical header is removed before the packet is passed to the user.
+Packets sent through a
+.B SOCK_DGRAM
+packet socket get a suitable physical-layer header based on the
+information in the
+.I sockaddr_ll
+destination address before they are queued.
+.PP
+By default, all packets of the specified protocol type
+are passed to a packet socket.
+To get packets only from a specific interface use
+.BR bind (2)
+specifying an address in a
+.I struct sockaddr_ll
+to bind the packet socket to an interface.
+Fields used for binding are
+.I sll_family
+(should be
+.BR AF_PACKET ),
+.IR sll_protocol ,
+and
+.IR sll_ifindex .
+.PP
+The
+.BR connect (2)
+operation is not supported on packet sockets.
+.PP
+When the
+.B MSG_TRUNC
+flag is passed to
+.BR recvmsg (2),
+.BR recv (2),
+or
+.BR recvfrom (2),
+the real length of the packet on the wire is always returned,
+even when it is longer than the buffer.
+.SS Address types
+The
+.I sockaddr_ll
+structure is a device-independent physical-layer address.
+.PP
+.in +4n
+.EX
+struct sockaddr_ll {
+ unsigned short sll_family; /* Always AF_PACKET */
+ unsigned short sll_protocol; /* Physical\-layer protocol */
+ int sll_ifindex; /* Interface number */
+ unsigned short sll_hatype; /* ARP hardware type */
+ unsigned char sll_pkttype; /* Packet type */
+ unsigned char sll_halen; /* Length of address */
+ unsigned char sll_addr[8]; /* Physical\-layer address */
+};
+.EE
+.in
+.PP
+The fields of this structure are as follows:
+.TP
+.I sll_protocol
+is the standard ethernet protocol type in network byte order as defined
+in the
+.I <linux/if_ether.h>
+include file.
+It defaults to the socket's protocol.
+.TP
+.I sll_ifindex
+is the interface index of the interface (see
+.BR netdevice (7));
+0 matches any interface (only permitted for binding).
+.TP
+.I sll_hatype
+is an ARP type as defined in the
+.I <linux/if_arp.h>
+include file.
+.TP
+.I sll_pkttype
+contains the packet type.
+Valid types are
+.B PACKET_HOST
+for a packet addressed to the local host,
+.B PACKET_BROADCAST
+for a physical-layer broadcast packet,
+.B PACKET_MULTICAST
+for a packet sent to a physical-layer multicast address,
+.B PACKET_OTHERHOST
+for a packet to some other host that has been caught by a device driver
+in promiscuous mode, and
+.B PACKET_OUTGOING
+for a packet originating from the local host that is looped back to a packet
+socket.
+These types make sense only for receiving.
+.TP
+.I sll_addr
+.TQ
+.I sll_halen
+contain the physical-layer (e.g., IEEE 802.3) address and its length.
+The exact interpretation depends on the device.
+.PP
+When you send packets, it is enough to specify
+.IR sll_family ,
+.IR sll_addr ,
+.IR sll_halen ,
+.IR sll_ifindex ,
+and
+.IR sll_protocol .
+The other fields should be 0.
+.I sll_hatype
+and
+.I sll_pkttype
+are set on received packets for your information.
+.SS Socket options
+Packet socket options are configured by calling
+.BR setsockopt (2)
+with level
+.BR SOL_PACKET .
+.TP
+.B PACKET_ADD_MEMBERSHIP
+.PD 0
+.TP
+.B PACKET_DROP_MEMBERSHIP
+.PD
+Packet sockets can be used to configure physical-layer multicasting
+and promiscuous mode.
+.B PACKET_ADD_MEMBERSHIP
+adds a binding and
+.B PACKET_DROP_MEMBERSHIP
+drops it.
+They both expect a
+.I packet_mreq
+structure as argument:
+.IP
+.in +4n
+.EX
+struct packet_mreq {
+ int mr_ifindex; /* interface index */
+ unsigned short mr_type; /* action */
+ unsigned short mr_alen; /* address length */
+ unsigned char mr_address[8]; /* physical\-layer address */
+};
+.EE
+.in
+.IP
+.I mr_ifindex
+contains the interface index for the interface whose status
+should be changed.
+The
+.I mr_type
+field specifies which action to perform.
+.B PACKET_MR_PROMISC
+enables receiving all packets on a shared medium (often known as
+"promiscuous mode"),
+.B PACKET_MR_MULTICAST
+binds the socket to the physical-layer multicast group specified in
+.I mr_address
+and
+.IR mr_alen ,
+and
+.B PACKET_MR_ALLMULTI
+sets the socket up to receive all multicast packets arriving at
+the interface.
+.IP
+In addition, the traditional ioctls
+.BR SIOCSIFFLAGS ,
+.BR SIOCADDMULTI ,
+.B SIOCDELMULTI
+can be used for the same purpose.
+.TP
+.BR PACKET_AUXDATA " (since Linux 2.6.21)"
+.\" commit 8dc4194474159660d7f37c495e3fc3f10d0db8cc
+If this binary option is enabled, the packet socket passes a metadata
+structure along with each packet in the
+.BR recvmsg (2)
+control field.
+The structure can be read with
+.BR cmsg (3).
+It is defined as
+.IP
+.in +4n
+.EX
+struct tpacket_auxdata {
+ __u32 tp_status;
+ __u32 tp_len; /* packet length */
+ __u32 tp_snaplen; /* captured length */
+ __u16 tp_mac;
+ __u16 tp_net;
+ __u16 tp_vlan_tci;
+ __u16 tp_vlan_tpid; /* Since Linux 3.14; earlier, these
+ were unused padding bytes */
+.\" commit a0cdfcf39362410d5ea983f4daf67b38de129408 added tp_vlan_tpid
+};
+.EE
+.in
+.TP
+.BR PACKET_FANOUT " (since Linux 3.1)"
+.\" commit dc99f600698dcac69b8f56dda9a8a00d645c5ffc
+To scale processing across threads, packet sockets can form a fanout
+group.
+In this mode, each matching packet is enqueued onto only one
+socket in the group.
+A socket joins a fanout group by calling
+.BR setsockopt (2)
+with level
+.B SOL_PACKET
+and option
+.BR PACKET_FANOUT .
+Each network namespace can have up to 65536 independent groups.
+A socket selects a group by encoding the ID in the low-order 16 bits of
+the integer option value.
+The first packet socket to join a group implicitly creates it.
+To successfully join an existing group, subsequent packet sockets
+must have the same protocol, device settings, fanout mode, and
+flags (see below).
+Packet sockets can leave a fanout group only by closing the socket.
+The group is deleted when the last socket is closed.
+.IP
+Fanout supports multiple algorithms to spread traffic between sockets,
+as follows:
+.RS
+.IP \[bu] 3
+The default mode,
+.BR PACKET_FANOUT_HASH ,
+sends packets from the same flow to the same socket to maintain
+per-flow ordering.
+For each packet, it chooses a socket by taking the packet flow hash
+modulo the number of sockets in the group, where a flow hash is a hash
+over network-layer address and optional transport-layer port fields.
+.IP \[bu]
+The load-balance mode
+.B PACKET_FANOUT_LB
+implements a round-robin algorithm.
+.IP \[bu]
+.B PACKET_FANOUT_CPU
+selects the socket based on the CPU that the packet arrived on.
+.IP \[bu]
+.B PACKET_FANOUT_ROLLOVER
+processes all data on a single socket, moving to the next when one
+becomes backlogged.
+.IP \[bu]
+.B PACKET_FANOUT_RND
+selects the socket using a pseudo-random number generator.
+.IP \[bu]
+.B PACKET_FANOUT_QM
+.\" commit 2d36097d26b5991d71a2cf4a20c1a158f0f1bfcd
+(available since Linux 3.14)
+selects the socket using the recorded queue_mapping of the received skb.
+.RE
+.IP
+Fanout modes can take additional options.
+IP fragmentation causes packets from the same flow to have different
+flow hashes.
+The flag
+.BR PACKET_FANOUT_FLAG_DEFRAG ,
+if set, causes packets to be defragmented before fanout is applied, to
+preserve order even in this case.
+Fanout mode and options are communicated in the high-order 16 bits of the
+integer option value.
+The flag
+.B PACKET_FANOUT_FLAG_ROLLOVER
+enables the roll over mechanism as a backup strategy: if the
+original fanout algorithm selects a backlogged socket, the packet
+rolls over to the next available one.
+.TP
+.BR PACKET_LOSS " (with " PACKET_TX_RING )
+When a malformed packet is encountered on a transmit ring,
+the default is to reset its
+.I tp_status
+to
+.B TP_STATUS_WRONG_FORMAT
+and abort the transmission immediately.
+The malformed packet blocks itself and subsequently enqueued packets from
+being sent.
+The format error must be fixed, the associated
+.I tp_status
+reset to
+.BR TP_STATUS_SEND_REQUEST ,
+and the transmission process restarted via
+.BR send (2).
+However, if
+.B PACKET_LOSS
+is set, any malformed packet will be skipped, its
+.I tp_status
+reset to
+.BR TP_STATUS_AVAILABLE ,
+and the transmission process continued.
+.TP
+.BR PACKET_RESERVE " (with " PACKET_RX_RING )
+By default, a packet receive ring writes packets immediately following the
+metadata structure and alignment padding.
+This integer option reserves additional headroom.
+.TP
+.B PACKET_RX_RING
+Create a memory-mapped ring buffer for asynchronous packet reception.
+The packet socket reserves a contiguous region of application address
+space, lays it out into an array of packet slots and copies packets
+(up to
+.IR tp_snaplen )
+into subsequent slots.
+Each packet is preceded by a metadata structure similar to
+.IR tpacket_auxdata .
+The protocol fields encode the offset to the data
+from the start of the metadata header.
+.I tp_net
+stores the offset to the network layer.
+If the packet socket is of type
+.BR SOCK_DGRAM ,
+then
+.I tp_mac
+is the same.
+If it is of type
+.BR SOCK_RAW ,
+then that field stores the offset to the link-layer frame.
+Packet socket and application communicate the head and tail of the ring
+through the
+.I tp_status
+field.
+The packet socket owns all slots with
+.I tp_status
+equal to
+.BR TP_STATUS_KERNEL .
+After filling a slot, it changes the status of the slot to transfer
+ownership to the application.
+During normal operation, the new
+.I tp_status
+value has at least the
+.B TP_STATUS_USER
+bit set to signal that a received packet has been stored.
+When the application has finished processing a packet, it transfers
+ownership of the slot back to the socket by setting
+.I tp_status
+equal to
+.BR TP_STATUS_KERNEL .
+.IP
+Packet sockets implement multiple variants of the packet ring.
+The implementation details are described in
+.I Documentation/networking/packet_mmap.rst
+in the Linux kernel source tree.
+.TP
+.B PACKET_STATISTICS
+Retrieve packet socket statistics in the form of a structure
+.IP
+.in +4n
+.EX
+struct tpacket_stats {
+ unsigned int tp_packets; /* Total packet count */
+ unsigned int tp_drops; /* Dropped packet count */
+};
+.EE
+.in
+.IP
+Receiving statistics resets the internal counters.
+The statistics structure differs when using a ring of variant
+.BR TPACKET_V3 .
+.TP
+.BR PACKET_TIMESTAMP " (with " PACKET_RX_RING "; since Linux 2.6.36)"
+.\" commit 614f60fa9d73a9e8fdff3df83381907fea7c5649
+The packet receive ring always stores a timestamp in the metadata header.
+By default, this is a software-generated timestamp taken when the
+packet is copied into the ring.
+This integer option selects the type of timestamp.
+Besides the default, it supports the two hardware formats described in
+.I Documentation/networking/timestamping.rst
+in the Linux kernel source tree.
+.TP
+.BR PACKET_TX_RING " (since Linux 2.6.31)"
+.\" commit 69e3c75f4d541a6eb151b3ef91f34033cb3ad6e1
+Create a memory-mapped ring buffer for packet transmission.
+This option is similar to
+.B PACKET_RX_RING
+and takes the same arguments.
+The application writes packets into slots with
+.I tp_status
+equal to
+.B TP_STATUS_AVAILABLE
+and schedules them for transmission by changing
+.I tp_status
+to
+.BR TP_STATUS_SEND_REQUEST .
+When packets are ready to be transmitted, the application calls
+.BR send (2)
+or a variant thereof.
+The
+.I buf
+and
+.I len
+fields of this call are ignored.
+If an address is passed using
+.BR sendto (2)
+or
+.BR sendmsg (2),
+then that overrides the socket default.
+On successful transmission, the socket resets
+.I tp_status
+to
+.BR TP_STATUS_AVAILABLE .
+It immediately aborts the transmission on error unless
+.B PACKET_LOSS
+is set.
+.TP
+.BR PACKET_VERSION " (with " PACKET_RX_RING "; since Linux 2.6.27)"
+.\" commit bbd6ef87c544d88c30e4b762b1b61ef267a7d279
+By default,
+.B PACKET_RX_RING
+creates a packet receive ring of variant
+.BR TPACKET_V1 .
+To create another variant, configure the desired variant by setting this
+integer option before creating the ring.
+.TP
+.BR PACKET_QDISC_BYPASS " (since Linux 3.14)"
+.\" commit d346a3fae3ff1d99f5d0c819bf86edf9094a26a1
+By default, packets sent through packet sockets pass through the kernel's
+qdisc (traffic control) layer, which is fine for the vast majority of use
+cases.
+For traffic generator appliances using packet sockets
+that intend to brute-force flood the network\[em]for example,
+to test devices under load in a similar
+fashion to pktgen\[em]this layer can be bypassed by setting
+this integer option to 1.
+A side effect is that packet buffering in the qdisc layer is avoided,
+which will lead to increased drops when network
+device transmit queues are busy;
+therefore, use at your own risk.
+.SS Ioctls
+.B SIOCGSTAMP
+can be used to receive the timestamp of the last received packet.
+Argument is a
+.I struct timeval
+variable.
+.\" FIXME Document SIOCGSTAMPNS
+.PP
+In addition, all standard ioctls defined in
+.BR netdevice (7)
+and
+.BR socket (7)
+are valid on packet sockets.
+.SS Error handling
+Packet sockets do no error handling other than for errors that occur
+while passing the packet to the device driver.
+They don't have the concept of a pending error.
+.SH ERRORS
+.TP
+.B EADDRNOTAVAIL
+Unknown multicast group address passed.
+.TP
+.B EFAULT
+User passed invalid memory address.
+.TP
+.B EINVAL
+Invalid argument.
+.TP
+.B EMSGSIZE
+Packet is bigger than interface MTU.
+.TP
+.B ENETDOWN
+Interface is not up.
+.TP
+.B ENOBUFS
+Not enough memory to allocate the packet.
+.TP
+.B ENODEV
+Unknown device name or interface index specified in interface address.
+.TP
+.B ENOENT
+No packet received.
+.TP
+.B ENOTCONN
+No interface address passed.
+.TP
+.B ENXIO
+Interface address contained an invalid interface index.
+.TP
+.B EPERM
+User has insufficient privileges to carry out this operation.
+.PP
+In addition, other errors may be generated by the low-level driver.
+.SH VERSIONS
+.B AF_PACKET
+is a new feature in Linux 2.2.
+Earlier Linux versions supported only
+.BR SOCK_PACKET .
+.SH NOTES
+For portable programs it is suggested to use
+.B AF_PACKET
+via
+.BR pcap (3),
+although this covers only a subset of the
+.B AF_PACKET
+features.
+.PP
+The
+.B SOCK_DGRAM
+packet sockets make no attempt to create or parse the IEEE 802.2 LLC
+header for an IEEE 802.3 frame.
+When
+.B ETH_P_802_3
+is specified as protocol for sending, the kernel creates the
+802.3 frame and fills out the length field; the user has to supply the LLC
+header to get a fully conforming packet.
+Incoming 802.3 packets are not multiplexed on the DSAP/SSAP protocol
+fields; instead they are supplied to the user as protocol
+.B ETH_P_802_2
+with the LLC header prefixed.
+It is thus not possible to bind to
+.BR ETH_P_802_3 ;
+bind to
+.B ETH_P_802_2
+instead and do the protocol multiplex yourself.
+The default for sending is the standard Ethernet DIX
+encapsulation with the protocol filled in.
+.PP
+Packet sockets are not subject to the input or output firewall chains.
+.SS Compatibility
+In Linux 2.0, the only way to get a packet socket was with the call:
+.PP
+.in +4n
+.EX
+socket(AF_INET, SOCK_PACKET, protocol)
+.EE
+.in
+.PP
+This is still supported, but deprecated and strongly discouraged.
+The main difference between the two methods is that
+.B SOCK_PACKET
+uses the old
+.I struct sockaddr_pkt
+to specify an interface, which doesn't provide physical-layer
+independence.
+.PP
+.in +4n
+.EX
+struct sockaddr_pkt {
+ unsigned short spkt_family;
+ unsigned char spkt_device[14];
+ unsigned short spkt_protocol;
+};
+.EE
+.in
+.PP
+.I spkt_family
+contains
+the device type,
+.I spkt_protocol
+is the IEEE 802.3 protocol type as defined in
+.I <sys/if_ether.h>
+and
+.I spkt_device
+is the device name as a null-terminated string, for example, eth0.
+.PP
+This structure is obsolete and should not be used in new code.
+.SH BUGS
+.SS LLC header handling
+The IEEE 802.2/802.3 LLC handling could be considered as a bug.
+.SS MSG_TRUNC issues
+The
+.B MSG_TRUNC
+.BR recvmsg (2)
+extension is an ugly hack and should be replaced by a control message.
+There is currently no way to get the original destination address of
+packets via
+.BR SOCK_DGRAM .
+.SS spkt_device device name truncation
+The
+.I spkt_device
+field of
+.I sockaddr_pkt
+has a size of 14 bytes,
+which is less than the constant
+.B IFNAMSIZ
+defined in
+.I <net/if.h>
+which is 16 bytes and describes the system limit for a network interface name.
+This means the names of network devices longer than 14 bytes
+will be truncated to fit into
+.IR spkt_device .
+All these lengths include the terminating null byte (\[aq]\e0\[aq]).
+.PP
+Issues from this with old code typically show up with
+very long interface names used by the
+.B Predictable Network Interface Names
+feature enabled by default in many modern Linux distributions.
+.PP
+The preferred solution is to rewrite code to avoid
+.BR SOCK_PACKET .
+Possible user solutions are to disable
+.B Predictable Network Interface Names
+or to rename the interface to a name of at most 13 bytes,
+for example using the
+.BR ip (8)
+tool.
+.SS Documentation issues
+Socket filters are not documented.
+.\" .SH CREDITS
+.\" This man page was written by Andi Kleen with help from Matthew Wilcox.
+.\" AF_PACKET in Linux 2.2 was implemented
+.\" by Alexey Kuznetsov, based on code by Alan Cox and others.
+.SH SEE ALSO
+.BR socket (2),
+.BR pcap (3),
+.BR capabilities (7),
+.BR ip (7),
+.BR raw (7),
+.BR socket (7),
+.BR ip (8)
+.PP
+RFC\ 894 for the standard IP Ethernet encapsulation.
+RFC\ 1700 for the IEEE 802.3 IP encapsulation.
+.PP
+The
+.I <linux/if_ether.h>
+include file for physical-layer protocols.
+.PP
+The Linux kernel source tree.
+.I Documentation/networking/filter.rst
+describes how to apply Berkeley Packet Filters to packet sockets.
+.I tools/testing/selftests/net/psock_tpacket.c
+contains example source code for all available versions of
+.B PACKET_RX_RING
+and
+.BR PACKET_TX_RING .
diff --git a/man7/path_resolution.7 b/man7/path_resolution.7
new file mode 100644
index 0000000..1704603
--- /dev/null
+++ b/man7/path_resolution.7
@@ -0,0 +1,264 @@
+.\" Copyright (C) 2003 Andries Brouwer (aeb@cwi.nl)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH path_resolution 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+path_resolution \- how a pathname is resolved to a file
+.SH DESCRIPTION
+Some UNIX/Linux system calls have as parameter one or more filenames.
+A filename (or pathname) is resolved as follows.
+.SS Step 1: start of the resolution process
+If the pathname starts with the \[aq]/\[aq] character, the starting lookup
+directory is the root directory of the calling process.
+A process inherits its root directory from its parent.
+Usually this will be the root directory of the file hierarchy.
+A process may get a different root directory by use of the
+.BR chroot (2)
+system call, or may temporarily use a different root directory by using
+.BR openat2 (2)
+with the
+.B RESOLVE_IN_ROOT
+flag set.
+.PP
+A process may get an entirely private mount namespace in case
+it\[em]or one of its ancestors\[em]was started by an invocation of the
+.BR clone (2)
+system call that had the
+.B CLONE_NEWNS
+flag set.
+This handles the \[aq]/\[aq] part of the pathname.
+.PP
+If the pathname does not start with the \[aq]/\[aq] character, the starting
+lookup directory of the resolution process is the current working directory of
+the process \[em] or in the case of
+.BR openat (2)-style
+system calls, the
+.I dfd
+argument (or the current working directory if
+.B AT_FDCWD
+is passed as the
+.I dfd
+argument).
+The current working directory is inherited from the parent, and can
+be changed by use of the
+.BR chdir (2)
+system call.
+.PP
+Pathnames starting with a \[aq]/\[aq] character are called absolute pathnames.
+Pathnames not starting with a \[aq]/\[aq] are called relative pathnames.
+.SS Step 2: walk along the path
+Set the current lookup directory to the starting lookup directory.
+Now, for each nonfinal component of the pathname, where a component
+is a substring delimited by \[aq]/\[aq] characters, this component is looked up
+in the current lookup directory.
+.PP
+If the process does not have search permission on
+the current lookup directory,
+an
+.B EACCES
+error is returned ("Permission denied").
+.PP
+If the component is not found, an
+.B ENOENT
+error is returned
+("No such file or directory").
+.PP
+If the component is found, but is neither a directory nor a symbolic link,
+an
+.B ENOTDIR
+error is returned ("Not a directory").
+.PP
+If the component is found and is a directory, we set the
+current lookup directory to that directory, and go to the
+next component.
+.PP
+If the component is found and is a symbolic link,
+we first resolve this symbolic link
+(with the current lookup directory
+as starting lookup directory).
+Upon error, that error is returned.
+If the result is not a directory, an
+.B ENOTDIR
+error is returned.
+If the resolution of the symbolic link is successful and returns a directory,
+we set the current lookup directory to that directory, and go to
+the next component.
+Note that the resolution process here can involve recursion if the
+prefix ('dirname') component of a pathname contains a filename
+that is a symbolic link that resolves to a directory (where the
+prefix component of that directory may contain a symbolic link, and so on).
+In order to protect the kernel against stack overflow, and also
+to protect against denial of service, there are limits on the
+maximum recursion depth, and on the maximum number of symbolic links
+followed.
+An
+.B ELOOP
+error is returned when the maximum is
+exceeded ("Too many levels of symbolic links").
+.PP
+.\"
+.\" presently: max recursion depth during symlink resolution: 5
+.\" max total number of symbolic links followed: 40
+.\" _POSIX_SYMLOOP_MAX is 8
+As currently implemented on Linux, the maximum number
+.\" MAXSYMLINKS is 40
+of symbolic links that will be followed while resolving a pathname is 40.
+Before Linux 2.6.18, the limit on the recursion depth was 5.
+Starting with Linux 2.6.18, this limit
+.\" MAX_NESTED_LINKS
+was raised to 8.
+In Linux 4.2,
+.\" commit 894bc8c4662ba9daceafe943a5ba0dd407da5cd3
+the kernel's pathname-resolution code
+was reworked to eliminate the use of recursion,
+so that the only limit that remains is the maximum of 40
+resolutions for the entire pathname.
+.PP
+The resolution of symbolic links during this stage can be blocked by using
+.BR openat2 (2),
+with the
+.B RESOLVE_NO_SYMLINKS
+flag set.
+.SS Step 3: find the final entry
+The lookup of the final component of the pathname goes just like
+that of all other components, as described in the previous step,
+with two differences: (i) the final component need not be a
+directory (at least as far as the path resolution process is
+concerned\[em]it may have to be a directory, or a nondirectory, because of
+the requirements of the specific system call), and (ii) it
+is not necessarily an error if the component is not found\[em]maybe
+we are just creating it.
+The details on the treatment
+of the final entry are described in the manual pages of the specific
+system calls.
+.SS . and ..
+By convention, every directory has the entries "." and "..",
+which refer to the directory itself and to its parent directory,
+respectively.
+.PP
+The path resolution process will assume that these entries have
+their conventional meanings, regardless of whether they are
+actually present in the physical filesystem.
+.PP
+One cannot walk up past the root: "/.." is the same as "/".
+.SS Mount points
+After a
+.I mount dev path
+command, the pathname "path" refers to
+the root of the filesystem hierarchy on the device "dev", and no
+longer to whatever it referred to earlier.
+.PP
+One can walk out of a mounted filesystem: "path/.." refers to
+the parent directory of "path",
+outside of the filesystem hierarchy on "dev".
+.PP
+Traversal of mount points can be blocked by using
+.BR openat2 (2),
+with the
+.B RESOLVE_NO_XDEV
+flag set (though note that this also restricts bind mount traversal).
+.SS Trailing slashes
+If a pathname ends in a \[aq]/\[aq], that forces resolution of the preceding
+component as in Step 2:
+the component preceding the slash either exists and resolves to a directory
+or it names a directory that is to be created
+immediately after the pathname is resolved.
+Otherwise, a trailing \[aq]/\[aq] is ignored.
+.SS Final symbolic link
+If the last component of a pathname is a symbolic link, then it
+depends on the system call whether the file referred to will be
+the symbolic link or the result of path resolution on its contents.
+For example, the system call
+.BR lstat (2)
+will operate on the symbolic link,
+while
+.BR stat (2)
+operates on the file pointed to by the symbolic link.
+.SS Length limit
+There is a maximum length for pathnames.
+If the pathname (or some
+intermediate pathname obtained while resolving symbolic links)
+is too long, an
+.B ENAMETOOLONG
+error is returned ("File name too long").
+.SS Empty pathname
+In the original UNIX, the empty pathname referred to the current directory.
+Nowadays POSIX decrees that an empty pathname must not be resolved
+successfully.
+Linux returns
+.B ENOENT
+in this case.
+.SS Permissions
+The permission bits of a file consist of three groups of three bits; see
+.BR chmod (1)
+and
+.BR stat (2).
+The first group of three is used when the effective user ID of
+the calling process equals the owner ID of the file.
+The second group
+of three is used when the group ID of the file either equals the
+effective group ID of the calling process, or is one of the
+supplementary group IDs of the calling process (as set by
+.BR setgroups (2)).
+When neither holds, the third group is used.
+.PP
+Of the three bits used, the first bit determines read permission,
+the second write permission, and the last execute permission
+in case of ordinary files, or search permission in case of directories.
+.PP
+Linux uses the fsuid instead of the effective user ID in permission checks.
+Ordinarily the fsuid will equal the effective user ID, but the fsuid can be
+changed by the system call
+.BR setfsuid (2).
+.PP
+(Here "fsuid" stands for something like "filesystem user ID".
+The concept was required for the implementation of a user space
+NFS server at a time when processes could send a signal to a process
+with the same effective user ID.
+It is obsolete now.
+Nobody should use
+.BR setfsuid (2).)
+.PP
+Similarly, Linux uses the fsgid ("filesystem group ID")
+instead of the effective group ID.
+See
+.BR setfsgid (2).
+.\" FIXME . say something about filesystem mounted read-only ?
+.SS Bypassing permission checks: superuser and capabilities
+On a traditional UNIX system, the superuser
+.RI ( root ,
+user ID 0) is all-powerful, and bypasses all permissions restrictions
+when accessing files.
+.\" (but for exec at least one x bit must be set) -- AEB
+.\" but there is variation across systems on this point: for
+.\" example, HP-UX and Tru64 are as described by AEB. However,
+.\" on some implementations (e.g., Solaris, FreeBSD),
+.\" access(X_OK) by superuser will report success, regardless
+.\" of the file's execute permission bits. -- MTK (Oct 05)
+.PP
+On Linux, superuser privileges are divided into capabilities (see
+.BR capabilities (7)).
+Two capabilities are relevant for file permissions checks:
+.B CAP_DAC_OVERRIDE
+and
+.BR CAP_DAC_READ_SEARCH .
+(A process has these capabilities if its fsuid is 0.)
+.PP
+The
+.B CAP_DAC_OVERRIDE
+capability overrides all permission checking,
+but grants execute permission only when at least one
+of the file's three execute permission bits is set.
+.PP
+The
+.B CAP_DAC_READ_SEARCH
+capability grants read and search permission
+on directories, and read permission on ordinary files.
+.\" FIXME . say something about immutable files
+.\" FIXME . say something about ACLs
+.SH SEE ALSO
+.BR readlink (2),
+.BR capabilities (7),
+.BR credentials (7),
+.BR symlink (7)
diff --git a/man7/persistent-keyring.7 b/man7/persistent-keyring.7
new file mode 100644
index 0000000..472782a
--- /dev/null
+++ b/man7/persistent-keyring.7
@@ -0,0 +1,124 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH persistent-keyring 7 2023-02-08 "Linux man-pages 6.05.01"
+.SH NAME
+persistent-keyring \- per-user persistent keyring
+.SH DESCRIPTION
+The persistent keyring is a keyring used to anchor keys on behalf of a user.
+Each UID the kernel deals with has its own persistent keyring that
+is shared between all threads owned by that UID.
+The persistent keyring has a name (description) of the form
+.I _persistent.<UID>
+where
+.I <UID>
+is the user ID of the corresponding user.
+.PP
+The persistent keyring may not be accessed directly,
+even by processes with the appropriate UID.
+.\" FIXME The meaning of the preceding sentence isn't clear. What is meant?
+Instead, it must first be linked to one of a process's keyrings,
+before that keyring can access the persistent keyring
+by virtue of its possessor permits.
+This linking is done with the
+.BR keyctl_get_persistent (3)
+function.
+.PP
+If a persistent keyring does not exist when it is accessed by the
+.BR keyctl_get_persistent (3)
+operation, it will be automatically created.
+.PP
+Each time the
+.BR keyctl_get_persistent (3)
+operation is performed,
+the persistent keyring's expiration timer is reset to the value in:
+.PP
+.in +4n
+.EX
+/proc/sys/kernel/keys/persistent_keyring_expiry
+.EE
+.in
+.PP
+Should the timeout be reached,
+the persistent keyring will be removed and
+everything it pins can then be garbage collected.
+The keyring will then be re-created on a subsequent call to
+.BR keyctl_get_persistent (3).
+.PP
+The persistent keyring is not directly searched by
+.BR request_key (2);
+it is searched only if it is linked into one of the keyrings
+that is searched by
+.BR request_key (2).
+.PP
+The persistent keyring is independent of
+.BR clone (2),
+.BR fork (2),
+.BR vfork (2),
+.BR execve (2),
+and
+.BR _exit (2).
+It persists until its expiration timer triggers,
+at which point it is garbage collected.
+This allows the persistent keyring to carry keys beyond the life of
+the kernel's record of the corresponding UID
+(the destruction of which results in the destruction of the
+.BR user\-keyring (7)
+and the
+.BR user\-session\-keyring (7)).
+The persistent keyring can thus be used to
+hold authentication tokens for processes that run without user interaction,
+such as programs started by
+.BR cron (8).
+.PP
+The persistent keyring is used to store UID-specific objects that
+themselves have limited lifetimes (e.g., Kerberos tokens).
+If those tokens cease to be used
+(i.e., the persistent keyring is not accessed),
+then the timeout of the persistent keyring ensures that
+the corresponding objects are automatically discarded.
+.\"
+.SS Special operations
+The
+.I keyutils
+library provides the
+.BR keyctl_get_persistent (3)
+function for manipulating persistent keyrings.
+(This function is an interface to the
+.BR keyctl (2)
+.B KEYCTL_GET_PERSISTENT
+operation.)
+This operation allows the calling thread to get the persistent keyring
+corresponding to its own UID or, if the thread has the
+.B CAP_SETUID
+capability, the persistent keyring corresponding to some other UID
+in the same user namespace.
+.SH NOTES
+Each user namespace owns a keyring called
+.I .persistent_register
+that contains links to all of the persistent keys in that namespace.
+(The
+.I .persistent_register
+keyring can be seen when reading the contents of the
+.I /proc/keys
+file for the UID 0 in the namespace.)
+The
+.BR keyctl_get_persistent (3)
+operation looks for a key with a name of the form
+.IR _persistent. UID
+in that keyring,
+creates the key if it does not exist, and links it into the keyring.
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyctl_get_persistent (3),
+.BR keyrings (7),
+.BR process\-keyring (7),
+.BR session\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-keyring (7),
+.BR user\-session\-keyring (7)
diff --git a/man7/pid_namespaces.7 b/man7/pid_namespaces.7
new file mode 100644
index 0000000..b154fb4
--- /dev/null
+++ b/man7/pid_namespaces.7
@@ -0,0 +1,388 @@
+.\" Copyright (c) 2013 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" and Copyright (c) 2012 by Eric W. Biederman <ebiederm@xmission.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH pid_namespaces 7 2023-03-30 "Linux man-pages 6.05.01"
+.SH NAME
+pid_namespaces \- overview of Linux PID namespaces
+.SH DESCRIPTION
+For an overview of namespaces, see
+.BR namespaces (7).
+.PP
+PID namespaces isolate the process ID number space,
+meaning that processes in different PID namespaces can have the same PID.
+PID namespaces allow containers to provide functionality
+such as suspending/resuming the set of processes in the container and
+migrating the container to a new host
+while the processes inside the container maintain the same PIDs.
+.PP
+PIDs in a new PID namespace start at 1,
+somewhat like a standalone system, and calls to
+.BR fork (2),
+.BR vfork (2),
+or
+.BR clone (2)
+will produce processes with PIDs that are unique within the namespace.
+.PP
+Use of PID namespaces requires a kernel that is configured with the
+.B CONFIG_PID_NS
+option.
+.\"
+.\" ============================================================
+.\"
+.SS The namespace "init" process
+The first process created in a new namespace
+(i.e., the process created using
+.BR clone (2)
+with the
+.B CLONE_NEWPID
+flag, or the first child created by a process after a call to
+.BR unshare (2)
+using the
+.B CLONE_NEWPID
+flag) has the PID 1, and is the "init" process for the namespace (see
+.BR init (1)).
+This process becomes the parent of any child processes that are orphaned
+because a process that resides in this PID namespace terminated
+(see below for further details).
+.PP
+If the "init" process of a PID namespace terminates,
+the kernel terminates all of the processes in the namespace via a
+.B SIGKILL
+signal.
+This behavior reflects the fact that the "init" process
+is essential for the correct operation of a PID namespace.
+In this case, a subsequent
+.BR fork (2)
+into this PID namespace fails with the error
+.BR ENOMEM ;
+it is not possible to create a new process in a PID namespace whose "init"
+process has terminated.
+Such scenarios can occur when, for example,
+a process uses an open file descriptor for a
+.IR /proc/ pid /ns/pid
+file corresponding to a process that was in a namespace to
+.BR setns (2)
+into that namespace after the "init" process has terminated.
+Another possible scenario can occur after a call to
+.BR unshare (2):
+if the first child subsequently created by a
+.BR fork (2)
+terminates, then subsequent calls to
+.BR fork (2)
+fail with
+.BR ENOMEM .
+.PP
+Only signals for which the "init" process has established a signal handler
+can be sent to the "init" process by other members of the PID namespace.
+This restriction applies even to privileged processes,
+and prevents other members of the PID namespace from
+accidentally killing the "init" process.
+.PP
+Likewise, a process in an ancestor namespace
+can\[em]subject to the usual permission checks described in
+.BR kill (2)\[em]send
+signals to the "init" process of a child PID namespace only
+if the "init" process has established a handler for that signal.
+(Within the handler, the
+.I siginfo_t
+.I si_pid
+field described in
+.BR sigaction (2)
+will be zero.)
+.B SIGKILL
+or
+.B SIGSTOP
+are treated exceptionally:
+these signals are forcibly delivered when sent from an ancestor PID namespace.
+Neither of these signals can be caught by the "init" process,
+and so will result in the usual actions associated with those signals
+(respectively, terminating and stopping the process).
+.PP
+Starting with Linux 3.4, the
+.BR reboot (2)
+system call causes a signal to be sent to the namespace "init" process.
+See
+.BR reboot (2)
+for more details.
+.\"
+.\" ============================================================
+.\"
+.SS Nesting PID namespaces
+PID namespaces can be nested:
+each PID namespace has a parent,
+except for the initial ("root") PID namespace.
+The parent of a PID namespace is the PID namespace of the process that
+created the namespace using
+.BR clone (2)
+or
+.BR unshare (2).
+PID namespaces thus form a tree,
+with all namespaces ultimately tracing their ancestry to the root namespace.
+Since Linux 3.7,
+.\" commit f2302505775fd13ba93f034206f1e2a587017929
+.\" The kernel constant MAX_PID_NS_LEVEL
+the kernel limits the maximum nesting depth for PID namespaces to 32.
+.PP
+A process is visible to other processes in its PID namespace,
+and to the processes in each direct ancestor PID namespace
+going back to the root PID namespace.
+In this context, "visible" means that one process
+can be the target of operations by another process using
+system calls that specify a process ID.
+Conversely, the processes in a child PID namespace can't see
+processes in the parent and further removed ancestor namespaces.
+More succinctly: a process can see (e.g., send signals with
+.BR kill (2),
+set nice values with
+.BR setpriority (2),
+etc.) only processes contained in its own PID namespace
+and in descendants of that namespace.
+.PP
+A process has one process ID in each of the layers of the PID
+namespace hierarchy in which it is visible,
+and walking back through each direct ancestor namespace
+through to the root PID namespace.
+System calls that operate on process IDs always
+operate using the process ID that is visible in the
+PID namespace of the caller.
+A call to
+.BR getpid (2)
+always returns the PID associated with the namespace in which
+the process was created.
+.PP
+Some processes in a PID namespace may have parents
+that are outside of the namespace.
+For example, the parent of the initial process in the namespace
+(i.e., the
+.BR init (1)
+process with PID 1) is necessarily in another namespace.
+Likewise, the direct children of a process that uses
+.BR setns (2)
+to cause its children to join a PID namespace are in a different
+PID namespace from the caller of
+.BR setns (2).
+Calls to
+.BR getppid (2)
+for such processes return 0.
+.PP
+While processes may freely descend into child PID namespaces
+(e.g., using
+.BR setns (2)
+with a PID namespace file descriptor),
+they may not move in the other direction.
+That is to say, processes may not enter any ancestor namespaces
+(parent, grandparent, etc.).
+Changing PID namespaces is a one-way operation.
+.PP
+The
+.B NS_GET_PARENT
+.BR ioctl (2)
+operation can be used to discover the parental relationship
+between PID namespaces; see
+.BR ioctl_ns (2).
+.\"
+.\" ============================================================
+.\"
+.SS setns(2) and unshare(2) semantics
+Calls to
+.BR setns (2)
+that specify a PID namespace file descriptor
+and calls to
+.BR unshare (2)
+with the
+.B CLONE_NEWPID
+flag cause children subsequently created
+by the caller to be placed in a different PID namespace from the caller.
+(Since Linux 4.12, that PID namespace is shown via the
+.IR /proc/ pid /ns/pid_for_children
+file, as described in
+.BR namespaces (7).)
+These calls do not, however,
+change the PID namespace of the calling process,
+because doing so would change the caller's idea of its own PID
+(as reported by
+.BR getpid (2)),
+which would break many applications and libraries.
+.PP
+To put things another way:
+a process's PID namespace membership is determined when the process is created
+and cannot be changed thereafter.
+Among other things, this means that the parental relationship
+between processes mirrors the parental relationship between PID namespaces:
+the parent of a process is either in the same namespace
+or resides in the immediate parent PID namespace.
+.PP
+A process may call
+.BR unshare (2)
+with the
+.B CLONE_NEWPID
+flag only once.
+After it has performed this operation, its
+.IR /proc/ pid /ns/pid_for_children
+symbolic link will be empty until the first child is created in the namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Adoption of orphaned children
+When a child process becomes orphaned, it is reparented to the "init"
+process in the PID namespace of its parent
+(unless one of the nearer ancestors of the parent employed the
+.BR prctl (2)
+.B PR_SET_CHILD_SUBREAPER
+command to mark itself as the reaper of orphaned descendant processes).
+Note that because of the
+.BR setns (2)
+and
+.BR unshare (2)
+semantics described above, this may be the "init" process in the PID
+namespace that is the
+.I parent
+of the child's PID namespace,
+rather than the "init" process in the child's own PID namespace.
+.\" Furthermore, by definition, the parent of the "init" process
+.\" of a PID namespace resides in the parent PID namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Compatibility of CLONE_NEWPID with other CLONE_* flags
+In current versions of Linux,
+.B CLONE_NEWPID
+can't be combined with
+.BR CLONE_THREAD .
+Threads are required to be in the same PID namespace such that
+the threads in a process can send signals to each other.
+Similarly, it must be possible to see all of the threads
+of a process in the
+.BR proc (5)
+filesystem.
+Additionally, if two threads were in different PID
+namespaces, the process ID of the process sending a signal
+could not be meaningfully encoded when a signal is sent
+(see the description of the
+.I siginfo_t
+type in
+.BR sigaction (2)).
+Since this is computed when a signal is enqueued,
+a signal queue shared by processes in multiple PID namespaces
+would defeat that.
+.PP
+.\" Note these restrictions were all introduced in
+.\" 8382fcac1b813ad0a4e68a838fc7ae93fa39eda0
+.\" when CLONE_NEWPID|CLONE_VM was disallowed
+In earlier versions of Linux,
+.B CLONE_NEWPID
+was additionally disallowed (failing with the error
+.BR EINVAL )
+in combination with
+.B CLONE_SIGHAND
+.\" (restriction lifted in faf00da544045fdc1454f3b9e6d7f65c841de302)
+(before Linux 4.3) as well as
+.\" (restriction lifted in e79f525e99b04390ca4d2366309545a836c03bf1)
+.B CLONE_VM
+(before Linux 3.12).
+The changes that lifted these restrictions have also been ported to
+earlier stable kernels.
+.\"
+.\" ============================================================
+.\"
+.SS /proc and PID namespaces
+A
+.I /proc
+filesystem shows (in the
+.IR /proc/ pid
+directories) only processes visible in the PID namespace
+of the process that performed the mount, even if the
+.I /proc
+filesystem is viewed from processes in other namespaces.
+.PP
+After creating a new PID namespace,
+it is useful for the child to change its root directory
+and mount a new procfs instance at
+.I /proc
+so that tools such as
+.BR ps (1)
+work correctly.
+If a new mount namespace is simultaneously created by including
+.B CLONE_NEWNS
+in the
+.I flags
+argument of
+.BR clone (2)
+or
+.BR unshare (2),
+then it isn't necessary to change the root directory:
+a new procfs instance can be mounted directly over
+.IR /proc .
+.PP
+From a shell, the command to mount
+.I /proc
+is:
+.PP
+.in +4n
+.EX
+$ mount \-t proc proc /proc
+.EE
+.in
+.PP
+Calling
+.BR readlink (2)
+on the path
+.I /proc/self
+yields the process ID of the caller in the PID namespace of the procfs mount
+(i.e., the PID namespace of the process that mounted the procfs).
+This can be useful for introspection purposes,
+when a process wants to discover its PID in other namespaces.
+.\"
+.\" ============================================================
+.\"
+.SS /proc files
+.TP
+.BR /proc/sys/kernel/ns_last_pid " (since Linux 3.3)"
+.\" commit b8f566b04d3cddd192cfd2418ae6d54ac6353792
+This file
+(which is virtualized per PID namespace)
+displays the last PID that was allocated in this PID namespace.
+When the next PID is allocated,
+the kernel will search for the lowest unallocated PID
+that is greater than this value,
+and when this file is subsequently read it will show that PID.
+.IP
+This file is writable by a process that has the
+.B CAP_SYS_ADMIN
+or (since Linux 5.9)
+.B CAP_CHECKPOINT_RESTORE
+capability inside the user namespace that owns the PID namespace.
+.\" This ability is necessary to support checkpoint restore in user-space
+This makes it possible to determine the PID that is allocated
+to the next process that is created inside this PID namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Miscellaneous
+When a process ID is passed over a UNIX domain socket to a
+process in a different PID namespace (see the description of
+.B SCM_CREDENTIALS
+in
+.BR unix (7)),
+it is translated into the corresponding PID value in
+the receiving process's PID namespace.
+.SH STANDARDS
+Linux.
+.SH EXAMPLES
+See
+.BR user_namespaces (7).
+.SH SEE ALSO
+.BR clone (2),
+.BR reboot (2),
+.BR setns (2),
+.BR unshare (2),
+.BR proc (5),
+.BR capabilities (7),
+.BR credentials (7),
+.BR mount_namespaces (7),
+.BR namespaces (7),
+.BR user_namespaces (7),
+.BR switch_root (8)
diff --git a/man7/pipe.7 b/man7/pipe.7
new file mode 100644
index 0000000..baf05bc
--- /dev/null
+++ b/man7/pipe.7
@@ -0,0 +1,407 @@
+.\" Copyright (C) 2005 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH pipe 7 2023-07-16 "Linux man-pages 6.05.01"
+.SH NAME
+pipe \- overview of pipes and FIFOs
+.SH DESCRIPTION
+Pipes and FIFOs (also known as named pipes)
+provide a unidirectional interprocess communication channel.
+A pipe has a
+.I read end
+and a
+.IR "write end" .
+Data written to the write end of a pipe can be read
+from the read end of the pipe.
+.PP
+A pipe is created using
+.BR pipe (2),
+which creates a new pipe and returns two file descriptors,
+one referring to the read end of the pipe,
+the other referring to the write end.
+Pipes can be used to create a communication channel between related
+processes; see
+.BR pipe (2)
+for an example.
+.PP
+A FIFO (short for First In First Out) has a name within the filesystem
+(created using
+.BR mkfifo (3)),
+and is opened using
+.BR open (2).
+Any process may open a FIFO, assuming the file permissions allow it.
+The read end is opened using the
+.B O_RDONLY
+flag; the write end is opened using the
+.B O_WRONLY
+flag.
+See
+.BR fifo (7)
+for further details.
+.IR Note :
+although FIFOs have a pathname in the filesystem,
+I/O on FIFOs does not involve operations on the underlying device
+(if there is one).
+.SS I/O on pipes and FIFOs
+The only difference between pipes and FIFOs is the manner in which
+they are created and opened.
+Once these tasks have been accomplished,
+I/O on pipes and FIFOs has exactly the same semantics.
+.PP
+If a process attempts to read from an empty pipe, then
+.BR read (2)
+will block until data is available.
+If a process attempts to write to a full pipe (see below), then
+.BR write (2)
+blocks until sufficient data has been read from the pipe
+to allow the write to complete.
+.PP
+Nonblocking I/O is possible by using the
+.BR fcntl (2)
+.B F_SETFL
+operation to enable the
+.B O_NONBLOCK
+open file status flag or by opening a
+.BR fifo (7)
+with
+.BR O_NONBLOCK .
+If any process has the pipe open for writing, reads fail with
+.BR EAGAIN ;
+otherwise\[em]with no potential writers\[em]reads succeed and return empty.
+.PP
+The communication channel provided by a pipe is a
+.IR "byte stream" :
+there is no concept of message boundaries.
+.PP
+If all file descriptors referring to the write end of a pipe
+have been closed, then an attempt to
+.BR read (2)
+from the pipe will see end-of-file
+.RB ( read (2)
+will return 0).
+If all file descriptors referring to the read end of a pipe
+have been closed, then a
+.BR write (2)
+will cause a
+.B SIGPIPE
+signal to be generated for the calling process.
+If the calling process is ignoring this signal, then
+.BR write (2)
+fails with the error
+.BR EPIPE .
+An application that uses
+.BR pipe (2)
+and
+.BR fork (2)
+should use suitable
+.BR close (2)
+calls to close unnecessary duplicate file descriptors;
+this ensures that end-of-file and
+.BR SIGPIPE / EPIPE
+are delivered when appropriate.
+.PP
+It is not possible to apply
+.BR lseek (2)
+to a pipe.
+.SS Pipe capacity
+A pipe has a limited capacity.
+If the pipe is full, then a
+.BR write (2)
+will block or fail, depending on whether the
+.B O_NONBLOCK
+flag is set (see below).
+Different implementations have different limits for the pipe capacity.
+Applications should not rely on a particular capacity:
+an application should be designed so that a reading process consumes data
+as soon as it is available,
+so that a writing process does not remain blocked.
+.PP
+Before Linux 2.6.11, the capacity of a pipe was the same as
+the system page size (e.g., 4096 bytes on i386).
+Since Linux 2.6.11, the pipe capacity is 16 pages
+(i.e., 65,536 bytes in a system with a page size of 4096 bytes).
+Since Linux 2.6.35, the default pipe capacity is 16 pages,
+but the capacity can be queried and set using the
+.BR fcntl (2)
+.B F_GETPIPE_SZ
+and
+.B F_SETPIPE_SZ
+operations.
+See
+.BR fcntl (2)
+for more information.
+.PP
+The following
+.BR ioctl (2)
+operation, which can be applied to a file descriptor
+that refers to either end of a pipe,
+places a count of the number of unread bytes in the pipe in the
+.I int
+buffer pointed to by the final argument of the call:
+.PP
+.in +4n
+.EX
+ioctl(fd, FIONREAD, &nbytes);
+.EE
+.in
+.PP
+The
+.B FIONREAD
+operation is not specified in any standard,
+but is provided on many implementations.
+.\"
+.SS /proc files
+On Linux, the following files control how much memory can be used for pipes:
+.TP
+.IR /proc/sys/fs/pipe\-max\-pages " (only in Linux 2.6.34)"
+.\" commit b492e95be0ae672922f4734acf3f5d35c30be948
+An upper limit, in pages, on the capacity that an unprivileged user
+(one without the
+.B CAP_SYS_RESOURCE
+capability)
+can set for a pipe.
+.IP
+The default value for this limit is 16 times the default pipe capacity
+(see above); the lower limit is two pages.
+.IP
+This interface was removed in Linux 2.6.35, in favor of
+.IR /proc/sys/fs/pipe\-max\-size .
+.TP
+.IR /proc/sys/fs/pipe\-max\-size " (since Linux 2.6.35)"
+.\" commit ff9da691c0498ff81fdd014e7a0731dab2337dac
+The maximum size (in bytes) of individual pipes that can be set
+.\" This limit is not checked on pipe creation, where the capacity is
+.\" always PIPE_DEF_BUFS, regardless of pipe-max-size
+by users without the
+.B CAP_SYS_RESOURCE
+capability.
+The value assigned to this file may be rounded upward,
+to reflect the value actually employed for a convenient implementation.
+To determine the rounded-up value,
+display the contents of this file after assigning a value to it.
+.IP
+The default value for this file is 1048576 (1\ MiB).
+The minimum value that can be assigned to this file is the system page size.
+Attempts to set a limit less than the page size cause
+.BR write (2)
+to fail with the error
+.BR EINVAL .
+.IP
+Since Linux 4.9,
+.\" commit 086e774a57fba4695f14383c0818994c0b31da7c
+the value in this file also acts as a ceiling on the default capacity
+of a new pipe or newly opened FIFO.
+.TP
+.IR /proc/sys/fs/pipe\-user\-pages\-hard " (since Linux 4.5)"
+.\" commit 759c01142a5d0f364a462346168a56de28a80f52
+The hard limit on the total size (in pages) of all pipes created or set by
+a single unprivileged user (i.e., one with neither the
+.B CAP_SYS_RESOURCE
+nor the
+.B CAP_SYS_ADMIN
+capability).
+So long as the total number of pages allocated to pipe buffers
+for this user is at this limit,
+attempts to create new pipes will be denied,
+and attempts to increase a pipe's capacity will be denied.
+.IP
+When the value of this limit is zero (which is the default),
+no hard limit is applied.
+.\" The default was chosen to avoid breaking existing applications that
+.\" make intensive use of pipes (e.g., for splicing).
+.TP
+.IR /proc/sys/fs/pipe\-user\-pages\-soft " (since Linux 4.5)"
+.\" commit 759c01142a5d0f364a462346168a56de28a80f52
+The soft limit on the total size (in pages) of all pipes created or set by
+a single unprivileged user (i.e., one with neither the
+.B CAP_SYS_RESOURCE
+nor the
+.B CAP_SYS_ADMIN
+capability).
+So long as the total number of pages allocated to pipe buffers
+for this user is at this limit,
+individual pipes created by a user will be limited to one page,
+and attempts to increase a pipe's capacity will be denied.
+.IP
+When the value of this limit is zero, no soft limit is applied.
+The default value for this file is 16384,
+which permits creating up to 1024 pipes with the default capacity.
+.PP
+Before Linux 4.9, some bugs affected the handling of the
+.I pipe\-user\-pages\-soft
+and
+.I pipe\-user\-pages\-hard
+limits; see BUGS.
+.\"
+.SS PIPE_BUF
+POSIX.1 says that writes of no more than
+.B PIPE_BUF
+bytes must be atomic: the output data is written to the pipe as a
+contiguous sequence.
+Writes of more than
+.B PIPE_BUF
+bytes may be nonatomic: the kernel may interleave the data
+with data written by other processes.
+POSIX.1 requires
+.B PIPE_BUF
+to be at least 512 bytes.
+(On Linux,
+.B PIPE_BUF
+is 4096 bytes.)
+The precise semantics depend on whether the file descriptor is nonblocking
+.RB ( O_NONBLOCK ),
+whether there are multiple writers to the pipe, and on
+.IR n ,
+the number of bytes to be written:
+.TP
+\fBO_NONBLOCK\fP disabled, \fIn\fP <= \fBPIPE_BUF\fP
+All
+.I n
+bytes are written atomically;
+.BR write (2)
+may block if there is not room for
+.I n
+bytes to be written immediately
+.TP
+\fBO_NONBLOCK\fP enabled, \fIn\fP <= \fBPIPE_BUF\fP
+If there is room to write
+.I n
+bytes to the pipe, then
+.BR write (2)
+succeeds immediately, writing all
+.I n
+bytes; otherwise
+.BR write (2)
+fails, with
+.I errno
+set to
+.BR EAGAIN .
+.TP
+\fBO_NONBLOCK\fP disabled, \fIn\fP > \fBPIPE_BUF\fP
+The write is nonatomic: the data given to
+.BR write (2)
+may be interleaved with
+.BR write (2)s
+by other processes;
+the
+.BR write (2)
+blocks until
+.I n
+bytes have been written.
+.TP
+\fBO_NONBLOCK\fP enabled, \fIn\fP > \fBPIPE_BUF\fP
+If the pipe is full, then
+.BR write (2)
+fails, with
+.I errno
+set to
+.BR EAGAIN .
+Otherwise, from 1 to
+.I n
+bytes may be written (i.e., a "partial write" may occur;
+the caller should check the return value from
+.BR write (2)
+to see how many bytes were actually written),
+and these bytes may be interleaved with writes by other processes.
+.SS Open file status flags
+The only open file status flags that can be meaningfully applied to
+a pipe or FIFO are
+.B O_NONBLOCK
+and
+.BR O_ASYNC .
+.PP
+Setting the
+.B O_ASYNC
+flag for the read end of a pipe causes a signal
+.RB ( SIGIO
+by default) to be generated when new input becomes available on the pipe.
+The target for delivery of signals must be set using the
+.BR fcntl (2)
+.B F_SETOWN
+command.
+On Linux,
+.B O_ASYNC
+is supported for pipes and FIFOs only since Linux 2.6.
+.SS Portability notes
+On some systems (but not Linux), pipes are bidirectional:
+data can be transmitted in both directions between the pipe ends.
+POSIX.1 requires only unidirectional pipes.
+Portable applications should avoid reliance on
+bidirectional pipe semantics.
+.SS BUGS
+Before Linux 4.9, some bugs affected the handling of the
+.I pipe\-user\-pages\-soft
+and
+.I pipe\-user\-pages\-hard
+limits when using the
+.BR fcntl (2)
+.B F_SETPIPE_SZ
+operation to change a pipe's capacity:
+.\" These bugs were remedied by a series of patches, in particular,
+.\" commit b0b91d18e2e97b741b294af9333824ecc3fadfd8 and
+.\" commit a005ca0e6813e1d796a7422a7e31d8b8d6555df1
+.IP (a) 5
+When increasing the pipe capacity, the checks against the soft and
+hard limits were made against existing consumption,
+and excluded the memory required for the increased pipe capacity.
+The new increase in pipe capacity could then push the total
+memory used by the user for pipes (possibly far) over a limit.
+(This could also trigger the problem described next.)
+.IP
+Starting with Linux 4.9,
+the limit checking includes the memory required for the new pipe capacity.
+.IP (b)
+The limit checks were performed even when the new pipe capacity was
+less than the existing pipe capacity.
+This could lead to problems if a user set a large pipe capacity,
+and then the limits were lowered, with the result that the user could
+no longer decrease the pipe capacity.
+.IP
+Starting with Linux 4.9, checks against the limits
+are performed only when increasing a pipe's capacity;
+an unprivileged user can always decrease a pipe's capacity.
+.IP (c)
+The accounting and checking against the limits were done as follows:
+.RS
+.IP (1) 5
+.PD 0
+Test whether the user has exceeded the limit.
+.IP (2)
+Make the new pipe buffer allocation.
+.IP (3)
+Account new allocation against the limits.
+.PD
+.RE
+.IP
+This was racy.
+Multiple processes could pass point (1) simultaneously,
+and then allocate pipe buffers that were accounted for only in step (3),
+with the result that the user's pipe buffer
+allocation could be pushed over the limit.
+.IP
+Starting with Linux 4.9,
+the accounting step is performed before doing the allocation,
+and the operation fails if the limit would be exceeded.
+.PP
+Before Linux 4.9, bugs similar to points (a) and (c) could also occur
+when the kernel allocated memory for a new pipe buffer;
+that is, when calling
+.BR pipe (2)
+and when opening a previously unopened FIFO.
+.SH SEE ALSO
+.BR mkfifo (1),
+.BR dup (2),
+.BR fcntl (2),
+.BR open (2),
+.BR pipe (2),
+.BR poll (2),
+.BR select (2),
+.BR socketpair (2),
+.BR splice (2),
+.BR stat (2),
+.BR tee (2),
+.BR vmsplice (2),
+.BR mkfifo (3),
+.BR epoll (7),
+.BR fifo (7)
diff --git a/man7/pkeys.7 b/man7/pkeys.7
new file mode 100644
index 0000000..4f96f1e
--- /dev/null
+++ b/man7/pkeys.7
@@ -0,0 +1,237 @@
+.\" Copyright (C) 2016 Intel Corporation
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH pkeys 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+pkeys \- overview of Memory Protection Keys
+.SH DESCRIPTION
+Memory Protection Keys (pkeys) are an extension to existing
+page-based memory permissions.
+Normal page permissions using
+page tables require expensive system calls and TLB invalidations
+when changing permissions.
+Memory Protection Keys provide a mechanism for changing
+protections without requiring modification of the page tables on
+every permission change.
+.PP
+To use pkeys, software must first "tag" a page in the page tables
+with a pkey.
+After this tag is in place, an application only has
+to change the contents of a register in order to remove write
+access, or all access to a tagged page.
+.PP
+Protection keys work in conjunction with the existing
+.BR PROT_READ ,
+.BR PROT_WRITE ,
+and
+.B PROT_EXEC
+permissions passed to system calls such as
+.BR mprotect (2)
+and
+.BR mmap (2),
+but always act to further restrict these traditional permission
+mechanisms.
+.PP
+If a process performs an access that violates pkey
+restrictions, it receives a
+.B SIGSEGV
+signal.
+See
+.BR sigaction (2)
+for details of the information available with that signal.
+.PP
+To use the pkeys feature, the processor must support it, and the kernel
+must contain support for the feature on a given processor.
+As of early 2016 only future Intel x86 processors are supported,
+and this hardware supports 16 protection keys in each process.
+However, pkey 0 is used as the default key, so a maximum of 15
+are available for actual application use.
+The default key is assigned to any memory region for which a
+pkey has not been explicitly assigned via
+.BR pkey_mprotect (2).
+.PP
+Protection keys have the potential to add a layer of security and
+reliability to applications.
+But they have not been primarily designed as
+a security feature.
+For instance, WRPKRU is a completely unprivileged
+instruction, so pkeys are useless in any case where an attacker controls
+the PKRU register or can execute arbitrary instructions.
+.PP
+Applications should be very careful to ensure that they do not "leak"
+protection keys.
+For instance, before calling
+.BR pkey_free (2),
+the application should be sure that no memory has that pkey assigned.
+If the application left the freed pkey assigned, a future user of
+that pkey might inadvertently change the permissions of an unrelated
+data structure, which could impact security or stability.
+The kernel currently allows in-use pkeys to have
+.BR pkey_free (2)
+called on them because it would have processor or memory performance
+implications to perform the additional checks needed to disallow it.
+Implementation of the necessary checks is left up to applications.
+Applications may implement these checks by searching the
+.IR /proc/ pid /smaps
+file for memory regions with the pkey assigned.
+Further details can be found in
+.BR proc (5).
+.PP
+Any application wanting to use protection keys needs to be able
+to function without them.
+They might be unavailable because the hardware that the
+application runs on does not support them, the kernel code does
+not contain support, the kernel support has been disabled, or
+because the keys have all been allocated, perhaps by a library
+the application is using.
+It is recommended that applications wanting to use protection
+keys should simply call
+.BR pkey_alloc (2)
+and test whether the call succeeds,
+instead of attempting to detect support for the
+feature in any other way.
+.PP
+Although unnecessary, hardware support for protection keys may be
+enumerated with the
+.I cpuid
+instruction.
+Details of how to do this can be found in the Intel Software
+Developers Manual.
+The kernel performs this enumeration and exposes the information in
+.I /proc/cpuinfo
+under the "flags" field.
+The string "pku" in this field indicates hardware support for protection
+keys and the string "ospke" indicates that the kernel contains and has
+enabled protection keys support.
+.PP
+Applications using threads and protection keys should be especially
+careful.
+Threads inherit the protection key rights of the parent at the time
+of the
+.BR clone (2)
+system call.
+Applications should either ensure that their own permissions are
+appropriate for child threads at the time when
+.BR clone (2)
+is called, or ensure that each child thread can perform its
+own initialization of protection key rights.
+.\"
+.SS Signal Handler Behavior
+Each time a signal handler is invoked (including nested signals), the
+thread is temporarily given a new, default set of protection key rights
+that override the rights from the interrupted context.
+This means that applications must re-establish their desired protection
+key rights upon entering a signal handler if the desired rights differ
+from the defaults.
+The rights of any interrupted context are restored when the signal
+handler returns.
+.PP
+This signal behavior is unusual and is due to the fact that the x86 PKRU
+register (which stores protection key access rights) is managed with the
+same hardware mechanism (XSAVE) that manages floating-point registers.
+The signal behavior is the same as that of floating-point registers.
+.\"
+.SS Protection Keys system calls
+The Linux kernel implements the following pkey-related system calls:
+.BR pkey_mprotect (2),
+.BR pkey_alloc (2),
+and
+.BR pkey_free (2).
+.PP
+The Linux pkey system calls are available only if the kernel was
+configured and built with the
+.B CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+option.
+.SH EXAMPLES
+The program below allocates a page of memory with read and write permissions.
+It then writes some data to the memory and successfully reads it
+back.
+After that, it attempts to allocate a protection key and
+disallows access to the page by using the WRPKRU instruction.
+It then tries to access the page,
+which we now expect to cause a fatal signal to the application.
+.PP
+.in +4n
+.EX
+.RB "$" " ./a.out"
+buffer contains: 73
+about to read buffer again...
+Segmentation fault (core dumped)
+.EE
+.in
+.SS Program source
+\&
+.EX
+#define _GNU_SOURCE
+#include <err.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+\&
+int
+main(void)
+{
+ int status;
+ int pkey;
+ int *buffer;
+\&
+ /*
+ * Allocate one page of memory.
+ */
+ buffer = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, \-1, 0);
+ if (buffer == MAP_FAILED)
+ err(EXIT_FAILURE, "mmap");
+\&
+ /*
+ * Put some random data into the page (still OK to touch).
+ */
+ *buffer = __LINE__;
+ printf("buffer contains: %d\en", *buffer);
+\&
+ /*
+ * Allocate a protection key:
+ */
+ pkey = pkey_alloc(0, 0);
+ if (pkey == \-1)
+ err(EXIT_FAILURE, "pkey_alloc");
+\&
+ /*
+ * Disable access to any memory with "pkey" set,
+ * even though there is none right now.
+ */
+ status = pkey_set(pkey, PKEY_DISABLE_ACCESS);
+ if (status)
+ err(EXIT_FAILURE, "pkey_set");
+\&
+ /*
+ * Set the protection key on "buffer".
+ * Note that it is still read/write as far as mprotect() is
+ * concerned and the previous pkey_set() overrides it.
+ */
+ status = pkey_mprotect(buffer, getpagesize(),
+ PROT_READ | PROT_WRITE, pkey);
+ if (status == \-1)
+ err(EXIT_FAILURE, "pkey_mprotect");
+\&
+ printf("about to read buffer again...\en");
+\&
+ /*
+ * This will crash, because we have disallowed access.
+ */
+ printf("buffer contains: %d\en", *buffer);
+\&
+ status = pkey_free(pkey);
+ if (status == \-1)
+ err(EXIT_FAILURE, "pkey_free");
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR pkey_alloc (2),
+.BR pkey_free (2),
+.BR pkey_mprotect (2),
+.BR sigaction (2)
diff --git a/man7/posixoptions.7 b/man7/posixoptions.7
new file mode 100644
index 0000000..b0dea3d
--- /dev/null
+++ b/man7/posixoptions.7
@@ -0,0 +1,1014 @@
+.\" Copyright (c) 2003 Andries Brouwer (aeb@cwi.nl)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH posixoptions 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+posixoptions \- optional parts of the POSIX standard
+.SH DESCRIPTION
+The POSIX standard (the information below is from POSIX.1-2001)
+describes a set of behaviors and interfaces for a compliant system.
+However, many interfaces are optional and there are feature test macros
+to test the availability of interfaces at compile time, and functions
+.BR sysconf (3),
+.BR fpathconf (3),
+.BR pathconf (3),
+.BR confstr (3)
+to do this at run time.
+From shell scripts one can use
+.BR getconf (1).
+For more detail, see
+.BR sysconf (3).
+.PP
+We give the name of the POSIX abbreviation, the option, the name of the
+.BR sysconf (3)
+parameter used to inquire about the option, and possibly
+a very short description.
+Much more precise detail can be found in the POSIX standard itself,
+versions of which can nowadays be accessed freely on the web.
+.SS ADV - _POSIX_ADVISORY_INFO - _SC_ADVISORY_INFO
+The following advisory functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_fadvise ()
+.IR posix_fallocate ()
+.IR posix_memalign ()
+.IR posix_madvise ()
+.in
+.fi
+.SS AIO - _POSIX_ASYNCHRONOUS_IO - _SC_ASYNCHRONOUS_IO
+The header
+.I <aio.h>
+is present.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR aio_cancel ()
+.IR aio_error ()
+.IR aio_fsync ()
+.IR aio_read ()
+.IR aio_return ()
+.IR aio_suspend ()
+.IR aio_write ()
+.IR lio_listio ()
+.in
+.fi
+.SS BAR - _POSIX_BARRIERS - _SC_BARRIERS
+This option implies the
+.B _POSIX_THREADS
+and
+.B _POSIX_THREAD_SAFE_FUNCTIONS
+options.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR pthread_barrier_destroy ()
+.IR pthread_barrier_init ()
+.IR pthread_barrier_wait ()
+.IR pthread_barrierattr_destroy ()
+.IR pthread_barrierattr_init ()
+.in
+.fi
+.\" .SS BE
+.\" Batch environment.
+.\" .SS CD
+.\" C development.
+.SS --- - _POSIX_CHOWN_RESTRICTED
+If this option is in effect (as it always is under POSIX.1-2001),
+then only root may change the owner of a file, and nonroot can
+set the group of a file only to one of the groups it belongs to.
+This affects the following functions
+.PP
+.nf
+.in +4n
+.IR chown ()
+.IR fchown ()
+.in
+.fi
+.\" What about lchown() ?
+.SS CS - _POSIX_CLOCK_SELECTION - _SC_CLOCK_SELECTION
+This option implies the
+.B _POSIX_TIMERS
+option.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR pthread_condattr_getclock ()
+.IR pthread_condattr_setclock ()
+.IR clock_nanosleep ()
+.in
+.fi
+.PP
+If
+.B CLOCK_REALTIME
+is changed by the function
+.IR clock_settime (),
+then this affects all timers set for an absolute time.
+.SS CPT - _POSIX_CPUTIME - _SC_CPUTIME
+The
+.B CLOCK_PROCESS_CPUTIME_ID
+clock ID is supported.
+The initial value of this clock is 0 for each process.
+This option implies the
+.B _POSIX_TIMERS
+option.
+The function
+.IR clock_getcpuclockid ()
+is present.
+.\" .SS FD
+.\" Fortran development
+.\" .SS FR
+.\" Fortran runtime
+.SS --- - _POSIX_FILE_LOCKING - _SC_FILE_LOCKING
+This option has been deleted.
+Not in final XPG6.
+.SS FSC - _POSIX_FSYNC - _SC_FSYNC
+The function
+.IR fsync ()
+is present.
+.SS IP6 - _POSIX_IPV6 - _SC_IPV6
+Internet Protocol Version 6 is supported.
+.SS --- - _POSIX_JOB_CONTROL - _SC_JOB_CONTROL
+If this option is in effect (as it always is under POSIX.1-2001),
+then the system implements POSIX-style job control,
+and the following functions are present:
+.PP
+.nf
+.in +4n
+.IR setpgid ()
+.IR tcdrain ()
+.IR tcflush ()
+.IR tcgetpgrp ()
+.IR tcsendbreak ()
+.IR tcsetattr ()
+.IR tcsetpgrp ()
+.in
+.fi
+.SS MF - _POSIX_MAPPED_FILES - _SC_MAPPED_FILES
+Shared memory is supported.
+The include file
+.I <sys/mman.h>
+is present.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mmap ()
+.IR msync ()
+.IR munmap ()
+.in
+.fi
+.SS ML - _POSIX_MEMLOCK - _SC_MEMLOCK
+Shared memory can be locked into core.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mlockall ()
+.IR munlockall ()
+.in
+.fi
+.SS MR/MLR - _POSIX_MEMLOCK_RANGE - _SC_MEMLOCK_RANGE
+More precisely, ranges can be locked into core.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mlock ()
+.IR munlock ()
+.in
+.fi
+.SS MPR - _POSIX_MEMORY_PROTECTION - _SC_MEMORY_PROTECTION
+The function
+.IR mprotect ()
+is present.
+.SS MSG - _POSIX_MESSAGE_PASSING - _SC_MESSAGE_PASSING
+The include file
+.I <mqueue.h>
+is present.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mq_close ()
+.IR mq_getattr ()
+.IR mq_notify ()
+.IR mq_open ()
+.IR mq_receive ()
+.IR mq_send ()
+.IR mq_setattr ()
+.IR mq_unlink ()
+.in
+.fi
+.SS MON - _POSIX_MONOTONIC_CLOCK - _SC_MONOTONIC_CLOCK
+.B CLOCK_MONOTONIC
+is supported.
+This option implies the
+.B _POSIX_TIMERS
+option.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR aio_suspend ()
+.IR clock_getres ()
+.IR clock_gettime ()
+.IR clock_settime ()
+.IR timer_create ()
+.in
+.fi
+.SS --- - _POSIX_MULTI_PROCESS - _SC_MULTI_PROCESS
+This option has been deleted.
+Not in final XPG6.
+.\" .SS MX
+.\" IEC 60559 Floating-Point Option.
+.SS --- - _POSIX_NO_TRUNC
+If this option is in effect (as it always is under POSIX.1-2001),
+then pathname components longer than
+.B NAME_MAX
+are not truncated,
+but give an error.
+This property may be dependent on the path prefix of the component.
+.SS PIO - _POSIX_PRIORITIZED_IO - _SC_PRIORITIZED_IO
+This option says that one can specify priorities for asynchronous I/O.
+This affects the functions
+.PP
+.nf
+.in +4n
+.IR aio_read ()
+.IR aio_write ()
+.in
+.fi
+.SS PS - _POSIX_PRIORITY_SCHEDULING - _SC_PRIORITY_SCHEDULING
+The include file
+.I <sched.h>
+is present.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR sched_get_priority_max ()
+.IR sched_get_priority_min ()
+.IR sched_getparam ()
+.IR sched_getscheduler ()
+.IR sched_rr_get_interval ()
+.IR sched_setparam ()
+.IR sched_setscheduler ()
+.IR sched_yield ()
+.in
+.fi
+.PP
+If also
+.B _POSIX_SPAWN
+is in effect, then the following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_spawnattr_getschedparam ()
+.IR posix_spawnattr_getschedpolicy ()
+.IR posix_spawnattr_setschedparam ()
+.IR posix_spawnattr_setschedpolicy ()
+.in
+.fi
+.SS RS - _POSIX_RAW_SOCKETS
+Raw sockets are supported.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR getsockopt ()
+.IR setsockopt ()
+.in
+.fi
+.SS --- - _POSIX_READER_WRITER_LOCKS - _SC_READER_WRITER_LOCKS
+This option implies the
+.B _POSIX_THREADS
+option.
+Conversely,
+under POSIX.1-2001 the
+.B _POSIX_THREADS
+option implies this option.
+.PP
+The following functions are present:
+.PP
+.in +4n
+.nf
+.IR pthread_rwlock_destroy ()
+.IR pthread_rwlock_init ()
+.IR pthread_rwlock_rdlock ()
+.IR pthread_rwlock_tryrdlock ()
+.IR pthread_rwlock_trywrlock ()
+.IR pthread_rwlock_unlock ()
+.IR pthread_rwlock_wrlock ()
+.IR pthread_rwlockattr_destroy ()
+.IR pthread_rwlockattr_init ()
+.in
+.fi
+.SS RTS - _POSIX_REALTIME_SIGNALS - _SC_REALTIME_SIGNALS
+Realtime signals are supported.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR sigqueue ()
+.IR sigtimedwait ()
+.IR sigwaitinfo ()
+.in
+.fi
+.SS --- - _POSIX_REGEXP - _SC_REGEXP
+If this option is in effect (as it always is under POSIX.1-2001),
+then POSIX regular expressions are supported
+and the following functions are present:
+.PP
+.nf
+.in +4n
+.IR regcomp ()
+.IR regerror ()
+.IR regexec ()
+.IR regfree ()
+.in
+.fi
+.SS --- - _POSIX_SAVED_IDS - _SC_SAVED_IDS
+If this option is in effect (as it always is under POSIX.1-2001),
+then a process has a saved set-user-ID and a saved set-group-ID.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR exec ()
+.IR kill ()
+.IR seteuid ()
+.IR setegid ()
+.IR setgid ()
+.IR setuid ()
+.in
+.fi
+.\" .SS SD
+.\" Software development
+.SS SEM - _POSIX_SEMAPHORES - _SC_SEMAPHORES
+The include file
+.I <semaphore.h>
+is present.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR sem_close ()
+.IR sem_destroy ()
+.IR sem_getvalue ()
+.IR sem_init ()
+.IR sem_open ()
+.IR sem_post ()
+.IR sem_trywait ()
+.IR sem_unlink ()
+.IR sem_wait ()
+.in
+.fi
+.SS SHM - _POSIX_SHARED_MEMORY_OBJECTS - _SC_SHARED_MEMORY_OBJECTS
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mmap ()
+.IR munmap ()
+.IR shm_open ()
+.IR shm_unlink ()
+.in
+.fi
+.SS --- - _POSIX_SHELL - _SC_SHELL
+If this option is in effect (as it always is under POSIX.1-2001),
+the function
+.IR system ()
+is present.
+.SS SPN - _POSIX_SPAWN - _SC_SPAWN
+This option describes support for process creation in a context where
+it is difficult or impossible to use
+.IR fork (),
+for example, because no MMU is present.
+.PP
+If
+.B _POSIX_SPAWN
+is in effect, then the include file
+.I <spawn.h>
+and the following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_spawn ()
+.IR posix_spawn_file_actions_addclose ()
+.IR posix_spawn_file_actions_adddup2 ()
+.IR posix_spawn_file_actions_addopen ()
+.IR posix_spawn_file_actions_destroy ()
+.IR posix_spawn_file_actions_init ()
+.IR posix_spawnattr_destroy ()
+.IR posix_spawnattr_getsigdefault ()
+.IR posix_spawnattr_getflags ()
+.IR posix_spawnattr_getpgroup ()
+.IR posix_spawnattr_getsigmask ()
+.IR posix_spawnattr_init ()
+.IR posix_spawnattr_setsigdefault ()
+.IR posix_spawnattr_setflags ()
+.IR posix_spawnattr_setpgroup ()
+.IR posix_spawnattr_setsigmask ()
+.IR posix_spawnp ()
+.in
+.fi
+.PP
+If also
+.B _POSIX_PRIORITY_SCHEDULING
+is in effect, then
+the following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_spawnattr_getschedparam ()
+.IR posix_spawnattr_getschedpolicy ()
+.IR posix_spawnattr_setschedparam ()
+.IR posix_spawnattr_setschedpolicy ()
+.in
+.fi
+.SS SPI - _POSIX_SPIN_LOCKS - _SC_SPIN_LOCKS
+This option implies the
+.B _POSIX_THREADS
+and
+.B _POSIX_THREAD_SAFE_FUNCTIONS
+options.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR pthread_spin_destroy ()
+.IR pthread_spin_init ()
+.IR pthread_spin_lock ()
+.IR pthread_spin_trylock ()
+.IR pthread_spin_unlock ()
+.in -4n
+.fi
+.SS SS - _POSIX_SPORADIC_SERVER - _SC_SPORADIC_SERVER
+The scheduling policy
+.B SCHED_SPORADIC
+is supported.
+This option implies the
+.B _POSIX_PRIORITY_SCHEDULING
+option.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR sched_setparam ()
+.IR sched_setscheduler ()
+.in
+.fi
+.SS SIO - _POSIX_SYNCHRONIZED_IO - _SC_SYNCHRONIZED_IO
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR open ()
+.IR msync ()
+.IR fsync ()
+.IR fdatasync ()
+.in
+.fi
+.SS TSA - _POSIX_THREAD_ATTR_STACKADDR - _SC_THREAD_ATTR_STACKADDR
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_attr_getstack ()
+.IR pthread_attr_getstackaddr ()
+.IR pthread_attr_setstack ()
+.IR pthread_attr_setstackaddr ()
+.in
+.fi
+.SS TSS - _POSIX_THREAD_ATTR_STACKSIZE - _SC_THREAD_ATTR_STACKSIZE
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_attr_getstack ()
+.IR pthread_attr_getstacksize ()
+.IR pthread_attr_setstack ()
+.IR pthread_attr_setstacksize ()
+.in
+.fi
+.SS TCT - _POSIX_THREAD_CPUTIME - _SC_THREAD_CPUTIME
+The clock ID CLOCK_THREAD_CPUTIME_ID is supported.
+This option implies the
+.B _POSIX_TIMERS
+option.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_getcpuclockid ()
+.IR clock_getres ()
+.IR clock_gettime ()
+.IR clock_settime ()
+.IR timer_create ()
+.in
+.fi
+.SS TPI - _POSIX_THREAD_PRIO_INHERIT - _SC_THREAD_PRIO_INHERIT
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_mutexattr_getprotocol ()
+.IR pthread_mutexattr_setprotocol ()
+.in
+.fi
+.SS TPP - _POSIX_THREAD_PRIO_PROTECT - _SC_THREAD_PRIO_PROTECT
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_mutex_getprioceiling ()
+.IR pthread_mutex_setprioceiling ()
+.IR pthread_mutexattr_getprioceiling ()
+.IR pthread_mutexattr_getprotocol ()
+.IR pthread_mutexattr_setprioceiling ()
+.IR pthread_mutexattr_setprotocol ()
+.in
+.fi
+.SS TPS - _POSIX_THREAD_PRIORITY_SCHEDULING - _SC_THREAD_PRIORITY_SCHEDULING
+If this option is in effect, the different threads inside a process
+can run with different priorities and/or different schedulers.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_attr_getinheritsched ()
+.IR pthread_attr_getschedpolicy ()
+.IR pthread_attr_getscope ()
+.IR pthread_attr_setinheritsched ()
+.IR pthread_attr_setschedpolicy ()
+.IR pthread_attr_setscope ()
+.IR pthread_getschedparam ()
+.IR pthread_setschedparam ()
+.IR pthread_setschedprio ()
+.in
+.fi
+.SS TSH - _POSIX_THREAD_PROCESS_SHARED - _SC_THREAD_PROCESS_SHARED
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR pthread_barrierattr_getpshared ()
+.IR pthread_barrierattr_setpshared ()
+.IR pthread_condattr_getpshared ()
+.IR pthread_condattr_setpshared ()
+.IR pthread_mutexattr_getpshared ()
+.IR pthread_mutexattr_setpshared ()
+.IR pthread_rwlockattr_getpshared ()
+.IR pthread_rwlockattr_setpshared ()
+.in
+.fi
+.SS TSF - _POSIX_THREAD_SAFE_FUNCTIONS - _SC_THREAD_SAFE_FUNCTIONS
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR readdir_r ()
+.IR getgrgid_r ()
+.IR getgrnam_r ()
+.IR getpwnam_r ()
+.IR getpwuid_r ()
+.IR flockfile ()
+.IR ftrylockfile ()
+.IR funlockfile ()
+.IR getc_unlocked ()
+.IR getchar_unlocked ()
+.IR putc_unlocked ()
+.IR putchar_unlocked ()
+.IR rand_r ()
+.IR strerror_r ()
+.IR strtok_r ()
+.IR asctime_r ()
+.IR ctime_r ()
+.IR gmtime_r ()
+.IR localtime_r ()
+.in
+.fi
+.SS TSP - _POSIX_THREAD_SPORADIC_SERVER - _SC_THREAD_SPORADIC_SERVER
+This option implies the
+.B _POSIX_THREAD_PRIORITY_SCHEDULING
+option.
+The following functions are affected:
+.PP
+.nf
+.in +4n
+.IR sched_getparam ()
+.IR sched_setparam ()
+.IR sched_setscheduler ()
+.in
+.fi
+.SS THR - _POSIX_THREADS - _SC_THREADS
+Basic support for POSIX threads is available.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR pthread_atfork ()
+.IR pthread_attr_destroy ()
+.IR pthread_attr_getdetachstate ()
+.IR pthread_attr_getschedparam ()
+.IR pthread_attr_init ()
+.IR pthread_attr_setdetachstate ()
+.IR pthread_attr_setschedparam ()
+.IR pthread_cancel ()
+.IR pthread_cleanup_push ()
+.IR pthread_cleanup_pop ()
+.IR pthread_cond_broadcast ()
+.IR pthread_cond_destroy ()
+.IR pthread_cond_init ()
+.IR pthread_cond_signal ()
+.IR pthread_cond_timedwait ()
+.IR pthread_cond_wait ()
+.IR pthread_condattr_destroy ()
+.IR pthread_condattr_init ()
+.IR pthread_create ()
+.IR pthread_detach ()
+.IR pthread_equal ()
+.IR pthread_exit ()
+.IR pthread_getspecific ()
+.IR pthread_join ()
+.IR pthread_key_create ()
+.IR pthread_key_delete ()
+.IR pthread_mutex_destroy ()
+.IR pthread_mutex_init ()
+.IR pthread_mutex_lock ()
+.IR pthread_mutex_trylock ()
+.IR pthread_mutex_unlock ()
+.IR pthread_mutexattr_destroy ()
+.IR pthread_mutexattr_init ()
+.IR pthread_once ()
+.IR pthread_rwlock_destroy ()
+.IR pthread_rwlock_init ()
+.IR pthread_rwlock_rdlock ()
+.IR pthread_rwlock_tryrdlock ()
+.IR pthread_rwlock_trywrlock ()
+.IR pthread_rwlock_unlock ()
+.IR pthread_rwlock_wrlock ()
+.IR pthread_rwlockattr_destroy ()
+.IR pthread_rwlockattr_init ()
+.IR pthread_self ()
+.IR pthread_setcancelstate ()
+.IR pthread_setcanceltype ()
+.IR pthread_setspecific ()
+.IR pthread_testcancel ()
+.in
+.fi
+.SS TMO - _POSIX_TIMEOUTS - _SC_TIMEOUTS
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mq_timedreceive ()
+.IR mq_timedsend ()
+.IR pthread_mutex_timedlock ()
+.IR pthread_rwlock_timedrdlock ()
+.IR pthread_rwlock_timedwrlock ()
+.IR sem_timedwait ()
+.IR posix_trace_timedgetnext_event ()
+.in
+.fi
+.SS TMR - _POSIX_TIMERS - _SC_TIMERS
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR clock_getres ()
+.IR clock_gettime ()
+.IR clock_settime ()
+.IR nanosleep ()
+.IR timer_create ()
+.IR timer_delete ()
+.IR timer_gettime ()
+.IR timer_getoverrun ()
+.IR timer_settime ()
+.in
+.fi
+.SS TRC - _POSIX_TRACE - _SC_TRACE
+POSIX tracing is available.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_trace_attr_destroy ()
+.IR posix_trace_attr_getclockres ()
+.IR posix_trace_attr_getcreatetime ()
+.IR posix_trace_attr_getgenversion ()
+.IR posix_trace_attr_getmaxdatasize ()
+.IR posix_trace_attr_getmaxsystemeventsize ()
+.IR posix_trace_attr_getmaxusereventsize ()
+.IR posix_trace_attr_getname ()
+.IR posix_trace_attr_getstreamfullpolicy ()
+.IR posix_trace_attr_getstreamsize ()
+.IR posix_trace_attr_init ()
+.IR posix_trace_attr_setmaxdatasize ()
+.IR posix_trace_attr_setname ()
+.IR posix_trace_attr_setstreamsize ()
+.IR posix_trace_attr_setstreamfullpolicy ()
+.IR posix_trace_clear ()
+.IR posix_trace_create ()
+.IR posix_trace_event ()
+.IR posix_trace_eventid_equal ()
+.IR posix_trace_eventid_get_name ()
+.IR posix_trace_eventid_open ()
+.IR posix_trace_eventtypelist_getnext_id ()
+.IR posix_trace_eventtypelist_rewind ()
+.IR posix_trace_flush ()
+.IR posix_trace_get_attr ()
+.IR posix_trace_get_status ()
+.IR posix_trace_getnext_event ()
+.IR posix_trace_shutdown ()
+.IR posix_trace_start ()
+.IR posix_trace_stop ()
+.IR posix_trace_trygetnext_event ()
+.in
+.fi
+.SS TEF - _POSIX_TRACE_EVENT_FILTER - _SC_TRACE_EVENT_FILTER
+This option implies the
+.B _POSIX_TRACE
+option.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_trace_eventset_add ()
+.IR posix_trace_eventset_del ()
+.IR posix_trace_eventset_empty ()
+.IR posix_trace_eventset_fill ()
+.IR posix_trace_eventset_ismember ()
+.IR posix_trace_get_filter ()
+.IR posix_trace_set_filter ()
+.IR posix_trace_trid_eventid_open ()
+.in
+.fi
+.SS TRI - _POSIX_TRACE_INHERIT - _SC_TRACE_INHERIT
+Tracing children of the traced process is supported.
+This option implies the
+.B _POSIX_TRACE
+option.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_trace_attr_getinherited ()
+.IR posix_trace_attr_setinherited ()
+.in
+.fi
+.SS TRL - _POSIX_TRACE_LOG - _SC_TRACE_LOG
+This option implies the
+.B _POSIX_TRACE
+option.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_trace_attr_getlogfullpolicy ()
+.IR posix_trace_attr_getlogsize ()
+.IR posix_trace_attr_setlogfullpolicy ()
+.IR posix_trace_attr_setlogsize ()
+.IR posix_trace_close ()
+.IR posix_trace_create_withlog ()
+.IR posix_trace_open ()
+.IR posix_trace_rewind ()
+.in
+.fi
+.SS TYM - _POSIX_TYPED_MEMORY_OBJECTS - _SC_TYPED_MEMORY_OBJECTS
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR posix_mem_offset ()
+.IR posix_typed_mem_get_info ()
+.IR posix_typed_mem_open ()
+.in
+.fi
+.SS --- - _POSIX_VDISABLE
+Always present (probably 0).
+Value to set a changeable special control
+character to indicate that it is disabled.
+.SH X/OPEN SYSTEM INTERFACE EXTENSIONS
+.SS XSI - _XOPEN_CRYPT - _SC_XOPEN_CRYPT
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR crypt ()
+.IR encrypt ()
+.IR setkey ()
+.fi
+.SS XSI - _XOPEN_REALTIME - _SC_XOPEN_REALTIME
+This option implies the following options:
+.PP
+.PD 0
+.TP
+.BR _POSIX_ASYNCHRONOUS_IO == 200112L
+.TP
+.B _POSIX_FSYNC
+.TP
+.B _POSIX_MAPPED_FILES
+.TP
+.BR _POSIX_MEMLOCK == 200112L
+.TP
+.BR _POSIX_MEMLOCK_RANGE == 200112L
+.TP
+.B _POSIX_MEMORY_PROTECTION
+.TP
+.BR _POSIX_MESSAGE_PASSING == 200112L
+.TP
+.B _POSIX_PRIORITIZED_IO
+.TP
+.BR _POSIX_PRIORITY_SCHEDULING == 200112L
+.TP
+.BR _POSIX_REALTIME_SIGNALS == 200112L
+.TP
+.BR _POSIX_SEMAPHORES == 200112L
+.TP
+.BR _POSIX_SHARED_MEMORY_OBJECTS == 200112L
+.TP
+.BR _POSIX_SYNCHRONIZED_IO == 200112L
+.TP
+.BR _POSIX_TIMERS == 200112L
+.PD
+.\"
+.SS ADV - --- - ---
+The Advanced Realtime option group implies that the following options
+are all defined to 200112L:
+.PP
+.PD 0
+.TP
+.B _POSIX_ADVISORY_INFO
+.TP
+.B _POSIX_CLOCK_SELECTION
+(implies
+.BR _POSIX_TIMERS )
+.TP
+.B _POSIX_CPUTIME
+(implies
+.BR _POSIX_TIMERS )
+.TP
+.B _POSIX_MONOTONIC_CLOCK
+(implies
+.BR _POSIX_TIMERS )
+.TP
+.B _POSIX_SPAWN
+.TP
+.B _POSIX_SPORADIC_SERVER
+(implies
+.BR _POSIX_PRIORITY_SCHEDULING )
+.TP
+.B _POSIX_TIMEOUTS
+.TP
+.B _POSIX_TYPED_MEMORY_OBJECTS
+.PD
+.\"
+.SS XSI - _XOPEN_REALTIME_THREADS - _SC_XOPEN_REALTIME_THREADS
+This option implies that the following options
+are all defined to 200112L:
+.PP
+.PD 0
+.TP
+.B _POSIX_THREAD_PRIO_INHERIT
+.TP
+.B _POSIX_THREAD_PRIO_PROTECT
+.TP
+.B _POSIX_THREAD_PRIORITY_SCHEDULING
+.PD
+.SS ADVANCED REALTIME THREADS - --- - ---
+This option implies that the following options
+are all defined to 200112L:
+.PP
+.PD 0
+.TP
+.B _POSIX_BARRIERS
+(implies
+.BR _POSIX_THREADS ,
+.BR _POSIX_THREAD_SAFE_FUNCTIONS )
+.TP
+.B _POSIX_SPIN_LOCKS
+(implies
+.BR _POSIX_THREADS ,
+.BR _POSIX_THREAD_SAFE_FUNCTIONS )
+.TP
+.B _POSIX_THREAD_CPUTIME
+(implies
+.BR _POSIX_TIMERS )
+.TP
+.B _POSIX_THREAD_SPORADIC_SERVER
+(implies
+.BR _POSIX_THREAD_PRIORITY_SCHEDULING )
+.PD
+.\"
+.SS TRACING - --- - ---
+This option implies that the following options
+are all defined to 200112L:
+.PP
+.PD 0
+.TP
+.B _POSIX_TRACE
+.TP
+.B _POSIX_TRACE_EVENT_FILTER
+.TP
+.B _POSIX_TRACE_LOG
+.TP
+.B _POSIX_TRACE_INHERIT
+.PD
+.SS STREAMS - _XOPEN_STREAMS - _SC_XOPEN_STREAMS
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR fattach ()
+.IR fdetach ()
+.IR getmsg ()
+.IR getpmsg ()
+.IR ioctl ()
+.IR isastream ()
+.IR putmsg ()
+.IR putpmsg ()
+.in
+.fi
+.SS XSI - _XOPEN_LEGACY - _SC_XOPEN_LEGACY
+Functions included in the legacy option group were previously mandatory,
+but are now optional in this version.
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR bcmp ()
+.IR bcopy ()
+.IR bzero ()
+.IR ecvt ()
+.IR fcvt ()
+.IR ftime ()
+.IR gcvt ()
+.IR getwd ()
+.IR index ()
+.IR mktemp ()
+.IR rindex ()
+.IR utimes ()
+.IR wcswcs ()
+.in
+.fi
+.SS XSI - _XOPEN_UNIX - _SC_XOPEN_UNIX
+The following functions are present:
+.PP
+.nf
+.in +4n
+.IR mmap ()
+.IR munmap ()
+.IR msync ()
+.in
+.fi
+.PP
+This option implies the following options:
+.PP
+.PD 0
+.TP
+.B _POSIX_FSYNC
+.TP
+.B _POSIX_MAPPED_FILES
+.TP
+.B _POSIX_MEMORY_PROTECTION
+.TP
+.B _POSIX_THREAD_ATTR_STACKADDR
+.TP
+.B _POSIX_THREAD_ATTR_STACKSIZE
+.TP
+.B _POSIX_THREAD_PROCESS_SHARED
+.TP
+.B _POSIX_THREAD_SAFE_FUNCTIONS
+.TP
+.B _POSIX_THREADS
+.PD
+.PP
+This option may imply the following options from the XSI option groups:
+.PP
+.PD 0
+.TP
+.RB "Encryption (" _XOPEN_CRYPT )
+.TP
+.RB "Realtime (" _XOPEN_REALTIME )
+.TP
+.RB "Advanced Realtime (" ADV )
+.TP
+.RB "Realtime Threads (" _XOPEN_REALTIME_THREADS )
+.TP
+.RB "Advanced Realtime Threads (" "ADVANCED REALTIME THREADS" )
+.TP
+.RB "Tracing (" TRACING )
+.TP
+.RB "XSI Streams (" STREAMS )
+.TP
+.RB "Legacy (" _XOPEN_LEGACY )
+.PD
+.SH SEE ALSO
+.BR sysconf (3),
+.BR standards (7)
diff --git a/man7/precedence.7 b/man7/precedence.7
new file mode 100644
index 0000000..6ef216d
--- /dev/null
+++ b/man7/precedence.7
@@ -0,0 +1 @@
+.so man7/operator.7
diff --git a/man7/process-keyring.7 b/man7/process-keyring.7
new file mode 100644
index 0000000..53557a0
--- /dev/null
+++ b/man7/process-keyring.7
@@ -0,0 +1,55 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH process-keyring 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+process-keyring \- per-process shared keyring
+.SH DESCRIPTION
+The process keyring is a keyring used to anchor keys on behalf of a process.
+It is created only when a process requests it.
+The process keyring has the name (description)
+.IR _pid .
+.PP
+A special serial number value,
+.BR KEY_SPEC_PROCESS_KEYRING ,
+is defined that can be used in lieu of the actual serial number of
+the calling process's process keyring.
+.PP
+From the
+.BR keyctl (1)
+utility, '\fB@p\fP' can be used instead of a numeric key ID in
+much the same way, but since
+.BR keyctl (1)
+is a program run after forking, this is of no utility.
+.PP
+A thread created using the
+.BR clone (2)
+.B CLONE_THREAD
+flag has the same process keyring as the caller of
+.BR clone (2).
+When a new process is created using
+.BR fork (2)
+it initially has no process keyring.
+A process's process keyring is cleared on
+.BR execve (2).
+The process keyring is destroyed when the last
+thread that refers to it terminates.
+.PP
+If a process doesn't have a process keyring when it is accessed,
+then the process keyring will be created if the keyring is to be modified;
+otherwise, the error
+.B ENOKEY
+results.
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyrings (7),
+.BR persistent\-keyring (7),
+.BR session\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-keyring (7),
+.BR user\-session\-keyring (7)
diff --git a/man7/pthreads.7 b/man7/pthreads.7
new file mode 100644
index 0000000..2c00798
--- /dev/null
+++ b/man7/pthreads.7
@@ -0,0 +1,937 @@
+.\" Copyright (c) 2005 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH pthreads 7 2023-03-18 "Linux man-pages 6.05.01"
+.SH NAME
+pthreads \- POSIX threads
+.SH DESCRIPTION
+POSIX.1 specifies a set of interfaces (functions, header files) for
+threaded programming commonly known as POSIX threads, or Pthreads.
+A single process can contain multiple threads,
+all of which are executing the same program.
+These threads share the same global memory (data and heap segments),
+but each thread has its own stack (automatic variables).
+.PP
+POSIX.1 also requires that threads share a range of other attributes
+(i.e., these attributes are process-wide rather than per-thread):
+.IP \[bu] 3
+process ID
+.IP \[bu]
+parent process ID
+.IP \[bu]
+process group ID and session ID
+.IP \[bu]
+controlling terminal
+.IP \[bu]
+user and group IDs
+.IP \[bu]
+open file descriptors
+.IP \[bu]
+record locks (see
+.BR fcntl (2))
+.IP \[bu]
+signal dispositions
+.IP \[bu]
+file mode creation mask
+.RB ( umask (2))
+.IP \[bu]
+current directory
+.RB ( chdir (2))
+and
+root directory
+.RB ( chroot (2))
+.IP \[bu]
+interval timers
+.RB ( setitimer (2))
+and POSIX timers
+.RB ( timer_create (2))
+.IP \[bu]
+nice value
+.RB ( setpriority (2))
+.IP \[bu]
+resource limits
+.RB ( setrlimit (2))
+.IP \[bu]
+measurements of the consumption of CPU time
+.RB ( times (2))
+and resources
+.RB ( getrusage (2))
+.PP
+As well as the stack, POSIX.1 specifies that various other
+attributes are distinct for each thread, including:
+.IP \[bu] 3
+thread ID (the
+.I pthread_t
+data type)
+.IP \[bu]
+signal mask
+.RB ( pthread_sigmask (3))
+.IP \[bu]
+the
+.I errno
+variable
+.IP \[bu]
+alternate signal stack
+.RB ( sigaltstack (2))
+.IP \[bu]
+real-time scheduling policy and priority
+.RB ( sched (7))
+.PP
+The following Linux-specific features are also per-thread:
+.IP \[bu] 3
+capabilities (see
+.BR capabilities (7))
+.IP \[bu]
+CPU affinity
+.RB ( sched_setaffinity (2))
+.SS Pthreads function return values
+Most pthreads functions return 0 on success, and an error number on failure.
+The error numbers that can be returned have the same meaning as
+the error numbers returned in
+.I errno
+by conventional system calls and C library functions.
+Note that the pthreads functions do not set
+.IR errno .
+For each of the pthreads functions that can return an error,
+POSIX.1-2001 specifies that the function can never fail with the error
+.BR EINTR .
+.SS Thread IDs
+Each of the threads in a process has a unique thread identifier
+(stored in the type
+.IR pthread_t ).
+This identifier is returned to the caller of
+.BR pthread_create (3),
+and a thread can obtain its own thread identifier using
+.BR pthread_self (3).
+.PP
+Thread IDs are guaranteed to be unique only within a process.
+(In all pthreads functions that accept a thread ID as an argument,
+that ID by definition refers to a thread in
+the same process as the caller.)
+.PP
+The system may reuse a thread ID after a terminated thread has been joined,
+or a detached thread has terminated.
+POSIX says: "If an application attempts to use a thread ID whose
+lifetime has ended, the behavior is undefined."
+.SS Thread-safe functions
+A thread-safe function is one that can be safely called
+from multiple threads at the same time
+(i.e., it will deliver the same results regardless of whether it is).
+.PP
+POSIX.1-2001 and POSIX.1-2008 require that all functions specified
+in the standard shall be thread-safe,
+except for the following functions:
+.PP
+.in +4n
+.EX
+asctime()
+basename()
+catgets()
+crypt()
+ctermid() if passed a non-NULL argument
+ctime()
+dbm_clearerr()
+dbm_close()
+dbm_delete()
+dbm_error()
+dbm_fetch()
+dbm_firstkey()
+dbm_nextkey()
+dbm_open()
+dbm_store()
+dirname()
+dlerror()
+drand48()
+ecvt() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+encrypt()
+endgrent()
+endpwent()
+endutxent()
+fcvt() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+ftw()
+gcvt() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+getc_unlocked()
+getchar_unlocked()
+getdate()
+getenv()
+getgrent()
+getgrgid()
+getgrnam()
+gethostbyaddr() [POSIX.1-2001 only (function removed in
+ POSIX.1-2008)]
+gethostbyname() [POSIX.1-2001 only (function removed in
+ POSIX.1-2008)]
+gethostent()
+getlogin()
+getnetbyaddr()
+getnetbyname()
+getnetent()
+getopt()
+getprotobyname()
+getprotobynumber()
+getprotoent()
+getpwent()
+getpwnam()
+getpwuid()
+getservbyname()
+getservbyport()
+getservent()
+getutxent()
+getutxid()
+getutxline()
+gmtime()
+hcreate()
+hdestroy()
+hsearch()
+inet_ntoa()
+l64a()
+lgamma()
+lgammaf()
+lgammal()
+localeconv()
+localtime()
+lrand48()
+mrand48()
+nftw()
+nl_langinfo()
+ptsname()
+putc_unlocked()
+putchar_unlocked()
+putenv()
+pututxline()
+rand()
+readdir()
+setenv()
+setgrent()
+setkey()
+setpwent()
+setutxent()
+strerror()
+strsignal() [Added in POSIX.1-2008]
+strtok()
+system() [Added in POSIX.1-2008]
+tmpnam() if passed a non-NULL argument
+ttyname()
+unsetenv()
+wcrtomb() if its final argument is NULL
+wcsrtombs() if its final argument is NULL
+wcstombs()
+wctomb()
+.EE
+.in
+.SS Async-cancel-safe functions
+An async-cancel-safe function is one that can be safely called
+in an application where asynchronous cancelability is enabled (see
+.BR pthread_setcancelstate (3)).
+.PP
+Only the following functions are required to be async-cancel-safe by
+POSIX.1-2001 and POSIX.1-2008:
+.PP
+.in +4n
+.EX
+pthread_cancel()
+pthread_setcancelstate()
+pthread_setcanceltype()
+.EE
+.in
+.SS Cancelation points
+POSIX.1 specifies that certain functions must,
+and certain other functions may, be cancelation points.
+If a thread is cancelable, its cancelability type is deferred,
+and a cancelation request is pending for the thread,
+then the thread is canceled when it calls a function
+that is a cancelation point.
+.PP
+The following functions are required to be cancelation points by
+POSIX.1-2001 and/or POSIX.1-2008:
+.PP
+.\" FIXME
+.\" Document the list of all functions that are cancelation points in glibc
+.in +4n
+.EX
+accept()
+aio_suspend()
+clock_nanosleep()
+close()
+connect()
+creat()
+fcntl() F_SETLKW
+fdatasync()
+fsync()
+getmsg()
+getpmsg()
+lockf() F_LOCK
+mq_receive()
+mq_send()
+mq_timedreceive()
+mq_timedsend()
+msgrcv()
+msgsnd()
+msync()
+nanosleep()
+open()
+openat() [Added in POSIX.1-2008]
+pause()
+poll()
+pread()
+pselect()
+pthread_cond_timedwait()
+pthread_cond_wait()
+pthread_join()
+pthread_testcancel()
+putmsg()
+putpmsg()
+pwrite()
+read()
+readv()
+recv()
+recvfrom()
+recvmsg()
+select()
+sem_timedwait()
+sem_wait()
+send()
+sendmsg()
+sendto()
+sigpause() [POSIX.1-2001 only (moves to "may" list in POSIX.1-2008)]
+sigsuspend()
+sigtimedwait()
+sigwait()
+sigwaitinfo()
+sleep()
+system()
+tcdrain()
+usleep() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+wait()
+waitid()
+waitpid()
+write()
+writev()
+.EE
+.in
+.PP
+The following functions may be cancelation points according to
+POSIX.1-2001 and/or POSIX.1-2008:
+.PP
+.in +4n
+.EX
+access()
+asctime()
+asctime_r()
+catclose()
+catgets()
+catopen()
+chmod() [Added in POSIX.1-2008]
+chown() [Added in POSIX.1-2008]
+closedir()
+closelog()
+ctermid()
+ctime()
+ctime_r()
+dbm_close()
+dbm_delete()
+dbm_fetch()
+dbm_nextkey()
+dbm_open()
+dbm_store()
+dlclose()
+dlopen()
+dprintf() [Added in POSIX.1-2008]
+endgrent()
+endhostent()
+endnetent()
+endprotoent()
+endpwent()
+endservent()
+endutxent()
+faccessat() [Added in POSIX.1-2008]
+fchmod() [Added in POSIX.1-2008]
+fchmodat() [Added in POSIX.1-2008]
+fchown() [Added in POSIX.1-2008]
+fchownat() [Added in POSIX.1-2008]
+fclose()
+fcntl() (for any value of cmd argument)
+fflush()
+fgetc()
+fgetpos()
+fgets()
+fgetwc()
+fgetws()
+fmtmsg()
+fopen()
+fpathconf()
+fprintf()
+fputc()
+fputs()
+fputwc()
+fputws()
+fread()
+freopen()
+fscanf()
+fseek()
+fseeko()
+fsetpos()
+fstat()
+fstatat() [Added in POSIX.1-2008]
+ftell()
+ftello()
+ftw()
+futimens() [Added in POSIX.1-2008]
+fwprintf()
+fwrite()
+fwscanf()
+getaddrinfo()
+getc()
+getc_unlocked()
+getchar()
+getchar_unlocked()
+getcwd()
+getdate()
+getdelim() [Added in POSIX.1-2008]
+getgrent()
+getgrgid()
+getgrgid_r()
+getgrnam()
+getgrnam_r()
+gethostbyaddr() [POSIX.1-2001 only (function removed in
+ POSIX.1-2008)]
+gethostbyname() [POSIX.1-2001 only (function removed in
+ POSIX.1-2008)]
+gethostent()
+gethostid()
+gethostname()
+getline() [Added in POSIX.1-2008]
+getlogin()
+getlogin_r()
+getnameinfo()
+getnetbyaddr()
+getnetbyname()
+getnetent()
+getopt() (if opterr is nonzero)
+getprotobyname()
+getprotobynumber()
+getprotoent()
+getpwent()
+getpwnam()
+getpwnam_r()
+getpwuid()
+getpwuid_r()
+gets()
+getservbyname()
+getservbyport()
+getservent()
+getutxent()
+getutxid()
+getutxline()
+getwc()
+getwchar()
+getwd() [POSIX.1-2001 only (function removed in POSIX.1-2008)]
+glob()
+iconv_close()
+iconv_open()
+ioctl()
+link()
+linkat() [Added in POSIX.1-2008]
+lio_listio() [Added in POSIX.1-2008]
+localtime()
+localtime_r()
+lockf() [Added in POSIX.1-2008]
+lseek()
+lstat()
+mkdir() [Added in POSIX.1-2008]
+mkdirat() [Added in POSIX.1-2008]
+mkdtemp() [Added in POSIX.1-2008]
+mkfifo() [Added in POSIX.1-2008]
+mkfifoat() [Added in POSIX.1-2008]
+mknod() [Added in POSIX.1-2008]
+mknodat() [Added in POSIX.1-2008]
+mkstemp()
+mktime()
+nftw()
+opendir()
+openlog()
+pathconf()
+pclose()
+perror()
+popen()
+posix_fadvise()
+posix_fallocate()
+posix_madvise()
+posix_openpt()
+posix_spawn()
+posix_spawnp()
+posix_trace_clear()
+posix_trace_close()
+posix_trace_create()
+posix_trace_create_withlog()
+posix_trace_eventtypelist_getnext_id()
+posix_trace_eventtypelist_rewind()
+posix_trace_flush()
+posix_trace_get_attr()
+posix_trace_get_filter()
+posix_trace_get_status()
+posix_trace_getnext_event()
+posix_trace_open()
+posix_trace_rewind()
+posix_trace_set_filter()
+posix_trace_shutdown()
+posix_trace_timedgetnext_event()
+posix_typed_mem_open()
+printf()
+psiginfo() [Added in POSIX.1-2008]
+psignal() [Added in POSIX.1-2008]
+pthread_rwlock_rdlock()
+pthread_rwlock_timedrdlock()
+pthread_rwlock_timedwrlock()
+pthread_rwlock_wrlock()
+putc()
+putc_unlocked()
+putchar()
+putchar_unlocked()
+puts()
+pututxline()
+putwc()
+putwchar()
+readdir()
+readdir_r()
+readlink() [Added in POSIX.1-2008]
+readlinkat() [Added in POSIX.1-2008]
+remove()
+rename()
+renameat() [Added in POSIX.1-2008]
+rewind()
+rewinddir()
+scandir() [Added in POSIX.1-2008]
+scanf()
+seekdir()
+semop()
+setgrent()
+sethostent()
+setnetent()
+setprotoent()
+setpwent()
+setservent()
+setutxent()
+sigpause() [Added in POSIX.1-2008]
+stat()
+strerror()
+strerror_r()
+strftime()
+symlink()
+symlinkat() [Added in POSIX.1-2008]
+sync()
+syslog()
+tmpfile()
+tmpnam()
+ttyname()
+ttyname_r()
+tzset()
+ungetc()
+ungetwc()
+unlink()
+unlinkat() [Added in POSIX.1-2008]
+utime() [Added in POSIX.1-2008]
+utimensat() [Added in POSIX.1-2008]
+utimes() [Added in POSIX.1-2008]
+vdprintf() [Added in POSIX.1-2008]
+vfprintf()
+vfwprintf()
+vprintf()
+vwprintf()
+wcsftime()
+wordexp()
+wprintf()
+wscanf()
+.EE
+.in
+.PP
+An implementation may also mark other functions
+not specified in the standard as cancelation points.
+In particular, an implementation is likely to mark
+any nonstandard function that may block as a cancelation point.
+(This includes most functions that can touch files.)
+.PP
+It should be noted that, even if an application is not using
+asynchronous cancelation, calling a function from the above list
+from an asynchronous signal handler may cause the equivalent of
+asynchronous cancelation.
+The underlying user code may not expect
+asynchronous cancelation and the state of the user data may become
+inconsistent.
+Therefore signals should be used with caution when
+entering a region of deferred cancelation.
+.\" So, scanning "cancelation point" comments in the glibc 2.8 header
+.\" files, it looks as though at least the following nonstandard
+.\" functions are cancelation points:
+.\" endnetgrent
+.\" endspent
+.\" epoll_pwait
+.\" epoll_wait
+.\" fcloseall
+.\" fdopendir
+.\" fflush_unlocked
+.\" fgetc_unlocked
+.\" fgetgrent
+.\" fgetgrent_r
+.\" fgetpwent
+.\" fgetpwent_r
+.\" fgets_unlocked
+.\" fgetspent
+.\" fgetspent_r
+.\" fgetwc_unlocked
+.\" fgetws_unlocked
+.\" fputc_unlocked
+.\" fputs_unlocked
+.\" fputwc_unlocked
+.\" fputws_unlocked
+.\" fread_unlocked
+.\" fwrite_unlocked
+.\" gai_suspend
+.\" getaddrinfo_a
+.\" getdate_r
+.\" getgrent_r
+.\" getgrouplist
+.\" gethostbyaddr_r
+.\" gethostbyname2
+.\" gethostbyname2_r
+.\" gethostbyname_r
+.\" gethostent_r
+.\" getnetbyaddr_r
+.\" getnetbyname_r
+.\" getnetent_r
+.\" getnetgrent
+.\" getnetgrent_r
+.\" getprotobyname_r
+.\" getprotobynumber_r
+.\" getprotoent_r
+.\" getpw
+.\" getpwent_r
+.\" getservbyname_r
+.\" getservbyport_r
+.\" getservent_r
+.\" getspent
+.\" getspent_r
+.\" getspnam
+.\" getspnam_r
+.\" getutmp
+.\" getutmpx
+.\" getw
+.\" getwc_unlocked
+.\" getwchar_unlocked
+.\" initgroups
+.\" innetgr
+.\" mkostemp
+.\" mkostemp64
+.\" mkstemp64
+.\" ppoll
+.\" pthread_timedjoin_np
+.\" putgrent
+.\" putpwent
+.\" putspent
+.\" putw
+.\" putwc_unlocked
+.\" putwchar_unlocked
+.\" rcmd
+.\" rcmd_af
+.\" rexec
+.\" rexec_af
+.\" rresvport
+.\" rresvport_af
+.\" ruserok
+.\" ruserok_af
+.\" setnetgrent
+.\" setspent
+.\" sgetspent
+.\" sgetspent_r
+.\" updwtmpx
+.\" utmpxname
+.\" vfscanf
+.\" vfwscanf
+.\" vscanf
+.\" vsyslog
+.\" vwscanf
+.SS Compiling on Linux
+On Linux, programs that use the Pthreads API should be compiled using
+.IR "cc \-pthread" .
+.SS Linux implementations of POSIX threads
+Over time, two threading implementations have been provided by
+the GNU C library on Linux:
+.TP
+.B LinuxThreads
+This is the original Pthreads implementation.
+Since glibc 2.4, this implementation is no longer supported.
+.TP
+.BR NPTL " (Native POSIX Threads Library)"
+This is the modern Pthreads implementation.
+By comparison with LinuxThreads, NPTL provides closer conformance to
+the requirements of the POSIX.1 specification and better performance
+when creating large numbers of threads.
+NPTL is available since glibc 2.3.2,
+and requires features that are present in the Linux 2.6 kernel.
+.PP
+Both of these are so-called 1:1 implementations, meaning that each
+thread maps to a kernel scheduling entity.
+Both threading implementations employ the Linux
+.BR clone (2)
+system call.
+In NPTL, thread synchronization primitives (mutexes,
+thread joining, and so on) are implemented using the Linux
+.BR futex (2)
+system call.
+.SS LinuxThreads
+The notable features of this implementation are the following:
+.IP \[bu] 3
+In addition to the main (initial) thread,
+and the threads that the program creates using
+.BR pthread_create (3),
+the implementation creates a "manager" thread.
+This thread handles thread creation and termination.
+(Problems can result if this thread is inadvertently killed.)
+.IP \[bu]
+Signals are used internally by the implementation.
+On Linux 2.2 and later, the first three real-time signals are used
+(see also
+.BR signal (7)).
+On older Linux kernels,
+.B SIGUSR1
+and
+.B SIGUSR2
+are used.
+Applications must avoid the use of whichever set of signals is
+employed by the implementation.
+.IP \[bu]
+Threads do not share process IDs.
+(In effect, LinuxThreads threads are implemented as processes which share
+more information than usual, but which do not share a common process ID.)
+LinuxThreads threads (including the manager thread)
+are visible as separate processes using
+.BR ps (1).
+.PP
+The LinuxThreads implementation deviates from the POSIX.1
+specification in a number of ways, including the following:
+.IP \[bu] 3
+Calls to
+.BR getpid (2)
+return a different value in each thread.
+.IP \[bu]
+Calls to
+.BR getppid (2)
+in threads other than the main thread return the process ID of the
+manager thread; instead
+.BR getppid (2)
+in these threads should return the same value as
+.BR getppid (2)
+in the main thread.
+.IP \[bu]
+When one thread creates a new child process using
+.BR fork (2),
+any thread should be able to
+.BR wait (2)
+on the child.
+However, the implementation allows only the thread that
+created the child to
+.BR wait (2)
+on it.
+.IP \[bu]
+When a thread calls
+.BR execve (2),
+all other threads are terminated (as required by POSIX.1).
+However, the resulting process has the same PID as the thread that called
+.BR execve (2):
+it should have the same PID as the main thread.
+.IP \[bu]
+Threads do not share user and group IDs.
+This can cause complications with set-user-ID programs and
+can cause failures in Pthreads functions if an application
+changes its credentials using
+.BR seteuid (2)
+or similar.
+.IP \[bu]
+Threads do not share a common session ID and process group ID.
+.IP \[bu]
+Threads do not share record locks created using
+.BR fcntl (2).
+.IP \[bu]
+The information returned by
+.BR times (2)
+and
+.BR getrusage (2)
+is per-thread rather than process-wide.
+.IP \[bu]
+Threads do not share semaphore undo values (see
+.BR semop (2)).
+.IP \[bu]
+Threads do not share interval timers.
+.IP \[bu]
+Threads do not share a common nice value.
+.IP \[bu]
+POSIX.1 distinguishes the notions of signals that are directed
+to the process as a whole and signals that are directed to individual
+threads.
+According to POSIX.1, a process-directed signal (sent using
+.BR kill (2),
+for example) should be handled by a single,
+arbitrarily selected thread within the process.
+LinuxThreads does not support the notion of process-directed signals:
+signals may be sent only to specific threads.
+.IP \[bu]
+Threads have distinct alternate signal stack settings.
+However, a new thread's alternate signal stack settings
+are copied from the thread that created it, so that
+the threads initially share an alternate signal stack.
+(A new thread should start with no alternate signal stack defined.
+If two threads handle signals on their shared alternate signal
+stack at the same time, unpredictable program failures are
+likely to occur.)
+.SS NPTL
+With NPTL, all of the threads in a process are placed
+in the same thread group;
+all members of a thread group share the same PID.
+NPTL does not employ a manager thread.
+.PP
+NPTL makes internal use of the first two real-time signals;
+these signals cannot be used in applications.
+See
+.BR nptl (7)
+for further details.
+.PP
+NPTL still has at least one nonconformance with POSIX.1:
+.IP \[bu] 3
+Threads do not share a common nice value.
+.\" FIXME . bug report filed for NPTL nice nonconformance
+.\" http://bugzilla.kernel.org/show_bug.cgi?id=6258
+.\" Sep 08: there is a patch by Denys Vlasenko to address this
+.\" "make setpriority POSIX compliant; introduce PRIO_THREAD extension"
+.\" Monitor this to see if it makes it into mainline.
+.PP
+Some NPTL nonconformances occur only with older kernels:
+.IP \[bu] 3
+The information returned by
+.BR times (2)
+and
+.BR getrusage (2)
+is per-thread rather than process-wide (fixed in Linux 2.6.9).
+.IP \[bu]
+Threads do not share resource limits (fixed in Linux 2.6.10).
+.IP \[bu]
+Threads do not share interval timers (fixed in Linux 2.6.12).
+.IP \[bu]
+Only the main thread is permitted to start a new session using
+.BR setsid (2)
+(fixed in Linux 2.6.16).
+.IP \[bu]
+Only the main thread is permitted to make the process into a
+process group leader using
+.BR setpgid (2)
+(fixed in Linux 2.6.16).
+.IP \[bu]
+Threads have distinct alternate signal stack settings.
+However, a new thread's alternate signal stack settings
+are copied from the thread that created it, so that
+the threads initially share an alternate signal stack
+(fixed in Linux 2.6.16).
+.PP
+Note the following further points about the NPTL implementation:
+.IP \[bu] 3
+If the stack size soft resource limit (see the description of
+.B RLIMIT_STACK
+in
+.BR setrlimit (2))
+is set to a value other than
+.IR unlimited ,
+then this value defines the default stack size for new threads.
+To be effective, this limit must be set before the program
+is executed, perhaps using the
+.I ulimit \-s
+shell built-in command
+.RI ( "limit stacksize"
+in the C shell).
+.SS Determining the threading implementation
+Since glibc 2.3.2, the
+.BR getconf (1)
+command can be used to determine
+the system's threading implementation, for example:
+.PP
+.in +4n
+.EX
+bash$ getconf GNU_LIBPTHREAD_VERSION
+NPTL 2.3.4
+.EE
+.in
+.PP
+With older glibc versions, a command such as the following should
+be sufficient to determine the default threading implementation:
+.PP
+.in +4n
+.EX
+bash$ $( ldd /bin/ls | grep libc.so | awk \[aq]{print $3}\[aq] ) | \e
+ egrep \-i \[aq]threads|nptl\[aq]
+ Native POSIX Threads Library by Ulrich Drepper et al
+.EE
+.in
+.SS Selecting the threading implementation: LD_ASSUME_KERNEL
+On systems with a glibc that supports both LinuxThreads and NPTL
+(i.e., glibc 2.3.\fIx\fP), the
+.B LD_ASSUME_KERNEL
+environment variable can be used to override
+the dynamic linker's default choice of threading implementation.
+This variable tells the dynamic linker to assume that it is
+running on top of a particular kernel version.
+By specifying a kernel version that does not
+provide the support required by NPTL, we can force the use
+of LinuxThreads.
+(The most likely reason for doing this is to run a
+(broken) application that depends on some nonconformant behavior
+in LinuxThreads.)
+For example:
+.PP
+.in +4n
+.EX
+bash$ $( LD_ASSUME_KERNEL=2.2.5 ldd /bin/ls | grep libc.so | \e
+ awk \[aq]{print $3}\[aq] ) | egrep \-i \[aq]threads|nptl\[aq]
+ linuxthreads\-0.10 by Xavier Leroy
+.EE
+.in
+.SH SEE ALSO
+.ad l
+.nh
+.BR clone (2),
+.BR fork (2),
+.BR futex (2),
+.BR gettid (2),
+.BR proc (5),
+.BR attributes (7),
+.BR futex (7),
+.BR nptl (7),
+.BR sigevent (7),
+.BR signal (7)
+.PP
+Various Pthreads manual pages, for example:
+.BR pthread_atfork (3),
+.BR pthread_attr_init (3),
+.BR pthread_cancel (3),
+.BR pthread_cleanup_push (3),
+.BR pthread_cond_signal (3),
+.BR pthread_cond_wait (3),
+.BR pthread_create (3),
+.BR pthread_detach (3),
+.BR pthread_equal (3),
+.BR pthread_exit (3),
+.BR pthread_key_create (3),
+.BR pthread_kill (3),
+.BR pthread_mutex_lock (3),
+.BR pthread_mutex_unlock (3),
+.BR pthread_mutexattr_destroy (3),
+.BR pthread_mutexattr_init (3),
+.BR pthread_once (3),
+.BR pthread_spin_init (3),
+.BR pthread_spin_lock (3),
+.BR pthread_rwlockattr_setkind_np (3),
+.BR pthread_setcancelstate (3),
+.BR pthread_setcanceltype (3),
+.BR pthread_setspecific (3),
+.BR pthread_sigmask (3),
+.BR pthread_sigqueue (3),
+and
+.BR pthread_testcancel (3)
diff --git a/man7/pty.7 b/man7/pty.7
new file mode 100644
index 0000000..5d9b429
--- /dev/null
+++ b/man7/pty.7
@@ -0,0 +1,158 @@
+.\" Copyright (C) 2005 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH pty 7 2022-12-04 "Linux man-pages 6.05.01"
+.SH NAME
+pty \- pseudoterminal interfaces
+.SH DESCRIPTION
+A pseudoterminal (sometimes abbreviated "pty")
+is a pair of virtual character devices that
+provide a bidirectional communication channel.
+One end of the channel is called the
+.IR master ;
+the other end is called the
+.IR slave .
+.PP
+The slave end of the pseudoterminal provides an interface
+that behaves exactly like a classical terminal.
+A process that expects to be connected to a terminal
+can open the slave end of a pseudoterminal and
+then be driven by a program that has opened the master end.
+Anything that is written on the master end is provided to the process
+on the slave end as though it was input typed on a terminal.
+For example, writing the interrupt character (usually control-C)
+to the master device would cause an interrupt signal
+.RB ( SIGINT )
+to be generated for the foreground process group
+that is connected to the slave.
+Conversely, anything that is written to the slave end of the
+pseudoterminal can be read by the process that is connected to
+the master end.
+.PP
+Data flow between master and slave is handled asynchronously,
+much like data flow with a physical terminal.
+Data written to the slave will be available at the master promptly,
+but may not be available immediately.
+Similarly, there may be a small processing delay between
+a write to the master, and the effect being visible at the slave.
+.PP
+Historically, two pseudoterminal APIs have evolved: BSD and System V.
+SUSv1 standardized a pseudoterminal API based on the System V API,
+and this API should be employed in all new programs that use
+pseudoterminals.
+.PP
+Linux provides both BSD-style and (standardized) System V-style
+pseudoterminals.
+System V-style terminals are commonly called UNIX 98 pseudoterminals
+on Linux systems.
+.PP
+Since Linux 2.6.4, BSD-style pseudoterminals are considered deprecated:
+support can be disabled when building the kernel by disabling the
+.B CONFIG_LEGACY_PTYS
+option.
+(Starting with Linux 2.6.30,
+that option is disabled by default in the mainline kernel.)
+UNIX 98 pseudoterminals should be used in new applications.
+.SS UNIX 98 pseudoterminals
+An unused UNIX 98 pseudoterminal master is opened by calling
+.BR posix_openpt (3).
+(This function opens the master clone device,
+.IR /dev/ptmx ;
+see
+.BR pts (4).)
+After performing any program-specific initializations,
+changing the ownership and permissions of the slave device using
+.BR grantpt (3),
+and unlocking the slave using
+.BR unlockpt (3),
+the corresponding slave device can be opened by passing
+the name returned by
+.BR ptsname (3)
+in a call to
+.BR open (2).
+.PP
+The Linux kernel imposes a limit on the number of available
+UNIX 98 pseudoterminals.
+Up to and including Linux 2.6.3, this limit is configured
+at kernel compilation time
+.RB ( CONFIG_UNIX98_PTYS ),
+and the permitted number of pseudoterminals can be up to 2048,
+with a default setting of 256.
+Since Linux 2.6.4, the limit is dynamically adjustable via
+.IR /proc/sys/kernel/pty/max ,
+and a corresponding file,
+.IR /proc/sys/kernel/pty/nr ,
+indicates how many pseudoterminals are currently in use.
+For further details on these two files, see
+.BR proc (5).
+.SS BSD pseudoterminals
+BSD-style pseudoterminals are provided as precreated pairs, with
+names of the form
+.I /dev/ptyXY
+(master) and
+.I /dev/ttyXY
+(slave),
+where X is a letter from the 16-character set [p\-za\-e],
+and Y is a character from the 16-character set [0\-9a\-f].
+(The precise range of letters in these two sets varies across UNIX
+implementations.)
+For example,
+.I /dev/ptyp1
+and
+.I /dev/ttyp1
+constitute a BSD pseudoterminal pair.
+A process finds an unused pseudoterminal pair by trying to
+.BR open (2)
+each pseudoterminal master until an open succeeds.
+The corresponding pseudoterminal slave (substitute "tty"
+for "pty" in the name of the master) can then be opened.
+.SH FILES
+.TP
+.I /dev/ptmx
+UNIX 98 master clone device
+.TP
+.I /dev/pts/*
+UNIX 98 slave devices
+.TP
+.I /dev/pty[p\-za\-e][0\-9a\-f]
+BSD master devices
+.TP
+.I /dev/tty[p\-za\-e][0\-9a\-f]
+BSD slave devices
+.SH NOTES
+Pseudoterminals are used by applications such as network login services
+.RB ( ssh "(1), " rlogin "(1), " telnet (1)),
+terminal emulators such as
+.BR xterm (1),
+.BR script (1),
+.BR screen (1),
+.BR tmux (1),
+.BR unbuffer (1),
+and
+.BR expect (1).
+.PP
+A description of the
+.B TIOCPKT
+.BR ioctl (2),
+which controls packet mode operation, can be found in
+.BR ioctl_tty (2).
+.PP
+The BSD
+.BR ioctl (2)
+operations
+.BR TIOCSTOP ,
+.BR TIOCSTART ,
+.BR TIOCUCNTL ,
+and
+.B TIOCREMOTE
+have not been implemented under Linux.
+.SH SEE ALSO
+.BR ioctl_tty (2),
+.BR select (2),
+.BR setsid (2),
+.BR forkpty (3),
+.BR openpty (3),
+.BR termios (3),
+.BR pts (4),
+.BR tty (4)
diff --git a/man7/queue.7 b/man7/queue.7
new file mode 100644
index 0000000..6f2d5ab
--- /dev/null
+++ b/man7/queue.7
@@ -0,0 +1,138 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\" and Copyright (c) 2020 by Alejandro Colomar <alx@kernel.org>
+.\"
+.\" SPDX-License-Identifier: BSD-3-Clause
+.\"
+.\"
+.TH queue 7 2023-03-30 "Linux man-pages 6.05.01"
+.SH NAME
+queue \- implementations of linked lists and queues
+.SH DESCRIPTION
+The
+.I <sys/queue.h>
+header file provides a set of macros that
+define and operate on the following data structures:
+.TP
+SLIST
+singly linked lists
+.TP
+LIST
+doubly linked lists
+.TP
+STAILQ
+singly linked tail queues
+.TP
+TAILQ
+doubly linked tail queues
+.TP
+CIRCLEQ
+doubly linked circular queues
+.PP
+All structures support the following functionality:
+.IP \[bu] 3
+Insertion of a new entry at the head of the list.
+.IP \[bu]
+Insertion of a new entry after any element in the list.
+.IP \[bu]
+O(1) removal of an entry from the head of the list.
+.IP \[bu]
+Forward traversal through the list.
+.\".IP *
+.\" Swapping the contents of two lists.
+.PP
+Code size and execution time
+depend on the complexity of the data structure being used,
+so programmers should take care to choose the appropriate one.
+.SS Singly linked lists (SLIST)
+Singly linked lists are the simplest
+and support only the above functionality.
+Singly linked lists are ideal for applications with
+large datasets and few or no removals,
+or for implementing a LIFO queue.
+Singly linked lists add the following functionality:
+.IP \[bu] 3
+O(n) removal of any entry in the list.
+.SS Singly linked tail queues (STAILQ)
+Singly linked tail queues add the following functionality:
+.IP \[bu] 3
+Entries can be added at the end of a list.
+.IP \[bu]
+O(n) removal of any entry in the list.
+.IP \[bu]
+They may be concatenated.
+.PP
+However:
+.IP \[bu] 3
+All list insertions must specify the head of the list.
+.IP \[bu]
+Each head entry requires two pointers rather than one.
+.PP
+Singly linked tail queues are ideal for applications with
+large datasets and few or no removals,
+or for implementing a FIFO queue.
+.SS Doubly linked data structures
+All doubly linked types of data structures (lists and tail queues)
+additionally allow:
+.IP \[bu] 3
+Insertion of a new entry before any element in the list.
+.IP \[bu]
+O(1) removal of any entry in the list.
+.PP
+However:
+.IP \[bu] 3
+Each element requires two pointers rather than one.
+.SS Doubly linked lists (LIST)
+Linked lists are the simplest of the doubly linked data structures.
+They add the following functionality over the above:
+.IP \[bu] 3
+They may be traversed backwards.
+.PP
+However:
+.IP \[bu] 3
+To traverse backwards, an entry to begin the traversal and the list in
+which it is contained must be specified.
+.SS Doubly linked tail queues (TAILQ)
+Tail queues add the following functionality:
+.IP \[bu] 3
+Entries can be added at the end of a list.
+.IP \[bu]
+They may be traversed backwards, from tail to head.
+.IP \[bu]
+They may be concatenated.
+.PP
+However:
+.IP \[bu] 3
+All list insertions and removals must specify the head of the list.
+.IP \[bu]
+Each head entry requires two pointers rather than one.
+.SS Doubly linked circular queues (CIRCLEQ)
+Circular queues add the following functionality over the above:
+.IP \[bu] 3
+The first and last entries are connected.
+.PP
+However:
+.IP \[bu] 3
+The termination condition for traversal is more complex.
+.SH STANDARDS
+BSD.
+.SH HISTORY
+.I <sys/queue.h>
+macros first appeared in 4.4BSD.
+.SH NOTES
+Some BSDs provide SIMPLEQ instead of STAILQ.
+They are identical, but for historical reasons
+they were named differently on different BSDs.
+STAILQ originated on FreeBSD, and SIMPLEQ originated on NetBSD.
+For compatibility reasons, some systems provide both sets of macros.
+glibc provides both STAILQ and SIMPLEQ,
+which are identical except for a missing SIMPLEQ equivalent to
+.BR STAILQ_CONCAT ().
+.SH SEE ALSO
+.BR circleq (3),
+.BR insque (3),
+.BR list (3),
+.BR slist (3),
+.BR stailq (3),
+.BR tailq (3)
+.\" .BR tree (3)
diff --git a/man7/random.7 b/man7/random.7
new file mode 100644
index 0000000..bca67ce
--- /dev/null
+++ b/man7/random.7
@@ -0,0 +1,213 @@
+'\" t
+.\" Copyright (C) 2008, George Spelvin <linux@horizon.com>,
+.\" and Copyright (C) 2008, Matt Mackall <mpm@selenic.com>
+.\" and Copyright (C) 2016, Laurent Georget <laurent.georget@supelec.fr>
+.\" and Copyright (C) 2016, Nikos Mavrogiannopoulos <nmav@redhat.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" The following web page is quite informative:
+.\" http://www.2uo.de/myths-about-urandom/
+.\"
+.TH random 7 2023-02-10 "Linux man-pages 6.05.01"
+.SH NAME
+random \- overview of interfaces for obtaining randomness
+.SH DESCRIPTION
+The kernel random-number generator relies on entropy gathered from
+device drivers and other sources of environmental noise to seed
+a cryptographically secure pseudorandom number generator (CSPRNG).
+It is designed for security, rather than speed.
+.PP
+The following interfaces provide access to output from the kernel CSPRNG:
+.IP \[bu] 3
+The
+.I /dev/urandom
+and
+.I /dev/random
+devices, both described in
+.BR random (4).
+These devices have been present on Linux since early times,
+and are also available on many other systems.
+.IP \[bu]
+The Linux-specific
+.BR getrandom (2)
+system call, available since Linux 3.17.
+This system call provides access either to the same source as
+.I /dev/urandom
+(called the
+.I urandom
+source in this page)
+or to the same source as
+.I /dev/random
+(called the
+.I random
+source in this page).
+The default is the
+.I urandom
+source; the
+.I random
+source is selected by specifying the
+.B GRND_RANDOM
+flag to the system call.
+(The
+.BR getentropy (3)
+function provides a slightly more portable interface on top of
+.BR getrandom (2).)
+.\"
+.SS Initialization of the entropy pool
+The kernel collects bits of entropy from the environment.
+When a sufficient number of random bits has been collected, the
+entropy pool is considered to be initialized.
+.SS Choice of random source
+Unless you are doing long-term key generation (and most likely not even
+then), you probably shouldn't be reading from the
+.I /dev/random
+device or employing
+.BR getrandom (2)
+with the
+.B GRND_RANDOM
+flag.
+Instead, either read from the
+.I /dev/urandom
+device or employ
+.BR getrandom (2)
+without the
+.B GRND_RANDOM
+flag.
+The cryptographic algorithms used for the
+.I urandom
+source are quite conservative, and so should be sufficient for all purposes.
+.PP
+The disadvantage of
+.B GRND_RANDOM
+and reads from
+.I /dev/random
+is that the operation can block for an indefinite period of time.
+Furthermore, dealing with the partially fulfilled
+requests that can occur when using
+.B GRND_RANDOM
+or when reading from
+.I /dev/random
+increases code complexity.
+.\"
+.SS Monte Carlo and other probabilistic sampling applications
+Using these interfaces to provide large quantities of data for
+Monte Carlo simulations or other programs/algorithms which are
+doing probabilistic sampling will be slow.
+Furthermore, it is unnecessary, because such applications do not
+need cryptographically secure random numbers.
+Instead, use the interfaces described in this page to obtain
+a small amount of data to seed a user-space pseudorandom
+number generator for use by such applications.
+.\"
+.SS Comparison between getrandom, /dev/urandom, and /dev/random
+The following table summarizes the behavior of the various
+interfaces that can be used to obtain randomness.
+.B GRND_NONBLOCK
+is a flag that can be used to control the blocking behavior of
+.BR getrandom (2).
+The final column of the table considers the case that can occur
+in early boot time when the entropy pool is not yet initialized.
+.ad l
+.TS
+allbox;
+lbw13 lbw12 lbw14 lbw18
+l l l l.
+Interface Pool T{
+Blocking
+\%behavior
+T} T{
+Behavior when pool is not yet ready
+T}
+T{
+.I /dev/random
+T} T{
+Blocking pool
+T} T{
+If entropy too low, blocks until there is enough entropy again
+T} T{
+Blocks until enough entropy gathered
+T}
+T{
+.I /dev/urandom
+T} T{
+CSPRNG output
+T} T{
+Never blocks
+T} T{
+Returns output from uninitialized CSPRNG (may be low entropy and unsuitable for cryptography)
+T}
+T{
+.BR getrandom ()
+T} T{
+Same as
+.I /dev/urandom
+T} T{
+Does not block once pool is ready
+T} T{
+Blocks until pool ready
+T}
+T{
+.BR getrandom ()
+.B GRND_RANDOM
+T} T{
+Same as
+.I /dev/random
+T} T{
+If entropy too low, blocks until there is enough entropy again
+T} T{
+Blocks until pool ready
+T}
+T{
+.BR getrandom ()
+.B GRND_NONBLOCK
+T} T{
+Same as
+.I /dev/urandom
+T} T{
+Does not block once pool is ready
+T} T{
+.B EAGAIN
+T}
+T{
+.BR getrandom ()
+.B GRND_RANDOM
++
+.B GRND_NONBLOCK
+T} T{
+Same as
+.I /dev/random
+T} T{
+.B EAGAIN
+if not enough entropy available
+T} T{
+.B EAGAIN
+T}
+.TE
+.ad
+.\"
+.SS Generating cryptographic keys
+The amount of seed material required to generate a cryptographic key
+equals the effective key size of the key.
+For example, a 3072-bit RSA
+or Diffie-Hellman private key has an effective key size of 128 bits
+(it requires about 2\[ha]128 operations to break) so a key generator
+needs only 128 bits (16 bytes) of seed material from
+.IR /dev/random .
+.PP
+While some safety margin above that minimum is reasonable, as a guard
+against flaws in the CSPRNG algorithm, no cryptographic primitive
+available today can hope to promise more than 256 bits of security,
+so if any program reads more than 256 bits (32 bytes) from the kernel
+random pool per invocation, or per reasonable reseed interval (not less
+than one minute), that should be taken as a sign that its cryptography is
+.I not
+skillfully implemented.
+.\"
+.SH SEE ALSO
+.BR getrandom (2),
+.BR getauxval (3),
+.BR getentropy (3),
+.BR random (4),
+.BR urandom (4),
+.BR signal (7)
diff --git a/man7/raw.7 b/man7/raw.7
new file mode 100644
index 0000000..ab43dd4
--- /dev/null
+++ b/man7/raw.7
@@ -0,0 +1,281 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: raw.7,v 1.6 1999/06/05 10:32:08 freitag Exp $
+.\"
+.TH raw 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+raw \- Linux IPv4 raw sockets
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netinet/in.h>
+.BI "raw_socket = socket(AF_INET, SOCK_RAW, int " protocol );
+.fi
+.SH DESCRIPTION
+Raw sockets allow new IPv4 protocols to be implemented in user space.
+A raw socket receives or sends the raw datagram not
+including link level headers.
+.PP
+The IPv4 layer generates an IP header when sending a packet unless the
+.B IP_HDRINCL
+socket option is enabled on the socket.
+When it is enabled, the packet must contain an IP header.
+For receiving, the IP header is always included in the packet.
+.PP
+In order to create a raw socket, a process must have the
+.B CAP_NET_RAW
+capability in the user namespace that governs its network namespace.
+.PP
+All packets or errors matching the
+.I protocol
+number specified
+for the raw socket are passed to this socket.
+For a list of the allowed protocols,
+see the IANA list of assigned protocol numbers at
+.UR http://www.iana.org/assignments/protocol\-numbers/
+.UE
+and
+.BR getprotobyname (3).
+.PP
+A protocol of
+.B IPPROTO_RAW
+implies enabled
+.B IP_HDRINCL
+and is able to send any IP protocol that is specified in the passed
+header.
+Receiving of all IP protocols via
+.B IPPROTO_RAW
+is not possible using raw sockets.
+.RS
+.TS
+tab(:) allbox;
+c s
+l l.
+IP Header fields modified on sending by \fBIP_HDRINCL\fP
+IP Checksum:Always filled in
+Source Address:Filled in when zero
+Packet ID:Filled in when zero
+Total Length:Always filled in
+.TE
+.RE
+.PP
+If
+.B IP_HDRINCL
+is specified and the IP header has a nonzero destination address, then
+the destination address of the socket is used to route the packet.
+When
+.B MSG_DONTROUTE
+is specified, the destination address should refer to a local interface,
+otherwise a routing table lookup is done anyway but gatewayed routes
+are ignored.
+.PP
+If
+.B IP_HDRINCL
+isn't set, then IP header options can be set on raw sockets with
+.BR setsockopt (2);
+see
+.BR ip (7)
+for more information.
+.PP
+Starting with Linux 2.2, all IP header fields and options can be set using
+IP socket options.
+This means raw sockets are usually needed only for new
+protocols or protocols with no user interface (like ICMP).
+.PP
+When a packet is received, it is passed to any raw sockets which have
+been bound to its protocol before it is passed to other protocol handlers
+(e.g., kernel protocol modules).
+.SS Address format
+For sending and receiving datagrams
+.RB ( sendto (2),
+.BR recvfrom (2),
+and similar),
+raw sockets use the standard
+.I sockaddr_in
+address structure defined in
+.BR ip (7).
+The
+.I sin_port
+field could be used to specify the IP protocol number,
+but it is ignored for sending in Linux 2.2 and later, and should always be
+set to 0 (see BUGS).
+For incoming packets,
+.I sin_port
+.\" commit f59fc7f30b710d45aadf715460b3e60dbe9d3418
+is set to zero.
+.SS Socket options
+Raw socket options can be set with
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2)
+by passing the
+.B IPPROTO_RAW
+.\" Or SOL_RAW on Linux
+family flag.
+.TP
+.B ICMP_FILTER
+Enable a special filter for raw sockets bound to the
+.B IPPROTO_ICMP
+protocol.
+The value has a bit set for each ICMP message type which
+should be filtered out.
+The default is to filter no ICMP messages.
+.PP
+In addition, all
+.BR ip (7)
+.B IPPROTO_IP
+socket options valid for datagram sockets are supported.
+.SS Error handling
+Errors originating from the network are passed to the user only when the
+socket is connected or the
+.B IP_RECVERR
+flag is enabled.
+For connected sockets, only
+.B EMSGSIZE
+and
+.B EPROTO
+are passed for compatibility.
+With
+.BR IP_RECVERR ,
+all network errors are saved in the error queue.
+.SH ERRORS
+.TP
+.B EACCES
+User tried to send to a broadcast address without having the
+broadcast flag set on the socket.
+.TP
+.B EFAULT
+An invalid memory address was supplied.
+.TP
+.B EINVAL
+Invalid argument.
+.TP
+.B EMSGSIZE
+Packet too big.
+Either Path MTU Discovery is enabled (the
+.B IP_MTU_DISCOVER
+socket flag) or the packet size exceeds the maximum allowed IPv4
+packet size of 64\ kB.
+.TP
+.B EOPNOTSUPP
+Invalid flag has been passed to a socket call (like
+.BR MSG_OOB ).
+.TP
+.B EPERM
+The user doesn't have permission to open raw sockets.
+Only processes with an effective user ID of 0 or the
+.B CAP_NET_RAW
+attribute may do that.
+.TP
+.B EPROTO
+An ICMP error has arrived reporting a parameter problem.
+.SH VERSIONS
+.B IP_RECVERR
+and
+.B ICMP_FILTER
+are new in Linux 2.2.
+They are Linux extensions and should not be used in portable programs.
+.PP
+Linux 2.0 enabled some bug-to-bug compatibility with BSD in the
+raw socket code when the
+.B SO_BSDCOMPAT
+socket option was set; since Linux 2.2,
+this option no longer has that effect.
+.SH NOTES
+By default, raw sockets do path MTU (Maximum Transmission Unit) discovery.
+This means the kernel
+will keep track of the MTU to a specific target IP address and return
+.B EMSGSIZE
+when a raw packet write exceeds it.
+When this happens, the application should decrease the packet size.
+Path MTU discovery can also be turned off using the
+.B IP_MTU_DISCOVER
+socket option or the
+.I /proc/sys/net/ipv4/ip_no_pmtu_disc
+file; see
+.BR ip (7)
+for details.
+When turned off, raw sockets will fragment outgoing packets
+that exceed the interface MTU.
+However, disabling it is not recommended
+for performance and reliability reasons.
+.PP
+A raw socket can be bound to a specific local address using the
+.BR bind (2)
+call.
+If it isn't bound, all packets with the specified IP protocol are received.
+In addition, a raw socket can be bound to a specific network device using
+.BR SO_BINDTODEVICE ;
+see
+.BR socket (7).
+.PP
+An
+.B IPPROTO_RAW
+socket is send-only.
+If you really want to receive all IP packets, use a
+.BR packet (7)
+socket with the
+.B ETH_P_IP
+protocol.
+Note that packet sockets don't reassemble IP fragments,
+unlike raw sockets.
+.PP
+If you want to receive all ICMP packets for a datagram socket,
+it is often better to use
+.B IP_RECVERR
+on that particular socket; see
+.BR ip (7).
+.PP
+Raw sockets may tap all IP protocols in Linux, even
+protocols like ICMP or TCP which have a protocol module in the kernel.
+In this case, the packets are passed to both the kernel module and the raw
+socket(s).
+This should not be relied upon in portable programs; many other BSD
+socket implementations have limitations here.
+.PP
+Linux never changes headers passed from the user (except for filling
+in some zeroed fields as described for
+.BR IP_HDRINCL ).
+This differs from many other implementations of raw sockets.
+.PP
+Raw sockets are generally rather unportable and should be avoided in
+programs intended to be portable.
+.PP
+Sending on raw sockets should take the IP protocol from
+.IR sin_port ;
+this ability was lost in Linux 2.2.
+The workaround is to use
+.BR IP_HDRINCL .
+.SH BUGS
+Transparent proxy extensions are not described.
+.PP
+When the
+.B IP_HDRINCL
+option is set, datagrams will not be fragmented and are limited to
+the interface MTU.
+.PP
+Setting the IP protocol for sending in
+.I sin_port
+got lost in Linux 2.2.
+The protocol that the socket was bound to or that
+was specified in the initial
+.BR socket (2)
+call is always used.
+.\" .SH AUTHORS
+.\" This man page was written by Andi Kleen.
+.SH SEE ALSO
+.BR recvmsg (2),
+.BR sendmsg (2),
+.BR capabilities (7),
+.BR ip (7),
+.BR socket (7)
+.PP
+.B RFC\ 1191
+for path MTU discovery.
+.B RFC\ 791
+and the
+.I <linux/ip.h>
+header file for the IP protocol.
diff --git a/man7/regex.7 b/man7/regex.7
new file mode 100644
index 0000000..7a5b2d8
--- /dev/null
+++ b/man7/regex.7
@@ -0,0 +1,293 @@
+'\" t
+.\" From Henry Spencer's regex package (as found in the apache
+.\" distribution). The package carries the following copyright:
+.\"
+.\" Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
+.\" %%%LICENSE_START(MISC)
+.\" This software is not subject to any license of the American Telephone
+.\" and Telegraph Company or of the Regents of the University of California.
+.\"
+.\" Permission is granted to anyone to use this software for any purpose
+.\" on any computer system, and to alter it and redistribute it, subject
+.\" to the following restrictions:
+.\"
+.\" 1. The author is not responsible for the consequences of use of this
+.\" software, no matter how awful, even if they arise from flaws in it.
+.\"
+.\" 2. The origin of this software must not be misrepresented, either by
+.\" explicit claim or by omission. Since few users ever read sources,
+.\" credits must appear in the documentation.
+.\"
+.\" 3. Altered versions must be plainly marked as such, and must not be
+.\" misrepresented as being the original software. Since few users
+.\" ever read sources, credits must appear in the documentation.
+.\"
+.\" 4. This notice may not be removed or altered.
+.\" %%%LICENSE_END
+.\"
+.\" In order to comply with `credits must appear in the documentation'
+.\" I added an AUTHOR paragraph below - aeb.
+.\"
+.\" In the default nroff environment there is no dagger \(dg.
+.\"
+.\" 2005-05-11 Removed discussion of `[[:<:]]' and `[[:>:]]', which
+.\" appear not to be in the glibc implementation of regcomp
+.\"
+.ie t .ds dg \(dg
+.el .ds dg (!)
+.TH regex 7 2023-03-08 "Linux man-pages 6.05.01"
+.SH NAME
+regex \- POSIX.2 regular expressions
+.SH DESCRIPTION
+Regular expressions ("RE"s),
+as defined in POSIX.2, come in two forms:
+modern REs (roughly those of
+.IR egrep ;
+POSIX.2 calls these "extended" REs)
+and obsolete REs (roughly those of
+.BR ed (1);
+POSIX.2 "basic" REs).
+Obsolete REs mostly exist for backward compatibility in some old programs;
+they will be discussed at the end.
+POSIX.2 leaves some aspects of RE syntax and semantics open;
+"\*(dg" marks decisions on these aspects that
+may not be fully portable to other POSIX.2 implementations.
+.PP
+A (modern) RE is one\*(dg or more nonempty\*(dg \fIbranches\fR,
+separated by \[aq]|\[aq].
+It matches anything that matches one of the branches.
+.PP
+A branch is one\*(dg or more \fIpieces\fR, concatenated.
+It matches a match for the first, followed by a match for the second,
+and so on.
+.PP
+A piece is an \fIatom\fR possibly followed
+by a single\*(dg \[aq]*\[aq], \[aq]+\[aq], \[aq]?\[aq], or \fIbound\fR.
+An atom followed by \[aq]*\[aq]
+matches a sequence of 0 or more matches of the atom.
+An atom followed by \[aq]+\[aq]
+matches a sequence of 1 or more matches of the atom.
+An atom followed by \[aq]?\[aq]
+matches a sequence of 0 or 1 matches of the atom.
+.PP
+A \fIbound\fR is \[aq]{\[aq] followed by an unsigned decimal integer,
+possibly followed by \[aq],\[aq]
+possibly followed by another unsigned decimal integer,
+always followed by \[aq]}\[aq].
+The integers must lie between 0 and
+.B RE_DUP_MAX
+(255\*(dg) inclusive,
+and if there are two of them, the first may not exceed the second.
+An atom followed by a bound containing one integer \fIi\fR
+and no comma matches
+a sequence of exactly \fIi\fR matches of the atom.
+An atom followed by a bound
+containing one integer \fIi\fR and a comma matches
+a sequence of \fIi\fR or more matches of the atom.
+An atom followed by a bound
+containing two integers \fIi\fR and \fIj\fR matches
+a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
+.PP
+An atom is a regular expression enclosed in "\fI()\fP"
+(matching a match for the regular expression),
+an empty set of "\fI()\fP" (matching the null string)\*(dg,
+a \fIbracket expression\fR (see below),
+\[aq].\[aq] (matching any single character),
+\[aq]\[ha]\[aq] (matching the null string at the beginning of a line),
+\[aq]$\[aq] (matching the null string at the end of a line),
+a \[aq]\e\[aq] followed by one of the characters "\fI\[ha].[$()|*+?{\e\fP"
+(matching that character taken as an ordinary character),
+a \[aq]\e\[aq] followed by any other character\*(dg
+(matching that character taken as an ordinary character,
+as if the \[aq]\e\[aq] had not been present\*(dg),
+or a single character with no other significance (matching that character).
+A \[aq]{\[aq] followed by a character other than a digit
+is an ordinary character,
+not the beginning of a bound\*(dg.
+It is illegal to end an RE with \[aq]\e\[aq].
+.PP
+A \fIbracket expression\fR is a list of characters enclosed in "\fI[]\fP".
+It normally matches any single character from the list (but see below).
+If the list begins with \[aq]\[ha]\[aq],
+it matches any single character
+(but see below) \fInot\fR from the rest of the list.
+If two characters in the list are separated by \[aq]\-\[aq], this is shorthand
+for the full \fIrange\fR of characters between those two (inclusive) in the
+collating sequence,
+for example, "\fI[0\-9]\fP" in ASCII matches any decimal digit.
+It is illegal\*(dg for two ranges to share an
+endpoint, for example, "\fIa\-c\-e\fP".
+Ranges are very collating-sequence-dependent,
+and portable programs should avoid relying on them.
+.PP
+To include a literal \[aq]]\[aq] in the list, make it the first character
+(following a possible \[aq]\[ha]\[aq]).
+To include a literal \[aq]\-\[aq], make it the first or last character,
+or the second endpoint of a range.
+To use a literal \[aq]\-\[aq] as the first endpoint of a range,
+enclose it in "\fI[.\fP" and "\fI.]\fP"
+to make it a collating element (see below).
+With the exception of these and some combinations using \[aq][\[aq] (see next
+paragraphs), all other special characters, including \[aq]\e\[aq], lose their
+special significance within a bracket expression.
+.PP
+Within a bracket expression, a collating element (a character,
+a multicharacter sequence that collates as if it were a single character,
+or a collating-sequence name for either)
+enclosed in "\fI[.\fP" and "\fI.]\fP" stands for the
+sequence of characters of that collating element.
+The sequence is a single element of the bracket expression's list.
+A bracket expression containing a multicharacter collating element
+can thus match more than one character,
+for example, if the collating sequence includes a "ch" collating element,
+then the RE "\fI[[.ch.]]*c\fP" matches the first five characters
+of "chchcc".
+.PP
+Within a bracket expression, a collating element enclosed in "\fI[=\fP" and
+"\fI=]\fP" is an equivalence class, standing for the sequences of characters
+of all collating elements equivalent to that one, including itself.
+(If there are no other equivalent collating elements,
+the treatment is as if the enclosing delimiters
+were "\fI[.\fP" and "\fI.]\fP".)
+For example, if o and \(^o are the members of an equivalence class,
+then "\fI[[=o=]]\fP", "\fI[[=\(^o=]]\fP",
+and "\fI[o\(^o]\fP" are all synonymous.
+An equivalence class may not\*(dg be an endpoint
+of a range.
+.PP
+Within a bracket expression, the name of a \fIcharacter class\fR enclosed
+in "\fI[:\fP" and "\fI:]\fP" stands for the list
+of all characters belonging to that
+class.
+Standard character class names are:
+.PP
+.RS
+.TS
+l l l.
+alnum digit punct
+alpha graph space
+blank lower upper
+cntrl print xdigit
+.TE
+.RE
+.PP
+These stand for the character classes defined in
+.BR wctype (3).
+A locale may provide others.
+A character class may not be used as an endpoint of a range.
+.\" As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=295666
+.\" The following does not seem to apply in the glibc implementation
+.\" .PP
+.\" There are two special cases\*(dg of bracket expressions:
+.\" the bracket expressions "\fI[[:<:]]\fP" and "\fI[[:>:]]\fP" match
+.\" the null string at the beginning and end of a word respectively.
+.\" A word is defined as a sequence of
+.\" word characters
+.\" which is neither preceded nor followed by
+.\" word characters.
+.\" A word character is an
+.\" .I alnum
+.\" character (as defined by
+.\" .BR wctype (3))
+.\" or an underscore.
+.\" This is an extension,
+.\" compatible with but not specified by POSIX.2,
+.\" and should be used with
+.\" caution in software intended to be portable to other systems.
+.PP
+In the event that an RE could match more than one substring of a given
+string,
+the RE matches the one starting earliest in the string.
+If the RE could match more than one substring starting at that point,
+it matches the longest.
+Subexpressions also match the longest possible substrings, subject to
+the constraint that the whole match be as long as possible,
+with subexpressions starting earlier in the RE taking priority over
+ones starting later.
+Note that higher-level subexpressions thus take priority over
+their lower-level component subexpressions.
+.PP
+Match lengths are measured in characters, not collating elements.
+A null string is considered longer than no match at all.
+For example,
+"\fIbb*\fP" matches the three middle characters of "abbbc",
+"\fI(wee|week)(knights|nights)\fP"
+matches all ten characters of "weeknights",
+when "\fI(.*).*\fP" is matched against "abc" the parenthesized subexpression
+matches all three characters, and
+when "\fI(a*)*\fP" is matched against "bc"
+both the whole RE and the parenthesized
+subexpression match the null string.
+.PP
+If case-independent matching is specified,
+the effect is much as if all case distinctions had vanished from the
+alphabet.
+When an alphabetic that exists in multiple cases appears as an
+ordinary character outside a bracket expression, it is effectively
+transformed into a bracket expression containing both cases,
+for example, \[aq]x\[aq] becomes "\fI[xX]\fP".
+When it appears inside a bracket expression, all case counterparts
+of it are added to the bracket expression, so that, for example, "\fI[x]\fP"
+becomes "\fI[xX]\fP" and "\fI[\[ha]x]\fP" becomes "\fI[\[ha]xX]\fP".
+.PP
+No particular limit is imposed on the length of REs\*(dg.
+Programs intended to be portable should not employ REs longer
+than 256 bytes,
+as an implementation can refuse to accept such REs and remain
+POSIX-compliant.
+.PP
+Obsolete ("basic") regular expressions differ in several respects.
+\[aq]|\[aq], \[aq]+\[aq], and \[aq]?\[aq] are
+ordinary characters and there is no equivalent
+for their functionality.
+The delimiters for bounds are "\fI\e{\fP" and "\fI\e}\fP",
+with \[aq]{\[aq] and \[aq]}\[aq] by themselves ordinary characters.
+The parentheses for nested subexpressions are "\fI\e(\fP" and "\fI\e)\fP",
+with \[aq](\[aq] and \[aq])\[aq] by themselves ordinary characters.
+\[aq]\[ha]\[aq] is an ordinary character except at the beginning of the
+RE or\*(dg the beginning of a parenthesized subexpression,
+\[aq]$\[aq] is an ordinary character except at the end of the
+RE or\*(dg the end of a parenthesized subexpression,
+and \[aq]*\[aq] is an ordinary character if it appears at the beginning of the
+RE or the beginning of a parenthesized subexpression
+(after a possible leading \[aq]\[ha]\[aq]).
+.PP
+Finally, there is one new type of atom, a \fIback reference\fR:
+\[aq]\e\[aq] followed by a nonzero decimal digit \fId\fR
+matches the same sequence of characters
+matched by the \fId\fRth parenthesized subexpression
+(numbering subexpressions by the positions of their opening parentheses,
+left to right),
+so that, for example, "\fI\e([bc]\e)\e1\fP" matches "bb" or "cc" but not "bc".
+.SH BUGS
+Having two kinds of REs is a botch.
+.PP
+The current POSIX.2 spec says that \[aq])\[aq] is an ordinary character in
+the absence of an unmatched \[aq](\[aq];
+this was an unintentional result of a wording error,
+and change is likely.
+Avoid relying on it.
+.PP
+Back references are a dreadful botch,
+posing major problems for efficient implementations.
+They are also somewhat vaguely defined
+(does
+"\fIa\e(\e(b\e)*\e2\e)*d\fP" match "abbbd"?).
+Avoid using them.
+.PP
+POSIX.2's specification of case-independent matching is vague.
+The "one case implies all cases" definition given above
+is current consensus among implementors as to the right interpretation.
+.\" As per http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=295666
+.\" The following does not seem to apply in the glibc implementation
+.\" .PP
+.\" The syntax for word boundaries is incredibly ugly.
+.SH AUTHOR
+.\" Sigh... The page license means we must have the author's name
+.\" in the formatted output.
+This page was taken from Henry Spencer's regex package.
+.SH SEE ALSO
+.BR grep (1),
+.BR regex (3)
+.PP
+POSIX.2, section 2.8 (Regular Expression Notation).
diff --git a/man7/rtld-audit.7 b/man7/rtld-audit.7
new file mode 100644
index 0000000..df04a8c
--- /dev/null
+++ b/man7/rtld-audit.7
@@ -0,0 +1,606 @@
+.\" Copyright (c) 2009 Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 2009-01-12, mtk, Created
+.\"
+.TH RTLD-AUDIT 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+rtld\-audit \- auditing API for the dynamic linker
+.SH SYNOPSIS
+.nf
+.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
+.B #include <link.h>
+.fi
+.SH DESCRIPTION
+The GNU dynamic linker (run-time linker)
+provides an auditing API that allows an application
+to be notified when various dynamic linking events occur.
+This API is very similar to the auditing interface provided by the
+Solaris run-time linker.
+The necessary constants and prototypes are defined by including
+.IR <link.h> .
+.PP
+To use this interface, the programmer creates a shared library
+that implements a standard set of function names.
+Not all of the functions need to be implemented: in most cases,
+if the programmer is not interested in a particular class of auditing event,
+then no implementation needs to be provided for the corresponding
+auditing function.
+.PP
+To employ the auditing interface, the environment variable
+.B LD_AUDIT
+must be defined to contain a colon-separated list of shared libraries,
+each of which can implement (parts of) the auditing API.
+When an auditable event occurs,
+the corresponding function is invoked in each library,
+in the order that the libraries are listed.
+.SS la_version()
+\&
+.nf
+.BI "unsigned int la_version(unsigned int " version );
+.fi
+.PP
+This is the only function that
+.I must
+be defined by an auditing library:
+it performs the initial handshake between the dynamic linker and
+the auditing library.
+When invoking this function, the dynamic linker passes, in
+.IR version ,
+the highest version of the auditing interface that the linker supports.
+.PP
+A typical implementation of this function simply returns the constant
+.BR LAV_CURRENT ,
+which indicates the version of
+.I <link.h>
+that was used to build the audit module.
+If the dynamic linker does
+not support this version of the audit interface, it will refuse to
+activate this audit module.
+If the function returns zero, the dynamic
+linker also does not activate this audit module.
+.PP
+In order to enable backwards compatibility with older dynamic linkers,
+an audit module can examine the
+.I version
+argument and return an earlier version than
+.BR LAV_CURRENT ,
+assuming the module can adjust its implementation to match the
+requirements of the previous version of the audit interface.
+The
+.B la_version
+function should not return the value of
+.I version
+without further checks because it could correspond to an interface
+that does not match the
+.I <link.h>
+definitions used to build the audit module.
+.SS la_objsearch()
+\&
+.nf
+.BI "char *la_objsearch(const char *" name ", uintptr_t *" cookie ,
+.BI " unsigned int " flag );
+.fi
+.PP
+The dynamic linker invokes this function to inform the auditing library
+that it is about to search for a shared object.
+The
+.I name
+argument is the filename or pathname that is to be searched for.
+.I cookie
+identifies the shared object that initiated the search.
+.I flag
+is set to one of the following values:
+.TP 17
+.B LA_SER_ORIG
+This is the original name that is being searched for.
+Typically, this name comes from an ELF
+.B DT_NEEDED
+entry, or is the
+.I filename
+argument given to
+.BR dlopen (3).
+.TP
+.B LA_SER_LIBPATH
+.I name
+was created using a directory specified in
+.BR LD_LIBRARY_PATH .
+.TP
+.B LA_SER_RUNPATH
+.I name
+was created using a directory specified in an ELF
+.B DT_RPATH
+or
+.B DT_RUNPATH
+list.
+.TP
+.B LA_SER_CONFIG
+.I name
+was found via the
+.BR ldconfig (8)
+cache
+.RI ( /etc/ld.so.cache ).
+.TP
+.B LA_SER_DEFAULT
+.I name
+was found via a search of one of the default directories.
+.TP
+.B LA_SER_SECURE
+.I name
+is specific to a secure object (unused on Linux).
+.PP
+As its function result,
+.BR la_objsearch ()
+returns the pathname that the dynamic linker should use
+for further processing.
+If NULL is returned, then this pathname is ignored for further processing.
+If this audit library simply intends to monitor search paths, then
+.I name
+should be returned.
+.SS la_activity()
+\&
+.nf
+.BI "void la_activity(uintptr_t *" cookie ", unsigned int " flag );
+.fi
+.PP
+The dynamic linker calls this function to inform the auditing library
+that link-map activity is occurring.
+.I cookie
+identifies the object at the head of the link map.
+When the dynamic linker invokes this function,
+.I flag
+is set to one of the following values:
+.TP 19
+.B LA_ACT_ADD
+New objects are being added to the link map.
+.TP
+.B LA_ACT_DELETE
+Objects are being removed from the link map.
+.TP
+.B LA_ACT_CONSISTENT
+Link-map activity has been completed: the map is once again consistent.
+.SS la_objopen()
+\&
+.nf
+.BI "unsigned int la_objopen(struct link_map *" map ", Lmid_t " lmid ,
+.BI " uintptr_t *" cookie );
+.fi
+.PP
+The dynamic linker calls this function when a new shared object is loaded.
+The
+.I map
+argument is a pointer to a link-map structure that describes the object.
+The
+.I lmid
+argument has one of the following values:
+.TP 17
+.B LM_ID_BASE
+Link map is part of the initial namespace.
+.TP
+.B LM_ID_NEWLM
+Link map is part of a new namespace requested via
+.BR dlmopen (3).
+.PP
+.I cookie
+is a pointer to an identifier for this object.
+The identifier is provided to later calls to functions
+in the auditing library in order to identify this object.
+This identifier is initialized to point to the object's link map,
+but the audit library can change the identifier to some other value
+that it may prefer to use to identify the object.
+.PP
+As its return value,
+.BR la_objopen ()
+returns a bit mask created by ORing zero or more of the
+following constants,
+which allow the auditing library to select the objects to be monitored by
+.BR la_symbind* ():
+.TP 17
+.B LA_FLG_BINDTO
+Audit symbol bindings to this object.
+.TP
+.B LA_FLG_BINDFROM
+Audit symbol bindings from this object.
+.PP
+A return value of 0 from
+.BR la_objopen ()
+indicates that no symbol bindings should be audited for this object.
+.SS la_objclose()
+\&
+.nf
+.BI "unsigned int la_objclose(uintptr_t *" cookie );
+.fi
+.PP
+The dynamic linker invokes this function after any finalization
+code for the object has been executed,
+before the object is unloaded.
+The
+.I cookie
+argument is the identifier obtained from a previous invocation of
+.BR la_objopen ().
+.PP
+In the current implementation, the value returned by
+.BR la_objclose ()
+is ignored.
+.SS la_preinit()
+\&
+.nf
+.BI "void la_preinit(uintptr_t *" cookie );
+.fi
+.PP
+The dynamic linker invokes this function after all shared objects
+have been loaded, before control is passed to the application
+(i.e., before calling
+.IR main ()).
+Note that
+.IR main ()
+may still later dynamically load objects using
+.BR dlopen (3).
+.SS la_symbind*()
+\&
+.nf
+.BI "uintptr_t la_symbind32(Elf32_Sym *" sym ", unsigned int " ndx ,
+.BI " uintptr_t *" refcook ", uintptr_t *" defcook ,
+.BI " unsigned int *" flags ", const char *" symname );
+.BI "uintptr_t la_symbind64(Elf64_Sym *" sym ", unsigned int " ndx ,
+.BI " uintptr_t *" refcook ", uintptr_t *" defcook ,
+.BI " unsigned int *" flags ", const char *" symname );
+.fi
+.PP
+The dynamic linker invokes one of these functions
+when a symbol binding occurs between two shared objects
+that have been marked for auditing notification by
+.BR la_objopen ().
+The
+.BR la_symbind32 ()
+function is employed on 32-bit platforms;
+the
+.BR la_symbind64 ()
+function is employed on 64-bit platforms.
+.PP
+The
+.I sym
+argument is a pointer to a structure
+that provides information about the symbol being bound.
+The structure definition is shown in
+.IR <elf.h> .
+Among the fields of this structure,
+.I st_value
+indicates the address to which the symbol is bound.
+.PP
+The
+.I ndx
+argument gives the index of the symbol in the symbol table
+of the bound shared object.
+.PP
+The
+.I refcook
+argument identifies the shared object that is making the symbol reference;
+this is the same identifier that is provided to the
+.BR la_objopen ()
+function that returned
+.BR LA_FLG_BINDFROM .
+The
+.I defcook
+argument identifies the shared object that defines the referenced symbol;
+this is the same identifier that is provided to the
+.BR la_objopen ()
+function that returned
+.BR LA_FLG_BINDTO .
+.PP
+The
+.I symname
+argument points to a string containing the name of the symbol.
+.PP
+The
+.I flags
+argument is a bit mask that both provides information about the symbol
+and can be used to modify further auditing of this
+PLT (Procedure Linkage Table) entry.
+The dynamic linker may supply the following bit values in this argument:
+.\" LA_SYMB_STRUCTCALL appears to be unused
+.TP 22
+.B LA_SYMB_DLSYM
+The binding resulted from a call to
+.BR dlsym (3).
+.TP
+.B LA_SYMB_ALTVALUE
+A previous
+.BR la_symbind* ()
+call returned an alternate value for this symbol.
+.PP
+By default, if the auditing library implements
+.BR la_pltenter ()
+and
+.BR la_pltexit ()
+functions (see below), then these functions are invoked, after
+.BR la_symbind* (),
+for PLT entries, each time the symbol is referenced.
+.\" pltenter/pltexit are called for non-dynamically loaded libraries,
+.\" but don't seem to be called for dynamically loaded libs?
+.\" Is this the same on Solaris?
+The following flags can be ORed into
+.I *flags
+to change this default behavior:
+.TP 22
+.B LA_SYMB_NOPLTENTER
+Don't call
+.BR la_pltenter ()
+for this symbol.
+.TP 22
+.B LA_SYMB_NOPLTEXIT
+Don't call
+.BR la_pltexit ()
+for this symbol.
+.PP
+The return value of
+.BR la_symbind32 ()
+and
+.BR la_symbind64 ()
+is the address to which control should be passed after the function returns.
+If the auditing library is simply monitoring symbol bindings,
+then it should return
+.IR sym\->st_value .
+A different value may be returned if the library wishes to direct control
+to an alternate location.
+.SS la_pltenter()
+The precise name and argument types for this function
+depend on the hardware platform.
+(The appropriate definition is supplied by
+.IR <link.h> .)
+Here is the definition for x86-32:
+.PP
+.nf
+.BI "Elf32_Addr la_i86_gnu_pltenter(Elf32_Sym *" sym ", unsigned int " ndx ,
+.BI " uintptr_t *" refcook ", uintptr_t *" defcook ,
+.BI " La_i86_regs *" regs ", unsigned int *" flags ,
+.BI " const char *" symname ", long *" framesizep );
+.fi
+.PP
+This function is invoked just before a PLT entry is called,
+between two shared objects that have been marked for binding notification.
+.PP
+The
+.IR sym ,
+.IR ndx ,
+.IR refcook ,
+.IR defcook ,
+and
+.I symname
+arguments are as for
+.BR la_symbind* ().
+.PP
+The
+.I regs
+argument points to a structure (defined in
+.IR <link.h> )
+containing the values of registers to be used for
+the call to this PLT entry.
+.PP
+The
+.I flags
+argument points to a bit mask that conveys information about,
+and can be used to modify subsequent auditing of, this PLT entry, as for
+.BR la_symbind* ().
+.PP
+.\" FIXME . Is the following correct?
+The
+.I framesizep
+argument points to a
+.I long\~int
+buffer that can be used to explicitly set the frame size
+used for the call to this PLT entry.
+If different
+.BR la_pltenter ()
+invocations for this symbol return different values,
+then the maximum returned value is used.
+The
+.BR la_pltexit ()
+function is called only if this buffer is
+explicitly set to a suitable value.
+.PP
+The return value of
+.BR la_pltenter ()
+is as for
+.BR la_symbind* ().
+.SS la_pltexit()
+The precise name and argument types for this function
+depend on the hardware platform.
+(The appropriate definition is supplied by
+.IR <link.h> .)
+Here is the definition for x86-32:
+.PP
+.nf
+.BI "unsigned int la_i86_gnu_pltexit(Elf32_Sym *" sym ", unsigned int " ndx ,
+.BI " uintptr_t *" refcook ", uintptr_t *" defcook ,
+.BI " const La_i86_regs *" inregs ", La_i86_retval *" outregs ,
+.BI " const char *" symname );
+.fi
+.PP
+This function is called when a PLT entry,
+made between two shared objects that have been marked
+for binding notification, returns.
+The function is called just before control returns to the caller
+of the PLT entry.
+.PP
+The
+.IR sym ,
+.IR ndx ,
+.IR refcook ,
+.IR defcook ,
+and
+.I symname
+arguments are as for
+.BR la_symbind* ().
+.PP
+The
+.I inregs
+argument points to a structure (defined in
+.IR <link.h> )
+containing the values of registers used for the call to this PLT entry.
+The
+.I outregs
+argument points to a structure (defined in
+.IR <link.h> )
+containing return values for the call to this PLT entry.
+These values can be modified by the caller,
+and the changes will be visible to the caller of the PLT entry.
+.PP
+In the current GNU implementation, the return value of
+.BR la_pltexit ()
+is ignored.
+.\" This differs from Solaris, where an audit library that monitors
+.\" symbol binding should return the value of the 'retval' argument
+.\" (not provided by GNU, but equivalent to returning outregs->lrv_eax
+.\" on (say) x86-32).
+.SH VERSIONS
+This API is very similar to the Solaris API
+described in the Solaris
+.IR "Linker and Libraries Guide" ,
+in the chapter
+.IR "Runtime Linker Auditing Interface" .
+.SH STANDARDS
+None.
+.SH NOTES
+Note the following differences from the Solaris dynamic linker
+auditing API:
+.IP \[bu] 3
+The Solaris
+.BR la_objfilter ()
+interface is not supported by the GNU implementation.
+.IP \[bu]
+The Solaris
+.BR la_symbind32 ()
+and
+.BR la_pltexit ()
+functions do not provide a
+.I symname
+argument.
+.IP \[bu]
+The Solaris
+.BR la_pltexit ()
+function does not provide
+.I inregs
+and
+.I outregs
+arguments (but does provide a
+.I retval
+argument with the function return value).
+.SH BUGS
+In glibc versions up to and including 2.9,
+specifying more than one audit library in
+.B LD_AUDIT
+results in a run-time crash.
+This is reportedly fixed in glibc 2.10.
+.\" FIXME . Specifying multiple audit libraries doesn't work on GNU.
+.\" My simple tests on Solaris work okay, but not on Linux -- mtk, Jan 2009
+.\" glibc bug filed: http://sourceware.org/bugzilla/show_bug.cgi?id=9733
+.\" Reportedly, this is fixed on 16 Mar 2009 (i.e., for glibc 2.10)
+.SH EXAMPLES
+.EX
+#include <link.h>
+#include <stdio.h>
+\&
+unsigned int
+la_version(unsigned int version)
+{
+ printf("la_version(): version = %u; LAV_CURRENT = %u\en",
+ version, LAV_CURRENT);
+\&
+ return LAV_CURRENT;
+}
+\&
+char *
+la_objsearch(const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ printf("la_objsearch(): name = %s; cookie = %p", name, cookie);
+ printf("; flag = %s\en",
+ (flag == LA_SER_ORIG) ? "LA_SER_ORIG" :
+ (flag == LA_SER_LIBPATH) ? "LA_SER_LIBPATH" :
+ (flag == LA_SER_RUNPATH) ? "LA_SER_RUNPATH" :
+ (flag == LA_SER_DEFAULT) ? "LA_SER_DEFAULT" :
+ (flag == LA_SER_CONFIG) ? "LA_SER_CONFIG" :
+ (flag == LA_SER_SECURE) ? "LA_SER_SECURE" :
+ "???");
+\&
+ return name;
+}
+\&
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ printf("la_activity(): cookie = %p; flag = %s\en", cookie,
+ (flag == LA_ACT_CONSISTENT) ? "LA_ACT_CONSISTENT" :
+ (flag == LA_ACT_ADD) ? "LA_ACT_ADD" :
+ (flag == LA_ACT_DELETE) ? "LA_ACT_DELETE" :
+ "???");
+}
+\&
+unsigned int
+la_objopen(struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ printf("la_objopen(): loading \e"%s\e"; lmid = %s; cookie=%p\en",
+ map\->l_name,
+ (lmid == LM_ID_BASE) ? "LM_ID_BASE" :
+ (lmid == LM_ID_NEWLM) ? "LM_ID_NEWLM" :
+ "???",
+ cookie);
+\&
+ return LA_FLG_BINDTO | LA_FLG_BINDFROM;
+}
+\&
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ printf("la_objclose(): %p\en", cookie);
+\&
+ return 0;
+}
+\&
+void
+la_preinit(uintptr_t *cookie)
+{
+ printf("la_preinit(): %p\en", cookie);
+}
+\&
+uintptr_t
+la_symbind32(Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf("la_symbind32(): symname = %s; sym\->st_value = %p\en",
+ symname, sym\->st_value);
+ printf(" ndx = %u; flags = %#x", ndx, *flags);
+ printf("; refcook = %p; defcook = %p\en", refcook, defcook);
+\&
+ return sym\->st_value;
+}
+\&
+uintptr_t
+la_symbind64(Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+{
+ printf("la_symbind64(): symname = %s; sym\->st_value = %p\en",
+ symname, sym\->st_value);
+ printf(" ndx = %u; flags = %#x", ndx, *flags);
+ printf("; refcook = %p; defcook = %p\en", refcook, defcook);
+\&
+ return sym\->st_value;
+}
+\&
+Elf32_Addr
+la_i86_gnu_pltenter(Elf32_Sym *sym, unsigned int ndx,
+ uintptr_t *refcook, uintptr_t *defcook, La_i86_regs *regs,
+ unsigned int *flags, const char *symname, long *framesizep)
+{
+ printf("la_i86_gnu_pltenter(): %s (%p)\en", symname, sym\->st_value);
+\&
+ return sym\->st_value;
+}
+.EE
+.SH SEE ALSO
+.BR ldd (1),
+.BR dlopen (3),
+.BR ld.so (8),
+.BR ldconfig (8)
diff --git a/man7/rtnetlink.7 b/man7/rtnetlink.7
new file mode 100644
index 0000000..3b6465f
--- /dev/null
+++ b/man7/rtnetlink.7
@@ -0,0 +1,558 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" Based on the original comments from Alexey Kuznetsov, written with
+.\" help from Matthew Wilcox.
+.\" $Id: rtnetlink.7,v 1.8 2000/01/22 01:55:04 freitag Exp $
+.\"
+.TH rtnetlink 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+rtnetlink \- Linux routing socket
+.SH SYNOPSIS
+.nf
+.B #include <asm/types.h>
+.B #include <linux/netlink.h>
+.B #include <linux/rtnetlink.h>
+.B #include <sys/socket.h>
+.PP
+.BI "rtnetlink_socket = socket(AF_NETLINK, int " socket_type ", NETLINK_ROUTE);"
+.fi
+.SH DESCRIPTION
+Rtnetlink allows the kernel's routing tables to be read and altered.
+It is used within the kernel to communicate between
+various subsystems, though this usage is not documented here, and for
+communication with user-space programs.
+Network routes, IP addresses, link parameters, neighbor setups, queueing
+disciplines, traffic classes and packet classifiers may all be controlled
+through
+.B NETLINK_ROUTE
+sockets.
+It is based on netlink messages; see
+.BR netlink (7)
+for more information.
+.\" FIXME . ? all these macros could be moved to rtnetlink(3)
+.SS Routing attributes
+Some rtnetlink messages have optional attributes after the initial header:
+.PP
+.in +4n
+.EX
+struct rtattr {
+ unsigned short rta_len; /* Length of option */
+ unsigned short rta_type; /* Type of option */
+ /* Data follows */
+};
+.EE
+.in
+.PP
+These attributes should be manipulated using only the RTA_* macros
+or libnetlink; see
+.BR rtnetlink (3).
+.SS Messages
+Rtnetlink consists of these message types
+(in addition to standard netlink messages):
+.TP
+.BR RTM_NEWLINK ", " RTM_DELLINK ", " RTM_GETLINK
+Create, remove, or get information about a specific network interface.
+These messages contain an
+.I ifinfomsg
+structure followed by a series of
+.I rtattr
+structures.
+.IP
+.EX
+struct ifinfomsg {
+ unsigned char ifi_family; /* AF_UNSPEC */
+ unsigned short ifi_type; /* Device type */
+ int ifi_index; /* Interface index */
+ unsigned int ifi_flags; /* Device flags */
+ unsigned int ifi_change; /* change mask */
+};
+.EE
+.IP
+.\" FIXME Document ifinfomsg.ifi_type
+.I ifi_flags
+contains the device flags, see
+.BR netdevice (7);
+.I ifi_index
+is the unique interface index
+(since Linux 3.7, it is possible to feed a nonzero value with the
+.B RTM_NEWLINK
+message, thus creating a link with the given
+.IR ifindex );
+.I ifi_change
+is reserved for future use and should always be set to 0xFFFFFFFF.
+.TS
+tab(:);
+c s s
+lb l l.
+Routing attributes
+rta_type:Value type:Description
+_
+IFLA_UNSPEC:-:unspecified
+IFLA_ADDRESS:hardware address:interface L2 address
+IFLA_BROADCAST:hardware address:L2 broadcast address
+IFLA_IFNAME:asciiz string:Device name
+IFLA_MTU:unsigned int:MTU of the device
+IFLA_LINK:int:Link type
+IFLA_QDISC:asciiz string:Queueing discipline
+IFLA_STATS:T{
+see below
+T}:Interface Statistics
+IFLA_PERM_ADDRESS:hardware address:T{
+hardware address provided by device (since Linux 5.5)
+T}
+.TE
+.IP
+The value type for
+.B IFLA_STATS
+is
+.I struct rtnl_link_stats
+.RI ( "struct net_device_stats"
+in Linux 2.4 and earlier).
+.TP
+.BR RTM_NEWADDR ", " RTM_DELADDR ", " RTM_GETADDR
+Add, remove, or receive information about an IP address associated with
+an interface.
+In Linux 2.2, an interface can carry multiple IP addresses;
+this replaces the alias device concept in Linux 2.0.
+In Linux 2.2, these messages
+support IPv4 and IPv6 addresses.
+They contain an
+.I ifaddrmsg
+structure, optionally followed by
+.I rtattr
+routing attributes.
+.IP
+.EX
+struct ifaddrmsg {
+ unsigned char ifa_family; /* Address type */
+ unsigned char ifa_prefixlen; /* Prefixlength of address */
+ unsigned char ifa_flags; /* Address flags */
+ unsigned char ifa_scope; /* Address scope */
+ unsigned int ifa_index; /* Interface index */
+};
+.EE
+.IP
+.I ifa_family
+is the address family type (currently
+.B AF_INET
+or
+.BR AF_INET6 ),
+.I ifa_prefixlen
+is the length of the address mask of the address if defined for the
+family (like for IPv4),
+.I ifa_scope
+is the address scope,
+.I ifa_index
+is the interface index of the interface the address is associated with.
+.I ifa_flags
+is a flag word of
+.B IFA_F_SECONDARY
+for secondary address (old alias interface),
+.B IFA_F_PERMANENT
+for a permanent address set by the user and other undocumented flags.
+.TS
+tab(:);
+c s s
+lb l l.
+Attributes
+rta_type:Value type:Description
+_
+IFA_UNSPEC:-:unspecified
+IFA_ADDRESS:raw protocol address:interface address
+IFA_LOCAL:raw protocol address:local address
+IFA_LABEL:asciiz string:name of the interface
+IFA_BROADCAST:raw protocol address:broadcast address
+IFA_ANYCAST:raw protocol address:anycast address
+IFA_CACHEINFO:struct ifa_cacheinfo:Address information
+.TE
+.\" FIXME Document struct ifa_cacheinfo
+.TP
+.BR RTM_NEWROUTE ", " RTM_DELROUTE ", " RTM_GETROUTE
+Create, remove, or receive information about a network route.
+These messages contain an
+.I rtmsg
+structure with an optional sequence of
+.I rtattr
+structures following.
+For
+.BR RTM_GETROUTE ,
+setting
+.I rtm_dst_len
+and
+.I rtm_src_len
+to 0 means you get all entries for the specified routing table.
+For the other fields, except
+.I rtm_table
+and
+.IR rtm_protocol ,
+0 is the wildcard.
+.IP
+.EX
+struct rtmsg {
+ unsigned char rtm_family; /* Address family of route */
+ unsigned char rtm_dst_len; /* Length of destination */
+ unsigned char rtm_src_len; /* Length of source */
+ unsigned char rtm_tos; /* TOS filter */
+ unsigned char rtm_table; /* Routing table ID;
+ see RTA_TABLE below */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+\&
+ unsigned int rtm_flags;
+};
+.EE
+.TS
+tab(:);
+lb l.
+rtm_type:Route type
+_
+RTN_UNSPEC:unknown route
+RTN_UNICAST:a gateway or direct route
+RTN_LOCAL:a local interface route
+RTN_BROADCAST:T{
+a local broadcast route (sent as a broadcast)
+T}
+RTN_ANYCAST:T{
+a local broadcast route (sent as a unicast)
+T}
+RTN_MULTICAST:a multicast route
+RTN_BLACKHOLE:a packet dropping route
+RTN_UNREACHABLE:an unreachable destination
+RTN_PROHIBIT:a packet rejection route
+RTN_THROW:continue routing lookup in another table
+RTN_NAT:a network address translation rule
+RTN_XRESOLVE:T{
+refer to an external resolver (not implemented)
+T}
+.TE
+.TS
+tab(:);
+lb l.
+rtm_protocol:Route origin
+_
+RTPROT_UNSPEC:unknown
+RTPROT_REDIRECT:T{
+by an ICMP redirect (currently unused)
+T}
+RTPROT_KERNEL:by the kernel
+RTPROT_BOOT:during boot
+RTPROT_STATIC:by the administrator
+.TE
+.sp 1
+Values larger than
+.B RTPROT_STATIC
+are not interpreted by the kernel; they are just for user information.
+They may be used to tag the source of routing information or to
+distinguish between multiple routing daemons.
+See
+.I <linux/rtnetlink.h>
+for the routing daemon identifiers which are already assigned.
+.IP
+.I rtm_scope
+is the distance to the destination:
+.TS
+tab(:);
+lb l.
+RT_SCOPE_UNIVERSE:global route
+RT_SCOPE_SITE:T{
+interior route in the local autonomous system
+T}
+RT_SCOPE_LINK:route on this link
+RT_SCOPE_HOST:route on the local host
+RT_SCOPE_NOWHERE:destination doesn't exist
+.TE
+.sp 1
+The values between
+.B RT_SCOPE_UNIVERSE
+and
+.B RT_SCOPE_SITE
+are available to the user.
+.IP
+The
+.I rtm_flags
+have the following meanings:
+.TS
+tab(:);
+lb l.
+RTM_F_NOTIFY:T{
+if the route changes, notify the user via rtnetlink
+T}
+RTM_F_CLONED:route is cloned from another route
+RTM_F_EQUALIZE:a multipath equalizer (not yet implemented)
+.TE
+.sp 1
+.I rtm_table
+specifies the routing table
+.TS
+tab(:);
+lb l.
+RT_TABLE_UNSPEC:an unspecified routing table
+RT_TABLE_DEFAULT:the default table
+RT_TABLE_MAIN:the main table
+RT_TABLE_LOCAL:the local table
+.TE
+.sp 1
+The user may assign arbitrary values between
+.B RT_TABLE_UNSPEC
+and
+.BR RT_TABLE_DEFAULT .
+.\" Keep table on same page
+.bp +1
+.TS
+tab(:);
+c s s
+lb2 l2 l.
+Attributes
+rta_type:Value type:Description
+_
+RTA_UNSPEC:-:ignored
+RTA_DST:protocol address:Route destination address
+RTA_SRC:protocol address:Route source address
+RTA_IIF:int:Input interface index
+RTA_OIF:int:Output interface index
+RTA_GATEWAY:protocol address:The gateway of the route
+RTA_PRIORITY:int:Priority of route
+RTA_PREFSRC:protocol address:Preferred source address
+RTA_METRICS:int:Route metric
+RTA_MULTIPATH::T{
+Multipath nexthop data
+.br
+(see below).
+T}
+RTA_PROTOINFO::No longer used
+RTA_FLOW:int:Route realm
+RTA_CACHEINFO:struct rta_cacheinfo:(see linux/rtnetlink.h)
+RTA_SESSION::No longer used
+RTA_MP_ALGO::No longer used
+RTA_TABLE:int:T{
+Routing table ID; if set,
+.br
+rtm_table is ignored
+T}
+RTA_MARK:int:
+RTA_MFC_STATS:struct rta_mfc_stats:(see linux/rtnetlink.h)
+RTA_VIA:struct rtvia:T{
+Gateway in different AF
+(see below)
+T}
+RTA_NEWDST:protocol address:T{
+Change packet
+destination address
+T}
+RTA_PREF:char:T{
+RFC4191 IPv6 router
+preference (see below)
+T}
+RTA_ENCAP_TYPE:short:T{
+Encapsulation type for
+.br
+lwtunnels (see below)
+T}
+RTA_ENCAP::Defined by RTA_ENCAP_TYPE
+RTA_EXPIRES:int:T{
+Expire time for IPv6
+routes (in seconds)
+T}
+.TE
+.IP
+.B RTA_MULTIPATH
+contains several packed instances of
+.I struct rtnexthop
+together with nested RTAs
+.RB ( RTA_GATEWAY ):
+.IP
+.in +4n
+.EX
+struct rtnexthop {
+ unsigned short rtnh_len; /* Length of struct + length
+ of RTAs */
+ unsigned char rtnh_flags; /* Flags (see
+ linux/rtnetlink.h) */
+ unsigned char rtnh_hops; /* Nexthop priority */
+ int rtnh_ifindex; /* Interface index for this
+ nexthop */
+};
+.EE
+.in
+.IP
+There exist a bunch of
+.B RTNH_*
+macros similar to
+.B RTA_*
+and
+.B NLMSG_*
+macros
+useful to handle these structures.
+.IP
+.in +4n
+.EX
+struct rtvia {
+ unsigned short rtvia_family;
+ unsigned char rtvia_addr[0];
+};
+.EE
+.in
+.IP
+.I rtvia_addr
+is the address,
+.I rtvia_family
+is its family type.
+.IP
+.B RTA_PREF
+may contain values
+.BR ICMPV6_ROUTER_PREF_LOW ,
+.BR ICMPV6_ROUTER_PREF_MEDIUM ,
+and
+.B ICMPV6_ROUTER_PREF_HIGH
+defined in
+.IR <linux/icmpv6.h> .
+.IP
+.B RTA_ENCAP_TYPE
+may contain values
+.BR LWTUNNEL_ENCAP_MPLS ,
+.BR LWTUNNEL_ENCAP_IP ,
+.BR LWTUNNEL_ENCAP_ILA ,
+or
+.B LWTUNNEL_ENCAP_IP6
+defined in
+.IR <linux/lwtunnel.h> .
+.IP
+.B Fill these values in!
+.TP
+.BR RTM_NEWNEIGH ", " RTM_DELNEIGH ", " RTM_GETNEIGH
+Add, remove, or receive information about a neighbor table
+entry (e.g., an ARP entry).
+The message contains an
+.I ndmsg
+structure.
+.IP
+.EX
+struct ndmsg {
+ unsigned char ndm_family;
+ int ndm_ifindex; /* Interface index */
+ __u16 ndm_state; /* State */
+ __u8 ndm_flags; /* Flags */
+ __u8 ndm_type;
+};
+\&
+struct nda_cacheinfo {
+ __u32 ndm_confirmed;
+ __u32 ndm_used;
+ __u32 ndm_updated;
+ __u32 ndm_refcnt;
+};
+.EE
+.IP
+.I ndm_state
+is a bit mask of the following states:
+.TS
+tab(:);
+lb l.
+NUD_INCOMPLETE:a currently resolving cache entry
+NUD_REACHABLE:a confirmed working cache entry
+NUD_STALE:an expired cache entry
+NUD_DELAY:an entry waiting for a timer
+NUD_PROBE:a cache entry that is currently reprobed
+NUD_FAILED:an invalid cache entry
+NUD_NOARP:a device with no destination cache
+NUD_PERMANENT:a static entry
+.TE
+.sp 1
+Valid
+.I ndm_flags
+are:
+.TS
+tab(:);
+lb l.
+NTF_PROXY:a proxy arp entry
+NTF_ROUTER:an IPv6 router
+.TE
+.sp 1
+.\" FIXME .
+.\" document the members of the struct better
+The
+.I rtattr
+struct has the following meanings for the
+.I rta_type
+field:
+.TS
+tab(:);
+lb l.
+NDA_UNSPEC:unknown type
+NDA_DST:a neighbor cache network layer destination address
+NDA_LLADDR:a neighbor cache link layer address
+NDA_CACHEINFO:cache statistics
+.TE
+.sp 1
+If the
+.I rta_type
+field is
+.BR NDA_CACHEINFO ,
+then a
+.I struct nda_cacheinfo
+header follows.
+.TP
+.BR RTM_NEWRULE ", " RTM_DELRULE ", " RTM_GETRULE
+Add, delete, or retrieve a routing rule.
+Carries a
+.IR "struct rtmsg" .
+.TP
+.BR RTM_NEWQDISC ", " RTM_DELQDISC ", " RTM_GETQDISC
+Add, remove, or get a queueing discipline.
+The message contains a
+.I struct tcmsg
+and may be followed by a series of
+attributes.
+.IP
+.EX
+struct tcmsg {
+ unsigned char tcm_family;
+ int tcm_ifindex; /* interface index */
+ __u32 tcm_handle; /* Qdisc handle */
+ __u32 tcm_parent; /* Parent qdisc */
+ __u32 tcm_info;
+};
+.EE
+.TS
+tab(:);
+c s s
+lb2 l2 l.
+Attributes
+rta_type:Value type:Description
+_
+TCA_UNSPEC:-:unspecified
+TCA_KIND:asciiz string:Name of queueing discipline
+TCA_OPTIONS:byte sequence:Qdisc-specific options follow
+TCA_STATS:struct tc_stats:Qdisc statistics
+TCA_XSTATS:qdisc-specific:Module-specific statistics
+TCA_RATE:struct tc_estimator:Rate limit
+.TE
+.sp 1
+In addition, various other qdisc-module-specific attributes are allowed.
+For more information see the appropriate include files.
+.TP
+.BR RTM_NEWTCLASS ", " RTM_DELTCLASS ", " RTM_GETTCLASS
+Add, remove, or get a traffic class.
+These messages contain a
+.I struct tcmsg
+as described above.
+.TP
+.BR RTM_NEWTFILTER ", " RTM_DELTFILTER ", " RTM_GETTFILTER
+Add, remove, or receive information about a traffic filter.
+These messages contain a
+.I struct tcmsg
+as described above.
+.SH VERSIONS
+.B rtnetlink
+is a new feature of Linux 2.2.
+.SH BUGS
+This manual page is incomplete.
+.SH SEE ALSO
+.BR cmsg (3),
+.BR rtnetlink (3),
+.BR ip (7),
+.BR netlink (7)
diff --git a/man7/sched.7 b/man7/sched.7
new file mode 100644
index 0000000..7854505
--- /dev/null
+++ b/man7/sched.7
@@ -0,0 +1,992 @@
+.\" Copyright (C) 2014 Michael Kerrisk <mtk.manpages@gmail.com>
+.\" and Copyright (C) 2014 Peter Zijlstra <peterz@infradead.org>
+.\" and Copyright (C) 2014 Juri Lelli <juri.lelli@gmail.com>
+.\" Various pieces from the old sched_setscheduler(2) page
+.\" Copyright (C) Tom Bjorkholm, Markus Kuhn & David A. Wheeler 1996-1999
+.\" and Copyright (C) 2007 Carsten Emde <Carsten.Emde@osadl.org>
+.\" and Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" Worth looking at: http://rt.wiki.kernel.org/index.php
+.\"
+.TH sched 7 2023-02-10 "Linux man-pages 6.05.01"
+.SH NAME
+sched \- overview of CPU scheduling
+.SH DESCRIPTION
+Since Linux 2.6.23, the default scheduler is CFS,
+the "Completely Fair Scheduler".
+The CFS scheduler replaced the earlier "O(1)" scheduler.
+.\"
+.SS API summary
+Linux provides the following system calls for controlling
+the CPU scheduling behavior, policy, and priority of processes
+(or, more precisely, threads).
+.TP
+.BR nice (2)
+Set a new nice value for the calling thread,
+and return the new nice value.
+.TP
+.BR getpriority (2)
+Return the nice value of a thread, a process group,
+or the set of threads owned by a specified user.
+.TP
+.BR setpriority (2)
+Set the nice value of a thread, a process group,
+or the set of threads owned by a specified user.
+.TP
+.BR sched_setscheduler (2)
+Set the scheduling policy and parameters of a specified thread.
+.TP
+.BR sched_getscheduler (2)
+Return the scheduling policy of a specified thread.
+.TP
+.BR sched_setparam (2)
+Set the scheduling parameters of a specified thread.
+.TP
+.BR sched_getparam (2)
+Fetch the scheduling parameters of a specified thread.
+.TP
+.BR sched_get_priority_max (2)
+Return the maximum priority available in a specified scheduling policy.
+.TP
+.BR sched_get_priority_min (2)
+Return the minimum priority available in a specified scheduling policy.
+.TP
+.BR sched_rr_get_interval (2)
+Fetch the quantum used for threads that are scheduled under
+the "round-robin" scheduling policy.
+.TP
+.BR sched_yield (2)
+Cause the caller to relinquish the CPU,
+so that some other thread can be executed.
+.TP
+.BR sched_setaffinity (2)
+(Linux-specific)
+Set the CPU affinity of a specified thread.
+.TP
+.BR sched_getaffinity (2)
+(Linux-specific)
+Get the CPU affinity of a specified thread.
+.TP
+.BR sched_setattr (2)
+Set the scheduling policy and parameters of a specified thread.
+This (Linux-specific) system call provides a superset of the functionality of
+.BR sched_setscheduler (2)
+and
+.BR sched_setparam (2).
+.TP
+.BR sched_getattr (2)
+Fetch the scheduling policy and parameters of a specified thread.
+This (Linux-specific) system call provides a superset of the functionality of
+.BR sched_getscheduler (2)
+and
+.BR sched_getparam (2).
+.\"
+.SS Scheduling policies
+The scheduler is the kernel component that decides which runnable thread
+will be executed by the CPU next.
+Each thread has an associated scheduling policy and a \fIstatic\fP
+scheduling priority,
+.IR sched_priority .
+The scheduler makes its decisions based on knowledge of the scheduling
+policy and static priority of all threads on the system.
+.PP
+For threads scheduled under one of the normal scheduling policies
+(\fBSCHED_OTHER\fP, \fBSCHED_IDLE\fP, \fBSCHED_BATCH\fP),
+\fIsched_priority\fP is not used in scheduling
+decisions (it must be specified as 0).
+.PP
+Processes scheduled under one of the real-time policies
+(\fBSCHED_FIFO\fP, \fBSCHED_RR\fP) have a
+\fIsched_priority\fP value in the range 1 (low) to 99 (high).
+(As the numbers imply, real-time threads always have higher priority
+than normal threads.)
+Note well: POSIX.1 requires an implementation to support only a
+minimum of 32 distinct priority levels for the real-time policies,
+and some systems supply just this minimum.
+Portable programs should use
+.BR sched_get_priority_min (2)
+and
+.BR sched_get_priority_max (2)
+to find the range of priorities supported for a particular policy.
+.PP
+Conceptually, the scheduler maintains a list of runnable
+threads for each possible \fIsched_priority\fP value.
+In order to determine which thread runs next, the scheduler looks for
+the nonempty list with the highest static priority and selects the
+thread at the head of this list.
+.PP
+A thread's scheduling policy determines
+where it will be inserted into the list of threads
+with equal static priority and how it will move inside this list.
+.PP
+All scheduling is preemptive: if a thread with a higher static
+priority becomes ready to run, the currently running thread
+will be preempted and
+returned to the wait list for its static priority level.
+The scheduling policy determines the
+ordering only within the list of runnable threads with equal static
+priority.
+.SS SCHED_FIFO: First in-first out scheduling
+\fBSCHED_FIFO\fP can be used only with static priorities higher than
+0, which means that when a \fBSCHED_FIFO\fP thread becomes runnable,
+it will always immediately preempt any currently running
+\fBSCHED_OTHER\fP, \fBSCHED_BATCH\fP, or \fBSCHED_IDLE\fP thread.
+\fBSCHED_FIFO\fP is a simple scheduling
+algorithm without time slicing.
+For threads scheduled under the
+\fBSCHED_FIFO\fP policy, the following rules apply:
+.IP \[bu] 3
+A running \fBSCHED_FIFO\fP thread that has been preempted by another thread of
+higher priority will stay at the head of the list for its priority and
+will resume execution as soon as all threads of higher priority are
+blocked again.
+.IP \[bu]
+When a blocked \fBSCHED_FIFO\fP thread becomes runnable, it
+will be inserted at the end of the list for its priority.
+.IP \[bu]
+If a call to
+.BR sched_setscheduler (2),
+.BR sched_setparam (2),
+.BR sched_setattr (2),
+.BR pthread_setschedparam (3),
+or
+.BR pthread_setschedprio (3)
+changes the priority of the running or runnable
+.B SCHED_FIFO
+thread identified by
+.IR pid ,
+the effect on the thread's position in the list depends on
+the direction of the change to the thread's priority:
+.RS
+.IP (a) 5
+If the thread's priority is raised,
+it is placed at the end of the list for its new priority.
+As a consequence,
+it may preempt a currently running thread with the same priority.
+.IP (b)
+If the thread's priority is unchanged,
+its position in the run list is unchanged.
+.IP (c)
+If the thread's priority is lowered,
+it is placed at the front of the list for its new priority.
+.RE
+.IP
+According to POSIX.1-2008,
+changes to a thread's priority (or policy) using any mechanism other than
+.BR pthread_setschedprio (3)
+should result in the thread being placed at the end of
+the list for its priority.
+.\" In Linux 2.2.x and Linux 2.4.x, the thread is placed at the front of the queue
+.\" In Linux 2.0.x, the Right Thing happened: the thread went to the back -- MTK
+.IP \[bu]
+A thread calling
+.BR sched_yield (2)
+will be put at the end of the list.
+.PP
+No other events will move a thread
+scheduled under the \fBSCHED_FIFO\fP policy in the wait list of
+runnable threads with equal static priority.
+.PP
+A \fBSCHED_FIFO\fP
+thread runs until either it is blocked by an I/O request, it is
+preempted by a higher priority thread, or it calls
+.BR sched_yield (2).
+.SS SCHED_RR: Round-robin scheduling
+\fBSCHED_RR\fP is a simple enhancement of \fBSCHED_FIFO\fP.
+Everything
+described above for \fBSCHED_FIFO\fP also applies to \fBSCHED_RR\fP,
+except that each thread is allowed to run only for a maximum time
+quantum.
+If a \fBSCHED_RR\fP thread has been running for a time
+period equal to or longer than the time quantum, it will be put at the
+end of the list for its priority.
+A \fBSCHED_RR\fP thread that has
+been preempted by a higher priority thread and subsequently resumes
+execution as a running thread will complete the unexpired portion of
+its round-robin time quantum.
+The length of the time quantum can be
+retrieved using
+.BR sched_rr_get_interval (2).
+.\" On Linux 2.4, the length of the RR interval is influenced
+.\" by the process nice value -- MTK
+.\"
+.SS SCHED_DEADLINE: Sporadic task model deadline scheduling
+Since Linux 3.14, Linux provides a deadline scheduling policy
+.RB ( SCHED_DEADLINE ).
+This policy is currently implemented using
+GEDF (Global Earliest Deadline First)
+in conjunction with CBS (Constant Bandwidth Server).
+To set and fetch this policy and associated attributes,
+one must use the Linux-specific
+.BR sched_setattr (2)
+and
+.BR sched_getattr (2)
+system calls.
+.PP
+A sporadic task is one that has a sequence of jobs, where each
+job is activated at most once per period.
+Each job also has a
+.IR "relative deadline" ,
+before which it should finish execution, and a
+.IR "computation time" ,
+which is the CPU time necessary for executing the job.
+The moment when a task wakes up
+because a new job has to be executed is called the
+.I arrival time
+(also referred to as the request time or release time).
+The
+.I start time
+is the time at which a task starts its execution.
+The
+.I absolute deadline
+is thus obtained by adding the relative deadline to the arrival time.
+.PP
+The following diagram clarifies these terms:
+.PP
+.in +4n
+.EX
+arrival/wakeup absolute deadline
+ | start time |
+ | | |
+ v v v
+-----x--------xooooooooooooooooo--------x--------x---
+ |<- comp. time ->|
+ |<------- relative deadline ------>|
+ |<-------------- period ------------------->|
+.EE
+.in
+.PP
+When setting a
+.B SCHED_DEADLINE
+policy for a thread using
+.BR sched_setattr (2),
+one can specify three parameters:
+.IR Runtime ,
+.IR Deadline ,
+and
+.IR Period .
+These parameters do not necessarily correspond to the aforementioned terms:
+usual practice is to set Runtime to something bigger than the average
+computation time (or worst-case execution time for hard real-time tasks),
+Deadline to the relative deadline, and Period to the period of the task.
+Thus, for
+.B SCHED_DEADLINE
+scheduling, we have:
+.PP
+.in +4n
+.EX
+arrival/wakeup absolute deadline
+ | start time |
+ | | |
+ v v v
+-----x--------xooooooooooooooooo--------x--------x---
+ |<-- Runtime ------->|
+ |<----------- Deadline ----------->|
+ |<-------------- Period ------------------->|
+.EE
+.in
+.PP
+The three deadline-scheduling parameters correspond to the
+.IR sched_runtime ,
+.IR sched_deadline ,
+and
+.I sched_period
+fields of the
+.I sched_attr
+structure; see
+.BR sched_setattr (2).
+These fields express values in nanoseconds.
+.\" FIXME It looks as though specifying sched_period as 0 means
+.\" "make sched_period the same as sched_deadline".
+.\" This needs to be documented.
+If
+.I sched_period
+is specified as 0, then it is made the same as
+.IR sched_deadline .
+.PP
+The kernel requires that:
+.PP
+.in +4n
+.EX
+sched_runtime <= sched_deadline <= sched_period
+.EE
+.in
+.PP
+.\" See __checkparam_dl in kernel/sched/core.c
+In addition, under the current implementation,
+all of the parameter values must be at least 1024
+(i.e., just over one microsecond,
+which is the resolution of the implementation), and less than 2\[ha]63.
+If any of these checks fails,
+.BR sched_setattr (2)
+fails with the error
+.BR EINVAL .
+.PP
+The CBS guarantees non-interference between tasks, by throttling
+threads that attempt to over-run their specified Runtime.
+.PP
+To ensure deadline scheduling guarantees,
+the kernel must prevent situations where the set of
+.B SCHED_DEADLINE
+threads is not feasible (schedulable) within the given constraints.
+The kernel thus performs an admission test when setting or changing
+.B SCHED_DEADLINE
+policy and attributes.
+This admission test calculates whether the change is feasible;
+if it is not,
+.BR sched_setattr (2)
+fails with the error
+.BR EBUSY .
+.PP
+For example, it is required (but not necessarily sufficient) for
+the total utilization to be less than or equal to the total number of
+CPUs available, where, since each thread can maximally run for
+Runtime per Period, that thread's utilization is its
+Runtime divided by its Period.
+.PP
+In order to fulfill the guarantees that are made when
+a thread is admitted to the
+.B SCHED_DEADLINE
+policy,
+.B SCHED_DEADLINE
+threads are the highest priority (user controllable) threads in the
+system; if any
+.B SCHED_DEADLINE
+thread is runnable,
+it will preempt any thread scheduled under one of the other policies.
+.PP
+A call to
+.BR fork (2)
+by a thread scheduled under the
+.B SCHED_DEADLINE
+policy fails with the error
+.BR EAGAIN ,
+unless the thread has its reset-on-fork flag set (see below).
+.PP
+A
+.B SCHED_DEADLINE
+thread that calls
+.BR sched_yield (2)
+will yield the current job and wait for a new period to begin.
+.\"
+.\" FIXME Calling sched_getparam() on a SCHED_DEADLINE thread
+.\" fails with EINVAL, but sched_getscheduler() succeeds.
+.\" Is that intended? (Why?)
+.\"
+.SS SCHED_OTHER: Default Linux time-sharing scheduling
+\fBSCHED_OTHER\fP can be used only at static priority 0
+(i.e., threads under real-time policies always have priority over
+.B SCHED_OTHER
+processes).
+\fBSCHED_OTHER\fP is the standard Linux time-sharing scheduler that is
+intended for all threads that do not require the special
+real-time mechanisms.
+.PP
+The thread to run is chosen from the static
+priority 0 list based on a \fIdynamic\fP priority that is determined only
+inside this list.
+The dynamic priority is based on the nice value (see below)
+and is increased for each time quantum the thread is ready to run,
+but denied to run by the scheduler.
+This ensures fair progress among all \fBSCHED_OTHER\fP threads.
+.PP
+In the Linux kernel source code, the
+.B SCHED_OTHER
+policy is actually named
+.BR SCHED_NORMAL .
+.\"
+.SS The nice value
+The nice value is an attribute
+that can be used to influence the CPU scheduler to
+favor or disfavor a process in scheduling decisions.
+It affects the scheduling of
+.B SCHED_OTHER
+and
+.B SCHED_BATCH
+(see below) processes.
+The nice value can be modified using
+.BR nice (2),
+.BR setpriority (2),
+or
+.BR sched_setattr (2).
+.PP
+According to POSIX.1, the nice value is a per-process attribute;
+that is, the threads in a process should share a nice value.
+However, on Linux, the nice value is a per-thread attribute:
+different threads in the same process may have different nice values.
+.PP
+The range of the nice value
+varies across UNIX systems.
+On modern Linux, the range is \-20 (high priority) to +19 (low priority).
+On some other systems, the range is \-20..20.
+Very early Linux kernels (before Linux 2.0) had the range \-infinity..15.
+.\" Linux before 1.3.36 had \-infinity..15.
+.\" Since Linux 1.3.43, Linux has the range \-20..19.
+.PP
+The degree to which the nice value affects the relative scheduling of
+.B SCHED_OTHER
+processes likewise varies across UNIX systems and
+across Linux kernel versions.
+.PP
+With the advent of the CFS scheduler in Linux 2.6.23,
+Linux adopted an algorithm that causes
+relative differences in nice values to have a much stronger effect.
+In the current implementation, each unit of difference in the
+nice values of two processes results in a factor of 1.25
+in the degree to which the scheduler favors the higher priority process.
+This causes very high nice values (+19) to truly provide little CPU
+to a process whenever there is any other
+higher priority load on the system,
+and makes low nice values (\-20) deliver most of the CPU to applications
+that require it (e.g., some audio applications).
+.PP
+On Linux, the
+.B RLIMIT_NICE
+resource limit can be used to define a limit to which
+an unprivileged process's nice value can be raised; see
+.BR setrlimit (2)
+for details.
+.PP
+For further details on the nice value, see the subsections on
+the autogroup feature and group scheduling, below.
+.\"
+.SS SCHED_BATCH: Scheduling batch processes
+(Since Linux 2.6.16.)
+\fBSCHED_BATCH\fP can be used only at static priority 0.
+This policy is similar to \fBSCHED_OTHER\fP in that it schedules
+the thread according to its dynamic priority
+(based on the nice value).
+The difference is that this policy
+will cause the scheduler to always assume
+that the thread is CPU-intensive.
+Consequently, the scheduler will apply a small scheduling
+penalty with respect to wakeup behavior,
+so that this thread is mildly disfavored in scheduling decisions.
+.PP
+.\" The following paragraph is drawn largely from the text that
+.\" accompanied Ingo Molnar's patch for the implementation of
+.\" SCHED_BATCH.
+.\" commit b0a9499c3dd50d333e2aedb7e894873c58da3785
+This policy is useful for workloads that are noninteractive,
+but do not want to lower their nice value,
+and for workloads that want a deterministic scheduling policy without
+interactivity causing extra preemptions (between the workload's tasks).
+.\"
+.SS SCHED_IDLE: Scheduling very low priority jobs
+(Since Linux 2.6.23.)
+\fBSCHED_IDLE\fP can be used only at static priority 0;
+the process nice value has no influence for this policy.
+.PP
+This policy is intended for running jobs at extremely low
+priority (lower even than a +19 nice value with the
+.B SCHED_OTHER
+or
+.B SCHED_BATCH
+policies).
+.\"
+.SS Resetting scheduling policy for child processes
+Each thread has a reset-on-fork scheduling flag.
+When this flag is set, children created by
+.BR fork (2)
+do not inherit privileged scheduling policies.
+The reset-on-fork flag can be set by either:
+.IP \[bu] 3
+ORing the
+.B SCHED_RESET_ON_FORK
+flag into the
+.I policy
+argument when calling
+.BR sched_setscheduler (2)
+(since Linux 2.6.32);
+or
+.IP \[bu]
+specifying the
+.B SCHED_FLAG_RESET_ON_FORK
+flag in
+.I attr.sched_flags
+when calling
+.BR sched_setattr (2).
+.PP
+Note that the constants used with these two APIs have different names.
+The state of the reset-on-fork flag can analogously be retrieved using
+.BR sched_getscheduler (2)
+and
+.BR sched_getattr (2).
+.PP
+The reset-on-fork feature is intended for media-playback applications,
+and can be used to prevent applications evading the
+.B RLIMIT_RTTIME
+resource limit (see
+.BR getrlimit (2))
+by creating multiple child processes.
+.PP
+More precisely, if the reset-on-fork flag is set,
+the following rules apply for subsequently created children:
+.IP \[bu] 3
+If the calling thread has a scheduling policy of
+.B SCHED_FIFO
+or
+.BR SCHED_RR ,
+the policy is reset to
+.B SCHED_OTHER
+in child processes.
+.IP \[bu]
+If the calling process has a negative nice value,
+the nice value is reset to zero in child processes.
+.PP
+After the reset-on-fork flag has been enabled,
+it can be reset only if the thread has the
+.B CAP_SYS_NICE
+capability.
+This flag is disabled in child processes created by
+.BR fork (2).
+.\"
+.SS Privileges and resource limits
+Before Linux 2.6.12, only privileged
+.RB ( CAP_SYS_NICE )
+threads can set a nonzero static priority (i.e., set a real-time
+scheduling policy).
+The only change that an unprivileged thread can make is to set the
+.B SCHED_OTHER
+policy, and this can be done only if the effective user ID of the caller
+matches the real or effective user ID of the target thread
+(i.e., the thread specified by
+.IR pid )
+whose policy is being changed.
+.PP
+A thread must be privileged
+.RB ( CAP_SYS_NICE )
+in order to set or modify a
+.B SCHED_DEADLINE
+policy.
+.PP
+Since Linux 2.6.12, the
+.B RLIMIT_RTPRIO
+resource limit defines a ceiling on an unprivileged thread's
+static priority for the
+.B SCHED_RR
+and
+.B SCHED_FIFO
+policies.
+The rules for changing scheduling policy and priority are as follows:
+.IP \[bu] 3
+If an unprivileged thread has a nonzero
+.B RLIMIT_RTPRIO
+soft limit, then it can change its scheduling policy and priority,
+subject to the restriction that the priority cannot be set to a
+value higher than the maximum of its current priority and its
+.B RLIMIT_RTPRIO
+soft limit.
+.IP \[bu]
+If the
+.B RLIMIT_RTPRIO
+soft limit is 0, then the only permitted changes are to lower the priority,
+or to switch to a non-real-time policy.
+.IP \[bu]
+Subject to the same rules,
+another unprivileged thread can also make these changes,
+as long as the effective user ID of the thread making the change
+matches the real or effective user ID of the target thread.
+.IP \[bu]
+Special rules apply for the
+.B SCHED_IDLE
+policy.
+Before Linux 2.6.39,
+an unprivileged thread operating under this policy cannot
+change its policy, regardless of the value of its
+.B RLIMIT_RTPRIO
+resource limit.
+Since Linux 2.6.39,
+.\" commit c02aa73b1d18e43cfd79c2f193b225e84ca497c8
+an unprivileged thread can switch to either the
+.B SCHED_BATCH
+or the
+.B SCHED_OTHER
+policy so long as its nice value falls within the range permitted by its
+.B RLIMIT_NICE
+resource limit (see
+.BR getrlimit (2)).
+.PP
+Privileged
+.RB ( CAP_SYS_NICE )
+threads ignore the
+.B RLIMIT_RTPRIO
+limit; as with older kernels,
+they can make arbitrary changes to scheduling policy and priority.
+See
+.BR getrlimit (2)
+for further information on
+.BR RLIMIT_RTPRIO .
+.SS Limiting the CPU usage of real-time and deadline processes
+A nonblocking infinite loop in a thread scheduled under the
+.BR SCHED_FIFO ,
+.BR SCHED_RR ,
+or
+.B SCHED_DEADLINE
+policy can potentially block all other threads from accessing
+the CPU forever.
+Before Linux 2.6.25, the only way of preventing a runaway real-time
+process from freezing the system was to run (at the console)
+a shell scheduled under a higher static priority than the tested application.
+This allows an emergency kill of tested
+real-time applications that do not block or terminate as expected.
+.PP
+Since Linux 2.6.25, there are other techniques for dealing with runaway
+real-time and deadline processes.
+One of these is to use the
+.B RLIMIT_RTTIME
+resource limit to set a ceiling on the CPU time that
+a real-time process may consume.
+See
+.BR getrlimit (2)
+for details.
+.PP
+Since Linux 2.6.25, Linux also provides two
+.I /proc
+files that can be used to reserve a certain amount of CPU time
+to be used by non-real-time processes.
+Reserving CPU time in this fashion allows some CPU time to be
+allocated to (say) a root shell that can be used to kill a runaway process.
+Both of these files specify time values in microseconds:
+.TP
+.I /proc/sys/kernel/sched_rt_period_us
+This file specifies a scheduling period that is equivalent to
+100% CPU bandwidth.
+The value in this file can range from 1 to
+.BR INT_MAX ,
+giving an operating range of 1 microsecond to around 35 minutes.
+The default value in this file is 1,000,000 (1 second).
+.TP
+.I /proc/sys/kernel/sched_rt_runtime_us
+The value in this file specifies how much of the "period" time
+can be used by all real-time and deadline scheduled processes
+on the system.
+The value in this file can range from \-1 to
+.BR INT_MAX \-1.
+Specifying \-1 makes the run time the same as the period;
+that is, no CPU time is set aside for non-real-time processes
+(which was the behavior before Linux 2.6.25).
+The default value in this file is 950,000 (0.95 seconds),
+meaning that 5% of the CPU time is reserved for processes that
+don't run under a real-time or deadline scheduling policy.
+.SS Response time
+A blocked high priority thread waiting for I/O has a certain
+response time before it is scheduled again.
+The device driver writer
+can greatly reduce this response time by using a "slow interrupt"
+interrupt handler.
+.\" as described in
+.\" .BR request_irq (9).
+.SS Miscellaneous
+Child processes inherit the scheduling policy and parameters across a
+.BR fork (2).
+The scheduling policy and parameters are preserved across
+.BR execve (2).
+.PP
+Memory locking is usually needed for real-time processes to avoid
+paging delays; this can be done with
+.BR mlock (2)
+or
+.BR mlockall (2).
+.\"
+.SS The autogroup feature
+.\" commit 5091faa449ee0b7d73bc296a93bca9540fc51d0a
+Since Linux 2.6.38,
+the kernel provides a feature known as autogrouping to improve interactive
+desktop performance in the face of multiprocess, CPU-intensive
+workloads such as building the Linux kernel with large numbers of
+parallel build processes (i.e., the
+.BR make (1)
+.B \-j
+flag).
+.PP
+This feature operates in conjunction with the
+CFS scheduler and requires a kernel that is configured with
+.BR CONFIG_SCHED_AUTOGROUP .
+On a running system, this feature is enabled or disabled via the file
+.IR /proc/sys/kernel/sched_autogroup_enabled ;
+a value of 0 disables the feature, while a value of 1 enables it.
+The default value in this file is 1, unless the kernel was booted with the
+.I noautogroup
+parameter.
+.PP
+A new autogroup is created when a new session is created via
+.BR setsid (2);
+this happens, for example, when a new terminal window is started.
+A new process created by
+.BR fork (2)
+inherits its parent's autogroup membership.
+Thus, all of the processes in a session are members of the same autogroup.
+An autogroup is automatically destroyed when the last process
+in the group terminates.
+.PP
+When autogrouping is enabled, all of the members of an autogroup
+are placed in the same kernel scheduler "task group".
+The CFS scheduler employs an algorithm that equalizes the
+distribution of CPU cycles across task groups.
+The benefits of this for interactive desktop performance
+can be described via the following example.
+.PP
+Suppose that there are two autogroups competing for the same CPU
+(i.e., presume either a single CPU system or the use of
+.BR taskset (1)
+to confine all the processes to the same CPU on an SMP system).
+The first group contains ten CPU-bound processes from
+a kernel build started with
+.IR "make\~\-j10" .
+The other contains a single CPU-bound process: a video player.
+The effect of autogrouping is that the two groups will
+each receive half of the CPU cycles.
+That is, the video player will receive 50% of the CPU cycles,
+rather than just 9% of the cycles,
+which would likely lead to degraded video playback.
+The situation on an SMP system is more complex,
+.\" Mike Galbraith, 25 Nov 2016:
+.\" I'd say something more wishy-washy here, like cycles are
+.\" distributed fairly across groups and leave it at that, as your
+.\" detailed example is incorrect due to SMP fairness (which I don't
+.\" like much because [very unlikely] worst case scenario
+.\" renders a box sized group incapable of utilizing more than
+.\" a single CPU total). For example, if a group of NR_CPUS
+.\" size competes with a singleton, load balancing will try to give
+.\" the singleton a full CPU of its very own. If groups intersect for
+.\" whatever reason on say my quad lappy, distribution is 80/20 in
+.\" favor of the singleton.
+but the general effect is the same:
+the scheduler distributes CPU cycles across task groups such that
+an autogroup that contains a large number of CPU-bound processes
+does not end up hogging CPU cycles at the expense of the other
+jobs on the system.
+.PP
+A process's autogroup (task group) membership can be viewed via the file
+.IR /proc/ pid /autogroup :
+.PP
+.in +4n
+.EX
+$ \fBcat /proc/1/autogroup\fP
+/autogroup\-1 nice 0
+.EE
+.in
+.PP
+This file can also be used to modify the CPU bandwidth allocated
+to an autogroup.
+This is done by writing a number in the "nice" range to the file
+to set the autogroup's nice value.
+The allowed range is from +19 (low priority) to \-20 (high priority).
+(Writing values outside of this range causes
+.BR write (2)
+to fail with the error
+.BR EINVAL .)
+.\" FIXME .
+.\" Because of a bug introduced in Linux 4.7
+.\" (commit 2159197d66770ec01f75c93fb11dc66df81fd45b made changes
+.\" that exposed the fact that autogroup didn't call scale_load()),
+.\" it happened that *all* values in this range caused a task group
+.\" to be further disfavored by the scheduler, with \-20 resulting
+.\" in the scheduler mildly disfavoring the task group and +19 greatly
+.\" disfavoring it.
+.\"
+.\" A patch was posted on 23 Nov 2016
+.\" ("sched/autogroup: Fix 64bit kernel nice adjustment");
+.\" check later to see in which kernel version it lands.
+.PP
+The autogroup nice setting has the same meaning as the process nice value,
+but applies to distribution of CPU cycles to the autogroup as a whole,
+based on the relative nice values of other autogroups.
+For a process inside an autogroup, the CPU cycles that it receives
+will be a product of the autogroup's nice value
+(compared to other autogroups)
+and the process's nice value
+(compared to other processes in the same autogroup).
+.PP
+The use of the
+.BR cgroups (7)
+CPU controller to place processes in cgroups other than the
+root CPU cgroup overrides the effect of autogrouping.
+.PP
+The autogroup feature groups only processes scheduled under
+non-real-time policies
+.RB ( SCHED_OTHER ,
+.BR SCHED_BATCH ,
+and
+.BR SCHED_IDLE ).
+It does not group processes scheduled under real-time and
+deadline policies.
+Those processes are scheduled according to the rules described earlier.
+.\"
+.SS The nice value and group scheduling
+When scheduling non-real-time processes (i.e., those scheduled under the
+.BR SCHED_OTHER ,
+.BR SCHED_BATCH ,
+and
+.B SCHED_IDLE
+policies), the CFS scheduler employs a technique known as "group scheduling",
+if the kernel was configured with the
+.B CONFIG_FAIR_GROUP_SCHED
+option (which is typical).
+.PP
+Under group scheduling, threads are scheduled in "task groups".
+Task groups have a hierarchical relationship,
+rooted under the initial task group on the system,
+known as the "root task group".
+Task groups are formed in the following circumstances:
+.IP \[bu] 3
+All of the threads in a CPU cgroup form a task group.
+The parent of this task group is the task group of the
+corresponding parent cgroup.
+.IP \[bu]
+If autogrouping is enabled,
+then all of the threads that are (implicitly) placed in an autogroup
+(i.e., the same session, as created by
+.BR setsid (2))
+form a task group.
+Each new autogroup is thus a separate task group.
+The root task group is the parent of all such autogroups.
+.IP \[bu]
+If autogrouping is enabled, then the root task group consists of
+all processes in the root CPU cgroup that were not
+otherwise implicitly placed into a new autogroup.
+.IP \[bu]
+If autogrouping is disabled, then the root task group consists of
+all processes in the root CPU cgroup.
+.IP \[bu]
+If group scheduling was disabled (i.e., the kernel was configured without
+.BR CONFIG_FAIR_GROUP_SCHED ),
+then all of the processes on the system are notionally placed
+in a single task group.
+.PP
+Under group scheduling,
+a thread's nice value has an effect for scheduling decisions
+.IR "only relative to other threads in the same task group" .
+This has some surprising consequences in terms of the traditional semantics
+of the nice value on UNIX systems.
+In particular, if autogrouping
+is enabled (which is the default in various distributions), then employing
+.BR setpriority (2)
+or
+.BR nice (1)
+on a process has an effect only for scheduling relative
+to other processes executed in the same session
+(typically: the same terminal window).
+.PP
+Conversely, for two processes that are (for example)
+the sole CPU-bound processes in different sessions
+(e.g., different terminal windows,
+each of whose jobs are tied to different autogroups),
+.I modifying the nice value of the process in one of the sessions
+.I has no effect
+in terms of the scheduler's decisions relative to the
+process in the other session.
+.\" More succinctly: the nice(1) command is in many cases a no-op since
+.\" Linux 2.6.38.
+.\"
+A possibly useful workaround here is to use a command such as
+the following to modify the autogroup nice value for
+.I all
+of the processes in a terminal session:
+.PP
+.in +4n
+.EX
+$ \fBecho 10 > /proc/self/autogroup\fP
+.EE
+.in
+.SS Real-time features in the mainline Linux kernel
+.\" FIXME . Probably this text will need some minor tweaking
+.\" ask Carsten Emde about this.
+Since Linux 2.6.18, Linux is gradually
+becoming equipped with real-time capabilities,
+most of which are derived from the former
+.I realtime\-preempt
+patch set.
+Until the patches have been completely merged into the
+mainline kernel,
+they must be installed to achieve the best real-time performance.
+These patches are named:
+.PP
+.in +4n
+.EX
+patch\-\fIkernelversion\fP\-rt\fIpatchversion\fP
+.EE
+.in
+.PP
+and can be downloaded from
+.UR http://www.kernel.org\:/pub\:/linux\:/kernel\:/projects\:/rt/
+.UE .
+.PP
+Without the patches and prior to their full inclusion into the mainline
+kernel, the kernel configuration offers only the three preemption classes
+.BR CONFIG_PREEMPT_NONE ,
+.BR CONFIG_PREEMPT_VOLUNTARY ,
+and
+.B CONFIG_PREEMPT_DESKTOP
+which respectively provide no, some, and considerable
+reduction of the worst-case scheduling latency.
+.PP
+With the patches applied or after their full inclusion into the mainline
+kernel, the additional configuration item
+.B CONFIG_PREEMPT_RT
+becomes available.
+If this is selected, Linux is transformed into a regular
+real-time operating system.
+The FIFO and RR scheduling policies are then used to run a thread
+with true real-time priority and a minimum worst-case scheduling latency.
+.SH NOTES
+The
+.BR cgroups (7)
+CPU controller can be used to limit the CPU consumption of
+groups of processes.
+.PP
+Originally, Standard Linux was intended as a general-purpose operating
+system being able to handle background processes, interactive
+applications, and less demanding real-time applications (applications that
+need to usually meet timing deadlines).
+Although Linux 2.6
+allowed for kernel preemption and the newly introduced O(1) scheduler
+ensures that the time needed to schedule is fixed and deterministic
+irrespective of the number of active tasks, true real-time computing
+was not possible up to Linux 2.6.17.
+.SH SEE ALSO
+.ad l
+.nh
+.BR chcpu (1),
+.BR chrt (1),
+.BR lscpu (1),
+.BR ps (1),
+.BR taskset (1),
+.BR top (1),
+.BR getpriority (2),
+.BR mlock (2),
+.BR mlockall (2),
+.BR munlock (2),
+.BR munlockall (2),
+.BR nice (2),
+.BR sched_get_priority_max (2),
+.BR sched_get_priority_min (2),
+.BR sched_getaffinity (2),
+.BR sched_getparam (2),
+.BR sched_getscheduler (2),
+.BR sched_rr_get_interval (2),
+.BR sched_setaffinity (2),
+.BR sched_setparam (2),
+.BR sched_setscheduler (2),
+.BR sched_yield (2),
+.BR setpriority (2),
+.BR pthread_getaffinity_np (3),
+.BR pthread_getschedparam (3),
+.BR pthread_setaffinity_np (3),
+.BR sched_getcpu (3),
+.BR capabilities (7),
+.BR cpuset (7)
+.ad
+.PP
+.I Programming for the real world \- POSIX.4
+by Bill O.\& Gallmeister, O'Reilly & Associates, Inc., ISBN 1-56592-074-0.
+.PP
+The Linux kernel source files
+.IR \%Documentation/\:scheduler/\:sched\-deadline\:.txt ,
+.IR \%Documentation/\:scheduler/\:sched\-rt\-group\:.txt ,
+.IR \%Documentation/\:scheduler/\:sched\-design\-CFS\:.txt ,
+and
+.I \%Documentation/\:scheduler/\:sched\-nice\-design\:.txt
diff --git a/man7/sem_overview.7 b/man7/sem_overview.7
new file mode 100644
index 0000000..c452a1c
--- /dev/null
+++ b/man7/sem_overview.7
@@ -0,0 +1,139 @@
+.\" Copyright (C) 2006 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH sem_overview 7 2022-12-04 "Linux man-pages 6.05.01"
+.SH NAME
+sem_overview \- overview of POSIX semaphores
+.SH DESCRIPTION
+POSIX semaphores allow processes and threads to synchronize their actions.
+.PP
+A semaphore is an integer whose value is never allowed to fall below zero.
+Two operations can be performed on semaphores:
+increment the semaphore value by one
+.RB ( sem_post (3));
+and decrement the semaphore value by one
+.RB ( sem_wait (3)).
+If the value of a semaphore is currently zero, then a
+.BR sem_wait (3)
+operation will block until the value becomes greater than zero.
+.PP
+POSIX semaphores come in two forms: named semaphores and
+unnamed semaphores.
+.TP
+.B Named semaphores
+A named semaphore is identified by a name of the form
+.IR /somename ;
+that is, a null-terminated string of up to
+.BI NAME_MAX \-4
+(i.e., 251) characters consisting of an initial slash,
+.\" glibc allows the initial slash to be omitted, and makes
+.\" multiple initial slashes equivalent to a single slash.
+.\" This differs from the implementation of POSIX message queues.
+followed by one or more characters, none of which are slashes.
+.\" glibc allows subdirectory components in the name, in which
+.\" case the subdirectory tree must exist under /dev/shm, and
+.\" the first subdirectory component must exist as the name
+.\" sem.name, and all of the subdirectory components must allow the
+.\" required permissions if a user wants to create a semaphore
+.\" object in a subdirectory.
+Two processes can operate on the same named semaphore by passing
+the same name to
+.BR sem_open (3).
+.IP
+The
+.BR sem_open (3)
+function creates a new named semaphore or opens an existing
+named semaphore.
+After the semaphore has been opened, it can be operated on using
+.BR sem_post (3)
+and
+.BR sem_wait (3).
+When a process has finished using the semaphore, it can use
+.BR sem_close (3)
+to close the semaphore.
+When all processes have finished using the semaphore,
+it can be removed from the system using
+.BR sem_unlink (3).
+.TP
+.B Unnamed semaphores (memory-based semaphores)
+An unnamed semaphore does not have a name.
+Instead the semaphore is placed in a region of memory that
+is shared between multiple threads (a
+.IR "thread-shared semaphore" )
+or processes (a
+.IR "process-shared semaphore" ).
+A thread-shared semaphore is placed in an area of memory shared
+between the threads of a process, for example, a global variable.
+A process-shared semaphore must be placed in a shared memory region
+(e.g., a System V shared memory segment created using
+.BR shmget (2),
+or a POSIX shared memory object created using
+.BR shm_open (3)).
+.IP
+Before being used, an unnamed semaphore must be initialized using
+.BR sem_init (3).
+It can then be operated on using
+.BR sem_post (3)
+and
+.BR sem_wait (3).
+When the semaphore is no longer required,
+and before the memory in which it is located is deallocated,
+the semaphore should be destroyed using
+.BR sem_destroy (3).
+.PP
+The remainder of this section describes some specific details
+of the Linux implementation of POSIX semaphores.
+.SS Versions
+Before Linux 2.6, Linux supported only unnamed,
+thread-shared semaphores.
+On a system with Linux 2.6 and a glibc that provides the NPTL
+threading implementation,
+a complete implementation of POSIX semaphores is provided.
+.SS Persistence
+POSIX named semaphores have kernel persistence:
+if not removed by
+.BR sem_unlink (3),
+a semaphore will exist until the system is shut down.
+.SS Linking
+Programs using the POSIX semaphores API must be compiled with
+.I cc \-pthread
+to link against the POSIX threads library,
+.IR libpthread .
+.SS Accessing named semaphores via the filesystem
+On Linux, named semaphores are created in a virtual filesystem,
+normally mounted under
+.IR /dev/shm ,
+with names of the form
+.IR \fBsem.\fPsomename .
+(This is the reason that semaphore names are limited to
+.BI NAME_MAX \-4
+rather than
+.B NAME_MAX
+characters.)
+.PP
+Since Linux 2.6.19, ACLs can be placed on files under this directory,
+to control object permissions on a per-user and per-group basis.
+.SH NOTES
+System V semaphores
+.RB ( semget (2),
+.BR semop (2),
+etc.) are an older semaphore API.
+POSIX semaphores provide a simpler and better-designed interface than
+System V semaphores;
+on the other hand, POSIX semaphores are less widely available
+(especially on older systems) than System V semaphores.
+.SH EXAMPLES
+An example of the use of various POSIX semaphore functions is shown in
+.BR sem_wait (3).
+.SH SEE ALSO
+.BR sem_close (3),
+.BR sem_destroy (3),
+.BR sem_getvalue (3),
+.BR sem_init (3),
+.BR sem_open (3),
+.BR sem_post (3),
+.BR sem_unlink (3),
+.BR sem_wait (3),
+.BR pthreads (7),
+.BR shm_overview (7)
diff --git a/man7/session-keyring.7 b/man7/session-keyring.7
new file mode 100644
index 0000000..d8a950f
--- /dev/null
+++ b/man7/session-keyring.7
@@ -0,0 +1,113 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH session-keyring 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+session-keyring \- session shared process keyring
+.SH DESCRIPTION
+The session keyring is a keyring used to anchor keys on behalf of a process.
+It is typically created by
+.BR pam_keyinit (8)
+when a user logs in and a link will be added that refers to the
+.BR user\-keyring (7).
+Optionally,
+.BR PAM (7)
+may revoke the session keyring on logout.
+(In typical configurations, PAM does do this revocation.)
+The session keyring has the name (description)
+.IR _ses .
+.PP
+A special serial number value,
+.BR KEY_SPEC_SESSION_KEYRING ,
+is defined that can be used in lieu of the actual serial number of
+the calling process's session keyring.
+.PP
+From the
+.BR keyctl (1)
+utility, '\fB@s\fP' can be used instead of a numeric key ID in
+much the same way.
+.PP
+A process's session keyring is inherited across
+.BR clone (2),
+.BR fork (2),
+and
+.BR vfork (2).
+The session keyring
+is preserved across
+.BR execve (2),
+even when the executable is set-user-ID or set-group-ID or has capabilities.
+The session keyring is destroyed when the last process that
+refers to it exits.
+.PP
+If a process doesn't have a session keyring when it is accessed, then,
+under certain circumstances, the
+.BR user\-session\-keyring (7)
+will be attached as the session keyring
+and under others a new session keyring will be created.
+(See
+.BR user\-session\-keyring (7)
+for further details.)
+.SS Special operations
+The
+.I keyutils
+library provides the following special operations for manipulating
+session keyrings:
+.TP
+.BR keyctl_join_session_keyring (3)
+This operation allows the caller to change the session keyring
+that it subscribes to.
+The caller can join an existing keyring with a specified name (description),
+create a new keyring with a given name,
+or ask the kernel to create a new "anonymous"
+session keyring with the name "_ses".
+(This function is an interface to the
+.BR keyctl (2)
+.B KEYCTL_JOIN_SESSION_KEYRING
+operation.)
+.TP
+.BR keyctl_session_to_parent (3)
+This operation allows the caller to make the parent process's
+session keyring the same as its own.
+For this to succeed, the parent process must have
+identical security attributes and must be single threaded.
+(This function is an interface to the
+.BR keyctl (2)
+.B KEYCTL_SESSION_TO_PARENT
+operation.)
+.PP
+These operations are also exposed through the
+.BR keyctl (1)
+utility as:
+.PP
+.in +4n
+.EX
+keyctl session
+keyctl session \- [<prog> <arg1> <arg2> ...]
+keyctl session <name> [<prog> <arg1> <arg2> ...]
+.EE
+.in
+.PP
+and:
+.PP
+.in +4n
+.EX
+keyctl new_session
+.EE
+.in
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyctl_join_session_keyring (3),
+.BR keyctl_session_to_parent (3),
+.BR keyrings (7),
+.BR PAM (7),
+.BR persistent\-keyring (7),
+.BR process\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-keyring (7),
+.BR user\-session\-keyring (7),
+.BR pam_keyinit (8)
diff --git a/man7/shm_overview.7 b/man7/shm_overview.7
new file mode 100644
index 0000000..9a33ea0
--- /dev/null
+++ b/man7/shm_overview.7
@@ -0,0 +1,104 @@
+.\" Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH shm_overview 7 2022-12-04 "Linux man-pages 6.05.01"
+.SH NAME
+shm_overview \- overview of POSIX shared memory
+.SH DESCRIPTION
+The POSIX shared memory API allows processes to communicate information
+by sharing a region of memory.
+.PP
+The interfaces employed in the API are:
+.TP 15
+.BR shm_open (3)
+Create and open a new object, or open an existing object.
+This is analogous to
+.BR open (2).
+The call returns a file descriptor for use by the other
+interfaces listed below.
+.TP
+.BR ftruncate (2)
+Set the size of the shared memory object.
+(A newly created shared memory object has a length of zero.)
+.TP
+.BR mmap (2)
+Map the shared memory object into the virtual address space
+of the calling process.
+.TP
+.BR munmap (2)
+Unmap the shared memory object from the virtual address space
+of the calling process.
+.TP
+.BR shm_unlink (3)
+Remove a shared memory object name.
+.TP
+.BR close (2)
+Close the file descriptor allocated by
+.BR shm_open (3)
+when it is no longer needed.
+.TP
+.BR fstat (2)
+Obtain a
+.I stat
+structure that describes the shared memory object.
+Among the information returned by this call are the object's
+size
+.RI ( st_size ),
+permissions
+.RI ( st_mode ),
+owner
+.RI ( st_uid ),
+and group
+.RI ( st_gid ).
+.TP
+.BR fchown (2)
+Change the ownership of a shared memory object.
+.TP
+.BR fchmod (2)
+Change the permissions of a shared memory object.
+.SS Versions
+POSIX shared memory is supported since Linux 2.4 and glibc 2.2.
+.SS Persistence
+POSIX shared memory objects have kernel persistence:
+a shared memory object will exist until the system is shut down,
+or until all processes have unmapped the object and it has been deleted with
+.BR shm_unlink (3).
+.SS Linking
+Programs using the POSIX shared memory API must be compiled with
+.I cc \-lrt
+to link against the real-time library,
+.IR librt .
+.SS Accessing shared memory objects via the filesystem
+On Linux, shared memory objects are created in a
+.RB ( tmpfs (5))
+virtual filesystem, normally mounted under
+.IR /dev/shm .
+Since Linux 2.6.19, Linux supports the use of access control lists (ACLs)
+to control the permissions of objects in the virtual filesystem.
+.SH NOTES
+Typically, processes must synchronize their access to a shared
+memory object, using, for example, POSIX semaphores.
+.PP
+System V shared memory
+.RB ( shmget (2),
+.BR shmop (2),
+etc.) is an older shared memory API.
+POSIX shared memory provides a simpler and better-designed interface;
+on the other hand, POSIX shared memory is somewhat less widely available
+(especially on older systems) than System V shared memory.
+.SH SEE ALSO
+.BR fchmod (2),
+.BR fchown (2),
+.BR fstat (2),
+.BR ftruncate (2),
+.BR memfd_create (2),
+.BR mmap (2),
+.BR mprotect (2),
+.BR munmap (2),
+.BR shmget (2),
+.BR shmop (2),
+.BR shm_open (3),
+.BR shm_unlink (3),
+.BR sem_overview (7)
diff --git a/man7/sigevent.7 b/man7/sigevent.7
new file mode 100644
index 0000000..1ae860f
--- /dev/null
+++ b/man7/sigevent.7
@@ -0,0 +1,120 @@
+.\" Copyright (C) 2006, 2010 Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Copyright (C) 2009 Petr Baudis <pasky@suse.cz>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH sigevent 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+sigevent \- structure for notification from asynchronous routines
+.SH SYNOPSIS
+.nf
+#include <signal.h>
+.PP
+union sigval { /* Data passed with notification */
+ int sival_int; /* Integer value */
+ void *sival_ptr; /* Pointer value */
+};
+.PP
+struct sigevent {
+ int sigev_notify; /* Notification method */
+ int sigev_signo; /* Notification signal */
+ union sigval sigev_value;
+ /* Data passed with notification */
+ void (*sigev_notify_function)(union sigval);
+ /* Function used for thread
+ notification (SIGEV_THREAD) */
+ void *sigev_notify_attributes;
+ /* Attributes for notification thread
+ (SIGEV_THREAD) */
+ pid_t sigev_notify_thread_id;
+ /* ID of thread to signal
+ (SIGEV_THREAD_ID); Linux-specific */
+};
+.fi
+.SH DESCRIPTION
+The
+.I sigevent
+structure is used by various APIs
+to describe the way a process is to be notified about an event
+(e.g., completion of an asynchronous request, expiration of a timer,
+or the arrival of a message).
+.PP
+The definition shown in the SYNOPSIS is approximate:
+some of the fields in the
+.I sigevent
+structure may be defined as part of a union.
+Programs should employ only those fields relevant
+to the value specified in
+.IR sigev_notify .
+.PP
+The
+.I sigev_notify
+field specifies how notification is to be performed.
+This field can have one of the following values:
+.TP
+.B SIGEV_NONE
+A "null" notification: don't do anything when the event occurs.
+.TP
+.B SIGEV_SIGNAL
+Notify the process by sending the signal specified in
+.IR sigev_signo .
+.IP
+If the signal is caught with a signal handler that was registered using the
+.BR sigaction (2)
+.B SA_SIGINFO
+flag, then the following fields are set in the
+.I siginfo_t
+structure that is passed as the second argument of the handler:
+.RS
+.TP 10
+.I si_code
+This field is set to a value that depends on the API
+delivering the notification.
+.TP
+.I si_signo
+This field is set to the signal number (i.e., the same value as in
+.IR sigev_signo ).
+.TP
+.I si_value
+This field is set to the value specified in
+.IR sigev_value .
+.RE
+.IP
+Depending on the API, other fields may also be set in the
+.I siginfo_t
+structure.
+.IP
+The same information is also available if the signal is accepted using
+.BR sigwaitinfo (2).
+.TP
+.B SIGEV_THREAD
+Notify the process by invoking
+.I sigev_notify_function
+"as if" it were the start function of a new thread.
+(Among the implementation possibilities here are that
+each timer notification could result in the creation of a new thread,
+or that a single thread is created to receive all notifications.)
+The function is invoked with
+.I sigev_value
+as its sole argument.
+If
+.I sigev_notify_attributes
+is not NULL, it should point to a
+.I pthread_attr_t
+structure that defines attributes for the new thread (see
+.BR pthread_attr_init (3)).
+.TP
+.BR SIGEV_THREAD_ID " (Linux-specific)"
+.\" | SIGEV_SIGNAL vs not?
+Currently used only by POSIX timers; see
+.BR timer_create (2).
+.SH SEE ALSO
+.BR timer_create (2),
+.BR aio_fsync (3),
+.BR aio_read (3),
+.BR aio_write (3),
+.BR getaddrinfo_a (3),
+.BR lio_listio (3),
+.BR mq_notify (3),
+.BR aio (7),
+.BR pthreads (7)
diff --git a/man7/signal-safety.7 b/man7/signal-safety.7
new file mode 100644
index 0000000..4bcb478
--- /dev/null
+++ b/man7/signal-safety.7
@@ -0,0 +1,341 @@
+'\" t
+.\" Copyright (c) 2016 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH signal-safety 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+signal-safety \- async-signal-safe functions
+.SH DESCRIPTION
+An
+.I async-signal-safe
+function is one that can be safely called from within a signal handler.
+Many functions are
+.I not
+async-signal-safe.
+In particular,
+nonreentrant functions are generally unsafe to call from a signal handler.
+.PP
+The kinds of issues that render a function
+unsafe can be quickly understood when one considers
+the implementation of the
+.I stdio
+library, none of whose functions are async-signal-safe.
+.PP
+When performing buffered I/O on a file, the
+.I stdio
+functions must maintain a statically allocated data buffer
+along with associated counters and indexes (or pointers)
+that record the amount of data and the current position in the buffer.
+Suppose that the main program is in the middle of a call to a
+.I stdio
+function such as
+.BR printf (3)
+where the buffer and associated variables have been partially updated.
+If, at that moment,
+the program is interrupted by a signal handler that also calls
+.BR printf (3),
+then the second call to
+.BR printf (3)
+will operate on inconsistent data, with unpredictable results.
+.PP
+To avoid problems with unsafe functions, there are two possible choices:
+.IP (a) 5
+Ensure that
+(1) the signal handler calls only async-signal-safe functions,
+and
+(2) the signal handler itself is reentrant
+with respect to global variables in the main program.
+.IP (b)
+Block signal delivery in the main program when calling functions
+that are unsafe or operating on global data that is also accessed
+by the signal handler.
+.PP
+Generally, the second choice is difficult in programs of any complexity,
+so the first choice is taken.
+.PP
+POSIX.1 specifies a set of functions that an implementation
+must make async-signal-safe.
+(An implementation may provide safe implementations of additional functions,
+but this is not required by the standard and other implementations
+may not provide the same guarantees.)
+.PP
+In general, a function is async-signal-safe either because it is reentrant
+or because it is atomic with respect to signals
+(i.e., its execution can't be interrupted by a signal handler).
+.PP
+The set of functions required to be async-signal-safe by POSIX.1
+is shown in the following table.
+The functions not otherwise noted were required to be async-signal-safe
+in POSIX.1-2001;
+the table details changes in the subsequent standards.
+.PP
+.TS
+lb lb
+l l.
+Function Notes
+\fBabort\fP(3) Added in POSIX.1-2001 TC1
+\fBaccept\fP(2)
+\fBaccess\fP(2)
+\fBaio_error\fP(3)
+\fBaio_return\fP(3)
+\fBaio_suspend\fP(3) See notes below
+\fBalarm\fP(2)
+\fBbind\fP(2)
+\fBcfgetispeed\fP(3)
+\fBcfgetospeed\fP(3)
+\fBcfsetispeed\fP(3)
+\fBcfsetospeed\fP(3)
+\fBchdir\fP(2)
+\fBchmod\fP(2)
+\fBchown\fP(2)
+\fBclock_gettime\fP(2)
+\fBclose\fP(2)
+\fBconnect\fP(2)
+\fBcreat\fP(2)
+\fBdup\fP(2)
+\fBdup2\fP(2)
+\fBexecl\fP(3) T{
+Added in POSIX.1-2008; see notes below
+T}
+\fBexecle\fP(3) See notes below
+\fBexecv\fP(3) Added in POSIX.1-2008
+\fBexecve\fP(2)
+\fB_exit\fP(2)
+\fB_Exit\fP(2)
+\fBfaccessat\fP(2) Added in POSIX.1-2008
+\fBfchdir\fP(2) Added in POSIX.1-2008 TC1
+\fBfchmod\fP(2)
+\fBfchmodat\fP(2) Added in POSIX.1-2008
+\fBfchown\fP(2)
+\fBfchownat\fP(2) Added in POSIX.1-2008
+\fBfcntl\fP(2)
+\fBfdatasync\fP(2)
+\fBfexecve\fP(3) Added in POSIX.1-2008
+\fBffs\fP(3) Added in POSIX.1-2008 TC2
+\fBfork\fP(2) See notes below
+\fBfstat\fP(2)
+\fBfstatat\fP(2) Added in POSIX.1-2008
+\fBfsync\fP(2)
+\fBftruncate\fP(2)
+\fBfutimens\fP(3) Added in POSIX.1-2008
+\fBgetegid\fP(2)
+\fBgeteuid\fP(2)
+\fBgetgid\fP(2)
+\fBgetgroups\fP(2)
+\fBgetpeername\fP(2)
+\fBgetpgrp\fP(2)
+\fBgetpid\fP(2)
+\fBgetppid\fP(2)
+\fBgetsockname\fP(2)
+\fBgetsockopt\fP(2)
+\fBgetuid\fP(2)
+\fBhtonl\fP(3) Added in POSIX.1-2008 TC2
+\fBhtons\fP(3) Added in POSIX.1-2008 TC2
+\fBkill\fP(2)
+\fBlink\fP(2)
+\fBlinkat\fP(2) Added in POSIX.1-2008
+\fBlisten\fP(2)
+\fBlongjmp\fP(3) T{
+Added in POSIX.1-2008 TC2; see notes below
+T}
+\fBlseek\fP(2)
+\fBlstat\fP(2)
+\fBmemccpy\fP(3) Added in POSIX.1-2008 TC2
+\fBmemchr\fP(3) Added in POSIX.1-2008 TC2
+\fBmemcmp\fP(3) Added in POSIX.1-2008 TC2
+\fBmemcpy\fP(3) Added in POSIX.1-2008 TC2
+\fBmemmove\fP(3) Added in POSIX.1-2008 TC2
+\fBmemset\fP(3) Added in POSIX.1-2008 TC2
+\fBmkdir\fP(2)
+\fBmkdirat\fP(2) Added in POSIX.1-2008
+\fBmkfifo\fP(3)
+\fBmkfifoat\fP(3) Added in POSIX.1-2008
+\fBmknod\fP(2) Added in POSIX.1-2008
+\fBmknodat\fP(2) Added in POSIX.1-2008
+\fBntohl\fP(3) Added in POSIX.1-2008 TC2
+\fBntohs\fP(3) Added in POSIX.1-2008 TC2
+\fBopen\fP(2)
+\fBopenat\fP(2) Added in POSIX.1-2008
+\fBpause\fP(2)
+\fBpipe\fP(2)
+\fBpoll\fP(2)
+\fBposix_trace_event\fP(3)
+\fBpselect\fP(2)
+\fBpthread_kill\fP(3) Added in POSIX.1-2008 TC1
+\fBpthread_self\fP(3) Added in POSIX.1-2008 TC1
+\fBpthread_sigmask\fP(3) Added in POSIX.1-2008 TC1
+\fBraise\fP(3)
+\fBread\fP(2)
+\fBreadlink\fP(2)
+\fBreadlinkat\fP(2) Added in POSIX.1-2008
+\fBrecv\fP(2)
+\fBrecvfrom\fP(2)
+\fBrecvmsg\fP(2)
+\fBrename\fP(2)
+\fBrenameat\fP(2) Added in POSIX.1-2008
+\fBrmdir\fP(2)
+\fBselect\fP(2)
+\fBsem_post\fP(3)
+\fBsend\fP(2)
+\fBsendmsg\fP(2)
+\fBsendto\fP(2)
+\fBsetgid\fP(2)
+\fBsetpgid\fP(2)
+\fBsetsid\fP(2)
+\fBsetsockopt\fP(2)
+\fBsetuid\fP(2)
+\fBshutdown\fP(2)
+\fBsigaction\fP(2)
+\fBsigaddset\fP(3)
+\fBsigdelset\fP(3)
+\fBsigemptyset\fP(3)
+\fBsigfillset\fP(3)
+\fBsigismember\fP(3)
+\fBsiglongjmp\fP(3) T{
+Added in POSIX.1-2008 TC2; see notes below
+T}
+\fBsignal\fP(2)
+\fBsigpause\fP(3)
+\fBsigpending\fP(2)
+\fBsigprocmask\fP(2)
+\fBsigqueue\fP(2)
+\fBsigset\fP(3)
+\fBsigsuspend\fP(2)
+\fBsleep\fP(3)
+\fBsockatmark\fP(3) Added in POSIX.1-2001 TC2
+\fBsocket\fP(2)
+\fBsocketpair\fP(2)
+\fBstat\fP(2)
+\fBstpcpy\fP(3) Added in POSIX.1-2008 TC2
+\fBstpncpy\fP(3) Added in POSIX.1-2008 TC2
+\fBstrcat\fP(3) Added in POSIX.1-2008 TC2
+\fBstrchr\fP(3) Added in POSIX.1-2008 TC2
+\fBstrcmp\fP(3) Added in POSIX.1-2008 TC2
+\fBstrcpy\fP(3) Added in POSIX.1-2008 TC2
+\fBstrcspn\fP(3) Added in POSIX.1-2008 TC2
+\fBstrlen\fP(3) Added in POSIX.1-2008 TC2
+\fBstrncat\fP(3) Added in POSIX.1-2008 TC2
+\fBstrncmp\fP(3) Added in POSIX.1-2008 TC2
+\fBstrncpy\fP(3) Added in POSIX.1-2008 TC2
+\fBstrnlen\fP(3) Added in POSIX.1-2008 TC2
+\fBstrpbrk\fP(3) Added in POSIX.1-2008 TC2
+\fBstrrchr\fP(3) Added in POSIX.1-2008 TC2
+\fBstrspn\fP(3) Added in POSIX.1-2008 TC2
+\fBstrstr\fP(3) Added in POSIX.1-2008 TC2
+\fBstrtok_r\fP(3) Added in POSIX.1-2008 TC2
+\fBsymlink\fP(2)
+\fBsymlinkat\fP(2) Added in POSIX.1-2008
+\fBtcdrain\fP(3)
+\fBtcflow\fP(3)
+\fBtcflush\fP(3)
+\fBtcgetattr\fP(3)
+\fBtcgetpgrp\fP(3)
+\fBtcsendbreak\fP(3)
+\fBtcsetattr\fP(3)
+\fBtcsetpgrp\fP(3)
+\fBtime\fP(2)
+\fBtimer_getoverrun\fP(2)
+\fBtimer_gettime\fP(2)
+\fBtimer_settime\fP(2)
+\fBtimes\fP(2)
+\fBumask\fP(2)
+\fBuname\fP(2)
+\fBunlink\fP(2)
+\fBunlinkat\fP(2) Added in POSIX.1-2008
+\fButime\fP(2)
+\fButimensat\fP(2) Added in POSIX.1-2008
+\fButimes\fP(2) Added in POSIX.1-2008
+\fBwait\fP(2)
+\fBwaitpid\fP(2)
+\fBwcpcpy\fP(3) Added in POSIX.1-2008 TC2
+\fBwcpncpy\fP(3) Added in POSIX.1-2008 TC2
+\fBwcscat\fP(3) Added in POSIX.1-2008 TC2
+\fBwcschr\fP(3) Added in POSIX.1-2008 TC2
+\fBwcscmp\fP(3) Added in POSIX.1-2008 TC2
+\fBwcscpy\fP(3) Added in POSIX.1-2008 TC2
+\fBwcscspn\fP(3) Added in POSIX.1-2008 TC2
+\fBwcslen\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsncat\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsncmp\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsncpy\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsnlen\fP(3) Added in POSIX.1-2008 TC2
+\fBwcspbrk\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsrchr\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsspn\fP(3) Added in POSIX.1-2008 TC2
+\fBwcsstr\fP(3) Added in POSIX.1-2008 TC2
+\fBwcstok\fP(3) Added in POSIX.1-2008 TC2
+\fBwmemchr\fP(3) Added in POSIX.1-2008 TC2
+\fBwmemcmp\fP(3) Added in POSIX.1-2008 TC2
+\fBwmemcpy\fP(3) Added in POSIX.1-2008 TC2
+\fBwmemmove\fP(3) Added in POSIX.1-2008 TC2
+\fBwmemset\fP(3) Added in POSIX.1-2008 TC2
+\fBwrite\fP(2)
+.TE
+.PP
+Notes:
+.IP \[bu] 3
+POSIX.1-2001 and POSIX.1-2001 TC2 required the functions
+.BR fpathconf (3),
+.BR pathconf (3),
+and
+.BR sysconf (3)
+to be async-signal-safe, but this requirement was removed in POSIX.1-2008.
+.IP \[bu]
+If a signal handler interrupts the execution of an unsafe function,
+and the handler terminates via a call to
+.BR longjmp (3)
+or
+.BR siglongjmp (3)
+and the program subsequently calls an unsafe function,
+then the behavior of the program is undefined.
+.IP \[bu]
+POSIX.1-2001 TC1 clarified
+that if an application calls
+.BR fork (2)
+from a signal handler and any of the fork handlers registered by
+.BR pthread_atfork (3)
+calls a function that is not async-signal-safe, the behavior is undefined.
+A future revision of the standard
+.\" http://www.opengroup.org/austin/aardvark/latest/xshbug3.txt
+is likely to remove
+.BR fork (2)
+from the list of async-signal-safe functions.
+.\"
+.IP \[bu]
+Asynchronous signal handlers that call functions which are cancelation
+points and nest over regions of deferred cancelation may trigger
+cancelation whose behavior is as if asynchronous cancelation had
+occurred and may cause application state to become inconsistent.
+.\"
+.SS errno
+Fetching and setting the value of
+.I errno
+is async-signal-safe provided that the signal handler saves
+.I errno
+on entry and restores its value before returning.
+.\"
+.SS Deviations in the GNU C library
+The following known deviations from the standard occur in
+the GNU C library:
+.IP \[bu] 3
+Before glibc 2.24,
+.BR execl (3)
+and
+.BR execle (3)
+employed
+.BR realloc (3)
+internally and were consequently not async-signal-safe.
+.\" https://sourceware.org/bugzilla/show_bug.cgi?id=19534
+This was fixed in glibc 2.24.
+.IP \[bu]
+.\" FIXME . https://sourceware.org/bugzilla/show_bug.cgi?id=13172
+The glibc implementation of
+.BR aio_suspend (3)
+is not async-signal-safe because it uses
+.BR pthread_mutex_lock (3)
+internally.
+.SH SEE ALSO
+.BR sigaction (2),
+.BR signal (7),
+.BR standards (7)
diff --git a/man7/signal.7 b/man7/signal.7
new file mode 100644
index 0000000..6f6f9c9
--- /dev/null
+++ b/man7/signal.7
@@ -0,0 +1,1019 @@
+'\" t
+.\" Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
+.\" and Copyright (c) 2002, 2006, 2020 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" and Copyright (c) 2008 Linux Foundation, written by Michael Kerrisk
+.\" <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified Sat Jul 24 17:34:08 1993 by Rik Faith (faith@cs.unc.edu)
+.\" Modified Sun Jan 7 01:41:27 1996 by Andries Brouwer (aeb@cwi.nl)
+.\" Modified Sun Apr 14 12:02:29 1996 by Andries Brouwer (aeb@cwi.nl)
+.\" Modified Sat Nov 13 16:28:23 1999 by Andries Brouwer (aeb@cwi.nl)
+.\" Modified 10 Apr 2002, by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Modified 7 Jun 2002, by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Added information on real-time signals
+.\" Modified 13 Jun 2002, by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Noted that SIGSTKFLT is in fact unused
+.\" 2004-12-03, Modified mtk, added notes on RLIMIT_SIGPENDING
+.\" 2006-04-24, mtk, Added text on changing signal dispositions,
+.\" signal mask, and pending signals.
+.\" 2008-07-04, mtk:
+.\" Added section on system call restarting (SA_RESTART)
+.\" Added section on stop/cont signals interrupting syscalls.
+.\" 2008-10-05, mtk: various additions
+.\"
+.TH signal 7 2023-04-03 "Linux man-pages 6.05.01"
+.SH NAME
+signal \- overview of signals
+.SH DESCRIPTION
+Linux supports both POSIX reliable signals (hereinafter
+"standard signals") and POSIX real-time signals.
+.SS Signal dispositions
+Each signal has a current
+.IR disposition ,
+which determines how the process behaves when it is delivered
+the signal.
+.PP
+The entries in the "Action" column of the table below specify
+the default disposition for each signal, as follows:
+.TP
+Term
+Default action is to terminate the process.
+.TP
+Ign
+Default action is to ignore the signal.
+.TP
+Core
+Default action is to terminate the process and dump core (see
+.BR core (5)).
+.TP
+Stop
+Default action is to stop the process.
+.TP
+Cont
+Default action is to continue the process if it is currently stopped.
+.PP
+A process can change the disposition of a signal using
+.BR sigaction (2)
+or
+.BR signal (2).
+(The latter is less portable when establishing a signal handler;
+see
+.BR signal (2)
+for details.)
+Using these system calls, a process can elect one of the
+following behaviors to occur on delivery of the signal:
+perform the default action; ignore the signal;
+or catch the signal with a
+.IR "signal handler" ,
+a programmer-defined function that is automatically invoked
+when the signal is delivered.
+.PP
+By default, a signal handler is invoked on the
+normal process stack.
+It is possible to arrange that the signal handler
+uses an alternate stack; see
+.BR sigaltstack (2)
+for a discussion of how to do this and when it might be useful.
+.PP
+The signal disposition is a per-process attribute:
+in a multithreaded application, the disposition of a
+particular signal is the same for all threads.
+.PP
+A child created via
+.BR fork (2)
+inherits a copy of its parent's signal dispositions.
+During an
+.BR execve (2),
+the dispositions of handled signals are reset to the default;
+the dispositions of ignored signals are left unchanged.
+.SS Sending a signal
+The following system calls and library functions allow
+the caller to send a signal:
+.TP
+.BR raise (3)
+Sends a signal to the calling thread.
+.TP
+.BR kill (2)
+Sends a signal to a specified process,
+to all members of a specified process group,
+or to all processes on the system.
+.TP
+.BR pidfd_send_signal (2)
+Sends a signal to a process identified by a PID file descriptor.
+.TP
+.BR killpg (3)
+Sends a signal to all of the members of a specified process group.
+.TP
+.BR pthread_kill (3)
+Sends a signal to a specified POSIX thread in the same process as
+the caller.
+.TP
+.BR tgkill (2)
+Sends a signal to a specified thread within a specific process.
+(This is the system call used to implement
+.BR pthread_kill (3).)
+.TP
+.BR sigqueue (3)
+Sends a real-time signal with accompanying data to a specified process.
+.SS Waiting for a signal to be caught
+The following system calls suspend execution of the calling
+thread until a signal is caught
+(or an unhandled signal terminates the process):
+.TP
+.BR pause (2)
+Suspends execution until any signal is caught.
+.TP
+.BR sigsuspend (2)
+Temporarily changes the signal mask (see below) and suspends
+execution until one of the unmasked signals is caught.
+.\"
+.SS Synchronously accepting a signal
+Rather than asynchronously catching a signal via a signal handler,
+it is possible to synchronously accept the signal, that is,
+to block execution until the signal is delivered,
+at which point the kernel returns information about the
+signal to the caller.
+There are two general ways to do this:
+.IP \[bu] 3
+.BR sigwaitinfo (2),
+.BR sigtimedwait (2),
+and
+.BR sigwait (3)
+suspend execution until one of the signals in a specified
+set is delivered.
+Each of these calls returns information about the delivered signal.
+.IP \[bu]
+.BR signalfd (2)
+returns a file descriptor that can be used to read information
+about signals that are delivered to the caller.
+Each
+.BR read (2)
+from this file descriptor blocks until one of the signals
+in the set specified in the
+.BR signalfd (2)
+call is delivered to the caller.
+The buffer returned by
+.BR read (2)
+contains a structure describing the signal.
+.SS Signal mask and pending signals
+A signal may be
+.IR blocked ,
+which means that it will not be delivered until it is later unblocked.
+Between the time when it is generated and when it is delivered
+a signal is said to be
+.IR pending .
+.PP
+Each thread in a process has an independent
+.IR "signal mask" ,
+which indicates the set of signals that the thread is currently blocking.
+A thread can manipulate its signal mask using
+.BR pthread_sigmask (3).
+In a traditional single-threaded application,
+.BR sigprocmask (2)
+can be used to manipulate the signal mask.
+.PP
+A child created via
+.BR fork (2)
+inherits a copy of its parent's signal mask;
+the signal mask is preserved across
+.BR execve (2).
+.PP
+A signal may be process-directed or thread-directed.
+A process-directed signal is one that is targeted at (and thus pending for)
+the process as a whole.
+A signal may be process-directed
+because it was generated by the kernel for reasons
+other than a hardware exception, or because it was sent using
+.BR kill (2)
+or
+.BR sigqueue (3).
+A thread-directed signal is one that is targeted at a specific thread.
+A signal may be thread-directed because it was generated as a consequence
+of executing a specific machine-language instruction
+that triggered a hardware exception (e.g.,
+.B SIGSEGV
+for an invalid memory access, or
+.B SIGFPE
+for a math error), or because it was
+targeted at a specific thread using
+interfaces such as
+.BR tgkill (2)
+or
+.BR pthread_kill (3).
+.PP
+A process-directed signal may be delivered to any one of the
+threads that does not currently have the signal blocked.
+.\" Joseph C. Sible notes:
+.\" On Linux, if the main thread has the signal unblocked, then the kernel
+.\" will always deliver the signal there, citing this kernel code
+.\"
+.\" Per this comment in kernel/signal.c since time immemorial:
+.\"
+.\" /*
+.\" * Now find a thread we can wake up to take the signal off the queue.
+.\" *
+.\" * If the main thread wants the signal, it gets first crack.
+.\" * Probably the least surprising to the average bear.
+.\" */
+.\"
+.\" But this does not mean the signal will be delivered only in the
+.\" main thread, since if a handler is already executing in the main thread
+.\" (and thus the signal is blocked in that thread), then a further
+.\" signal might be delivered in a different thread.
+.\"
+If more than one of the threads has the signal unblocked, then the
+kernel chooses an arbitrary thread to which to deliver the signal.
+.PP
+A thread can obtain the set of signals that it currently has pending
+using
+.BR sigpending (2).
+This set will consist of the union of the set of pending
+process-directed signals and the set of signals pending for
+the calling thread.
+.PP
+A child created via
+.BR fork (2)
+initially has an empty pending signal set;
+the pending signal set is preserved across an
+.BR execve (2).
+.\"
+.SS Execution of signal handlers
+Whenever there is a transition from kernel-mode to user-mode execution
+(e.g., on return from a system call or scheduling of a thread onto the CPU),
+the kernel checks whether there is a pending unblocked signal
+for which the process has established a signal handler.
+If there is such a pending signal, the following steps occur:
+.IP (1) 5
+The kernel performs the necessary preparatory steps for execution of
+the signal handler:
+.RS
+.IP (1.1) 7
+The signal is removed from the set of pending signals.
+.IP (1.2)
+If the signal handler was installed by a call to
+.BR sigaction (2)
+that specified the
+.B SA_ONSTACK
+flag and the thread has defined an alternate signal stack (using
+.BR sigaltstack (2)),
+then that stack is installed.
+.IP (1.3)
+Various pieces of signal-related context are saved
+into a special frame that is created on the stack.
+The saved information includes:
+.RS
+.IP \[bu] 3
+the program counter register
+(i.e., the address of the next instruction in the main program that
+should be executed when the signal handler returns);
+.IP \[bu]
+architecture-specific register state required for resuming the
+interrupted program;
+.IP \[bu]
+the thread's current signal mask;
+.IP \[bu]
+the thread's alternate signal stack settings.
+.RE
+.IP
+(If the signal handler was installed using the
+.BR sigaction (2)
+.B SA_SIGINFO
+flag, then the above information is accessible via the
+.I ucontext_t
+object that is pointed to by the third argument of the signal handler.)
+.IP (1.4)
+Any signals specified in
+.I act\->sa_mask
+when registering the handler with
+.BR sigaction (2)
+are added to the thread's signal mask.
+The signal being delivered is also
+added to the signal mask, unless
+.B SA_NODEFER
+was specified when registering the handler.
+These signals are thus blocked while the handler executes.
+.RE
+.IP (2)
+The kernel constructs a frame for the signal handler on the stack.
+The kernel sets the program counter for the thread to point to the first
+instruction of the signal handler function,
+and configures the return address for that function to point to a piece
+of user-space code known as the signal trampoline (described in
+.BR sigreturn (2)).
+.IP (3)
+The kernel passes control back to user-space, where execution
+commences at the start of the signal handler function.
+.IP (4)
+When the signal handler returns, control passes to the signal trampoline code.
+.IP (5)
+The signal trampoline calls
+.BR sigreturn (2),
+a system call that uses the information in the stack frame created in step 1
+to restore the thread to its state before the signal handler was
+called.
+The thread's signal mask and alternate signal stack settings
+are restored as part of this procedure.
+Upon completion of the call to
+.BR sigreturn (2),
+the kernel transfers control back to user space,
+and the thread recommences execution at the point where it was
+interrupted by the signal handler.
+.PP
+Note that if the signal handler does not return
+(e.g., control is transferred out of the handler using
+.BR siglongjmp (3),
+or the handler executes a new program with
+.BR execve (2)),
+then the final step is not performed.
+In particular, in such scenarios it is the programmer's responsibility
+to restore the state of the signal mask (using
+.BR sigprocmask (2)),
+if it is desired to unblock the signals that were blocked on entry
+to the signal handler.
+(Note that
+.BR siglongjmp (3)
+may or may not restore the signal mask, depending on the
+.I savesigs
+value that was specified in the corresponding call to
+.BR sigsetjmp (3).)
+.PP
+From the kernel's point of view,
+execution of the signal handler code is exactly the same as the execution
+of any other user-space code.
+That is to say, the kernel does not record any special state information
+indicating that the thread is currently executing inside a signal handler.
+All necessary state information is maintained in user-space registers
+and the user-space stack.
+The depth to which nested signal handlers may be invoked is thus
+limited only by the user-space stack (and sensible software design!).
+.\"
+.SS Standard signals
+Linux supports the standard signals listed below.
+The second column of the table indicates which standard (if any)
+specified the signal: "P1990" indicates that the signal is described
+in the original POSIX.1-1990 standard;
+"P2001" indicates that the signal was added in SUSv2 and POSIX.1-2001.
+.TS
+l c c l
+____
+lB c c l.
+Signal Standard Action Comment
+SIGABRT P1990 Core Abort signal from \fBabort\fP(3)
+SIGALRM P1990 Term Timer signal from \fBalarm\fP(2)
+SIGBUS P2001 Core Bus error (bad memory access)
+SIGCHLD P1990 Ign Child stopped or terminated
+SIGCLD \- Ign A synonym for \fBSIGCHLD\fP
+SIGCONT P1990 Cont Continue if stopped
+SIGEMT \- Term Emulator trap
+SIGFPE P1990 Core Floating-point exception
+SIGHUP P1990 Term Hangup detected on controlling terminal
+ or death of controlling process
+SIGILL P1990 Core Illegal Instruction
+SIGINFO \- A synonym for \fBSIGPWR\fP
+SIGINT P1990 Term Interrupt from keyboard
+SIGIO \- Term I/O now possible (4.2BSD)
+SIGIOT \- Core IOT trap. A synonym for \fBSIGABRT\fP
+SIGKILL P1990 Term Kill signal
+SIGLOST \- Term File lock lost (unused)
+SIGPIPE P1990 Term Broken pipe: write to pipe with no
+ readers; see \fBpipe\fP(7)
+SIGPOLL P2001 Term Pollable event (Sys V);
+ synonym for \fBSIGIO\fP
+SIGPROF P2001 Term Profiling timer expired
+SIGPWR \- Term Power failure (System V)
+SIGQUIT P1990 Core Quit from keyboard
+SIGSEGV P1990 Core Invalid memory reference
+SIGSTKFLT \- Term Stack fault on coprocessor (unused)
+SIGSTOP P1990 Stop Stop process
+SIGTSTP P1990 Stop Stop typed at terminal
+SIGSYS P2001 Core Bad system call (SVr4);
+ see also \fBseccomp\fP(2)
+SIGTERM P1990 Term Termination signal
+SIGTRAP P2001 Core Trace/breakpoint trap
+SIGTTIN P1990 Stop Terminal input for background process
+SIGTTOU P1990 Stop Terminal output for background process
+SIGUNUSED \- Core Synonymous with \fBSIGSYS\fP
+SIGURG P2001 Ign Urgent condition on socket (4.2BSD)
+SIGUSR1 P1990 Term User-defined signal 1
+SIGUSR2 P1990 Term User-defined signal 2
+SIGVTALRM P2001 Term Virtual alarm clock (4.2BSD)
+SIGXCPU P2001 Core CPU time limit exceeded (4.2BSD);
+ see \fBsetrlimit\fP(2)
+SIGXFSZ P2001 Core File size limit exceeded (4.2BSD);
+ see \fBsetrlimit\fP(2)
+SIGWINCH \- Ign Window resize signal (4.3BSD, Sun)
+.TE
+.PP
+The signals
+.B SIGKILL
+and
+.B SIGSTOP
+cannot be caught, blocked, or ignored.
+.PP
+Up to and including Linux 2.2, the default behavior for
+.BR SIGSYS ", " SIGXCPU ", " SIGXFSZ ,
+and (on architectures other than SPARC and MIPS)
+.B SIGBUS
+was to terminate the process (without a core dump).
+(On some other UNIX systems the default action for
+.BR SIGXCPU " and " SIGXFSZ
+is to terminate the process without a core dump.)
+Linux 2.4 conforms to the POSIX.1-2001 requirements for these signals,
+terminating the process with a core dump.
+.PP
+.B SIGEMT
+is not specified in POSIX.1-2001, but nevertheless appears
+on most other UNIX systems,
+where its default action is typically to terminate
+the process with a core dump.
+.PP
+.B SIGPWR
+(which is not specified in POSIX.1-2001) is typically ignored
+by default on those other UNIX systems where it appears.
+.PP
+.B SIGIO
+(which is not specified in POSIX.1-2001) is ignored by default
+on several other UNIX systems.
+.\"
+.SS Queueing and delivery semantics for standard signals
+If multiple standard signals are pending for a process,
+the order in which the signals are delivered is unspecified.
+.PP
+Standard signals do not queue.
+If multiple instances of a standard signal are generated while
+that signal is blocked,
+then only one instance of the signal is marked as pending
+(and the signal will be delivered just once when it is unblocked).
+In the case where a standard signal is already pending, the
+.I siginfo_t
+structure (see
+.BR sigaction (2))
+associated with that signal is not overwritten
+on arrival of subsequent instances of the same signal.
+Thus, the process will receive the information
+associated with the first instance of the signal.
+.\"
+.SS Signal numbering for standard signals
+The numeric value for each signal is given in the table below.
+As shown in the table, many signals have different numeric values
+on different architectures.
+The first numeric value in each table row shows the signal number
+on x86, ARM, and most other architectures;
+the second value is for Alpha and SPARC; the third is for MIPS;
+and the last is for PARISC.
+A dash (\-) denotes that a signal is absent on the corresponding architecture.
+.TS
+l c c c c l
+l c c c c l
+______
+lB c c c c l.
+Signal x86/ARM Alpha/ MIPS PARISC Notes
+ most others SPARC
+SIGHUP \01 \01 \01 \01
+SIGINT \02 \02 \02 \02
+SIGQUIT \03 \03 \03 \03
+SIGILL \04 \04 \04 \04
+SIGTRAP \05 \05 \05 \05
+SIGABRT \06 \06 \06 \06
+SIGIOT \06 \06 \06 \06
+SIGBUS \07 10 10 10
+SIGEMT \- \07 \07 \-
+SIGFPE \08 \08 \08 \08
+SIGKILL \09 \09 \09 \09
+SIGUSR1 10 30 16 16
+SIGSEGV 11 11 11 11
+SIGUSR2 12 31 17 17
+SIGPIPE 13 13 13 13
+SIGALRM 14 14 14 14
+SIGTERM 15 15 15 15
+SIGSTKFLT 16 \- \- \07
+SIGCHLD 17 20 18 18
+SIGCLD \- \- 18 \-
+SIGCONT 18 19 25 26
+SIGSTOP 19 17 23 24
+SIGTSTP 20 18 24 25
+SIGTTIN 21 21 26 27
+SIGTTOU 22 22 27 28
+SIGURG 23 16 21 29
+SIGXCPU 24 24 30 12
+SIGXFSZ 25 25 31 30
+SIGVTALRM 26 26 28 20
+SIGPROF 27 27 29 21
+SIGWINCH 28 28 20 23
+SIGIO 29 23 22 22
+SIGPOLL Same as SIGIO
+SIGPWR 30 29/\- 19 19
+SIGINFO \- 29/\- \- \-
+SIGLOST \- \-/29 \- \-
+SIGSYS 31 12 12 31
+SIGUNUSED 31 \- \- 31
+.TE
+.PP
+Note the following:
+.IP \[bu] 3
+Where defined,
+.B SIGUNUSED
+is synonymous with
+.BR SIGSYS .
+Since glibc 2.26,
+.B SIGUNUSED
+is no longer defined on any architecture.
+.IP \[bu]
+Signal 29 is
+.BR SIGINFO / SIGPWR
+(synonyms for the same value) on Alpha but
+.B SIGLOST
+on SPARC.
+.\"
+.SS Real-time signals
+Starting with Linux 2.2,
+Linux supports real-time signals as originally defined in the POSIX.1b
+real-time extensions (and now included in POSIX.1-2001).
+The range of supported real-time signals is defined by the macros
+.B SIGRTMIN
+and
+.BR SIGRTMAX .
+POSIX.1-2001 requires that an implementation support at least
+.B _POSIX_RTSIG_MAX
+(8) real-time signals.
+.PP
+The Linux kernel supports a range of 33 different real-time
+signals, numbered 32 to 64.
+However, the glibc POSIX threads implementation internally uses
+two (for NPTL) or three (for LinuxThreads) real-time signals
+(see
+.BR pthreads (7)),
+and adjusts the value of
+.B SIGRTMIN
+suitably (to 34 or 35).
+Because the range of available real-time signals varies according
+to the glibc threading implementation (and this variation can occur
+at run time according to the available kernel and glibc),
+and indeed the range of real-time signals varies across UNIX systems,
+programs should
+.IR "never refer to real-time signals using hard-coded numbers" ,
+but instead should always refer to real-time signals using the notation
+.BR SIGRTMIN +n,
+and include suitable (run-time) checks that
+.BR SIGRTMIN +n
+does not exceed
+.BR SIGRTMAX .
+.PP
+Unlike standard signals, real-time signals have no predefined meanings:
+the entire set of real-time signals can be used for application-defined
+purposes.
+.PP
+The default action for an unhandled real-time signal is to terminate the
+receiving process.
+.PP
+Real-time signals are distinguished by the following:
+.IP \[bu] 3
+Multiple instances of real-time signals can be queued.
+By contrast, if multiple instances of a standard signal are generated
+while that signal is currently blocked, then only one instance is queued.
+.IP \[bu]
+If the signal is sent using
+.BR sigqueue (3),
+an accompanying value (either an integer or a pointer) can be sent
+with the signal.
+If the receiving process establishes a handler for this signal using the
+.B SA_SIGINFO
+flag to
+.BR sigaction (2),
+then it can obtain this data via the
+.I si_value
+field of the
+.I siginfo_t
+structure passed as the second argument to the handler.
+Furthermore, the
+.I si_pid
+and
+.I si_uid
+fields of this structure can be used to obtain the PID
+and real user ID of the process sending the signal.
+.IP \[bu]
+Real-time signals are delivered in a guaranteed order.
+Multiple real-time signals of the same type are delivered in the order
+they were sent.
+If different real-time signals are sent to a process, they are delivered
+starting with the lowest-numbered signal.
+(I.e., low-numbered signals have highest priority.)
+By contrast, if multiple standard signals are pending for a process,
+the order in which they are delivered is unspecified.
+.PP
+If both standard and real-time signals are pending for a process,
+POSIX leaves it unspecified which is delivered first.
+Linux, like many other implementations, gives priority
+to standard signals in this case.
+.PP
+According to POSIX, an implementation should permit at least
+.B _POSIX_SIGQUEUE_MAX
+(32) real-time signals to be queued to
+a process.
+However, Linux does things differently.
+Up to and including Linux 2.6.7, Linux imposes
+a system-wide limit on the number of queued real-time signals
+for all processes.
+This limit can be viewed and (with privilege) changed via the
+.I /proc/sys/kernel/rtsig\-max
+file.
+A related file,
+.IR /proc/sys/kernel/rtsig\-nr ,
+can be used to find out how many real-time signals are currently queued.
+In Linux 2.6.8, these
+.I /proc
+interfaces were replaced by the
+.B RLIMIT_SIGPENDING
+resource limit, which specifies a per-user limit for queued
+signals; see
+.BR setrlimit (2)
+for further details.
+.PP
+The addition of real-time signals required the widening
+of the signal set structure
+.RI ( sigset_t )
+from 32 to 64 bits.
+Consequently, various system calls were superseded by new system calls
+that supported the larger signal sets.
+The old and new system calls are as follows:
+.TS
+lb lb
+l l.
+Linux 2.0 and earlier Linux 2.2 and later
+\fBsigaction\fP(2) \fBrt_sigaction\fP(2)
+\fBsigpending\fP(2) \fBrt_sigpending\fP(2)
+\fBsigprocmask\fP(2) \fBrt_sigprocmask\fP(2)
+\fBsigreturn\fP(2) \fBrt_sigreturn\fP(2)
+\fBsigsuspend\fP(2) \fBrt_sigsuspend\fP(2)
+\fBsigtimedwait\fP(2) \fBrt_sigtimedwait\fP(2)
+.TE
+.\"
+.SS Interruption of system calls and library functions by signal handlers
+If a signal handler is invoked while a system call or library
+function call is blocked, then either:
+.IP \[bu] 3
+the call is automatically restarted after the signal handler returns; or
+.IP \[bu]
+the call fails with the error
+.BR EINTR .
+.PP
+Which of these two behaviors occurs depends on the interface and
+whether or not the signal handler was established using the
+.B SA_RESTART
+flag (see
+.BR sigaction (2)).
+The details vary across UNIX systems;
+the details for Linux are described below.
+.PP
+If a blocked call to one of the following interfaces is interrupted
+by a signal handler, then the call is automatically restarted
+after the signal handler returns if the
+.B SA_RESTART
+flag was used; otherwise the call fails with the error
+.BR EINTR :
+.\" The following system calls use ERESTARTSYS,
+.\" so that they are restartable
+.IP \[bu] 3
+.BR read (2),
+.BR readv (2),
+.BR write (2),
+.BR writev (2),
+and
+.BR ioctl (2)
+calls on "slow" devices.
+A "slow" device is one where the I/O call may block for an
+indefinite time, for example, a terminal, pipe, or socket.
+If an I/O call on a slow device has already transferred some
+data by the time it is interrupted by a signal handler,
+then the call will return a success status
+(normally, the number of bytes transferred).
+Note that a (local) disk is not a slow device according to this definition;
+I/O operations on disk devices are not interrupted by signals.
+.IP \[bu]
+.BR open (2),
+if it can block (e.g., when opening a FIFO; see
+.BR fifo (7)).
+.IP \[bu]
+.BR wait (2),
+.BR wait3 (2),
+.BR wait4 (2),
+.BR waitid (2),
+and
+.BR waitpid (2).
+.IP \[bu]
+Socket interfaces:
+.\" If a timeout (setsockopt()) is in effect on the socket, then these
+.\" system calls switch to using EINTR. Consequently, they are not
+.\" automatically restarted, and they show the stop/cont behavior
+.\" described below. (Verified from Linux 2.6.26 source, and by experiment; mtk)
+.BR accept (2),
+.BR connect (2),
+.BR recv (2),
+.BR recvfrom (2),
+.BR recvmmsg (2),
+.BR recvmsg (2),
+.BR send (2),
+.BR sendto (2),
+and
+.BR sendmsg (2),
+.\" FIXME What about sendmmsg()?
+unless a timeout has been set on the socket (see below).
+.IP \[bu]
+File locking interfaces:
+.BR flock (2)
+and
+the
+.B F_SETLKW
+and
+.B F_OFD_SETLKW
+operations of
+.BR fcntl (2)
+.IP \[bu]
+POSIX message queue interfaces:
+.BR mq_receive (3),
+.BR mq_timedreceive (3),
+.BR mq_send (3),
+and
+.BR mq_timedsend (3).
+.IP \[bu]
+.BR futex (2)
+.B FUTEX_WAIT
+(since Linux 2.6.22;
+.\" commit 72c1bbf308c75a136803d2d76d0e18258be14c7a
+beforehand, always failed with
+.BR EINTR ).
+.IP \[bu]
+.BR getrandom (2).
+.IP \[bu]
+.BR pthread_mutex_lock (3),
+.BR pthread_cond_wait (3),
+and related APIs.
+.IP \[bu]
+.BR futex (2)
+.BR FUTEX_WAIT_BITSET .
+.IP \[bu]
+POSIX semaphore interfaces:
+.BR sem_wait (3)
+and
+.BR sem_timedwait (3)
+(since Linux 2.6.22;
+.\" as a consequence of the 2.6.22 changes in the futex() implementation
+beforehand, always failed with
+.BR EINTR ).
+.IP \[bu]
+.BR read (2)
+from an
+.BR inotify (7)
+file descriptor
+(since Linux 3.8;
+.\" commit 1ca39ab9d21ac93f94b9e3eb364ea9a5cf2aba06
+beforehand, always failed with
+.BR EINTR ).
+.PP
+The following interfaces are never restarted after
+being interrupted by a signal handler,
+regardless of the use of
+.BR SA_RESTART ;
+they always fail with the error
+.B EINTR
+when interrupted by a signal handler:
+.\" These are the system calls that give EINTR or ERESTARTNOHAND
+.\" on interruption by a signal handler.
+.IP \[bu] 3
+"Input" socket interfaces, when a timeout
+.RB ( SO_RCVTIMEO )
+has been set on the socket using
+.BR setsockopt (2):
+.BR accept (2),
+.BR recv (2),
+.BR recvfrom (2),
+.BR recvmmsg (2)
+(also with a non-NULL
+.I timeout
+argument),
+and
+.BR recvmsg (2).
+.IP \[bu]
+"Output" socket interfaces, when a timeout
+.RB ( SO_SNDTIMEO )
+has been set on the socket using
+.BR setsockopt (2):
+.BR connect (2),
+.BR send (2),
+.BR sendto (2),
+and
+.BR sendmsg (2).
+.\" FIXME What about sendmmsg()?
+.IP \[bu]
+Interfaces used to wait for signals:
+.BR pause (2),
+.BR sigsuspend (2),
+.BR sigtimedwait (2),
+and
+.BR sigwaitinfo (2).
+.IP \[bu]
+File descriptor multiplexing interfaces:
+.BR epoll_wait (2),
+.BR epoll_pwait (2),
+.BR poll (2),
+.BR ppoll (2),
+.BR select (2),
+and
+.BR pselect (2).
+.IP \[bu]
+System V IPC interfaces:
+.\" On some other systems, SA_RESTART does restart these system calls
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semop (2),
+and
+.BR semtimedop (2).
+.IP \[bu]
+Sleep interfaces:
+.BR clock_nanosleep (2),
+.BR nanosleep (2),
+and
+.BR usleep (3).
+.IP \[bu]
+.BR io_getevents (2).
+.PP
+The
+.BR sleep (3)
+function is also never restarted if interrupted by a handler,
+but gives a success return: the number of seconds remaining to sleep.
+.PP
+In certain circumstances, the
+.BR seccomp (2)
+user-space notification feature can lead to restarting of system calls
+that would otherwise never be restarted by
+.BR SA_RESTART ;
+for details, see
+.BR seccomp_unotify (2).
+.\"
+.SS Interruption of system calls and library functions by stop signals
+On Linux, even in the absence of signal handlers,
+certain blocking interfaces can fail with the error
+.B EINTR
+after the process is stopped by one of the stop signals
+and then resumed via
+.BR SIGCONT .
+This behavior is not sanctioned by POSIX.1, and doesn't occur
+on other systems.
+.PP
+The Linux interfaces that display this behavior are:
+.IP \[bu] 3
+"Input" socket interfaces, when a timeout
+.RB ( SO_RCVTIMEO )
+has been set on the socket using
+.BR setsockopt (2):
+.BR accept (2),
+.BR recv (2),
+.BR recvfrom (2),
+.BR recvmmsg (2)
+(also with a non-NULL
+.I timeout
+argument),
+and
+.BR recvmsg (2).
+.IP \[bu]
+"Output" socket interfaces, when a timeout
+.RB ( SO_SNDTIMEO )
+has been set on the socket using
+.BR setsockopt (2):
+.BR connect (2),
+.BR send (2),
+.BR sendto (2),
+and
+.\" FIXME What about sendmmsg()?
+.BR sendmsg (2).
+.IP \[bu]
+.BR epoll_wait (2),
+.BR epoll_pwait (2).
+.IP \[bu]
+.BR semop (2),
+.BR semtimedop (2).
+.IP \[bu]
+.BR sigtimedwait (2),
+.BR sigwaitinfo (2).
+.IP \[bu]
+Linux 3.7 and earlier:
+.BR read (2)
+from an
+.BR inotify (7)
+file descriptor.
+.\" commit 1ca39ab9d21ac93f94b9e3eb364ea9a5cf2aba06
+.IP \[bu]
+Linux 2.6.21 and earlier:
+.BR futex (2)
+.BR FUTEX_WAIT ,
+.BR sem_timedwait (3),
+.BR sem_wait (3).
+.IP \[bu]
+Linux 2.6.8 and earlier:
+.BR msgrcv (2),
+.BR msgsnd (2).
+.IP \[bu]
+Linux 2.4 and earlier:
+.BR nanosleep (2).
+.SH STANDARDS
+POSIX.1, except as noted.
+.SH NOTES
+For a discussion of async-signal-safe functions, see
+.BR signal\-safety (7).
+.PP
+The
+.IR /proc/ pid /task/ tid /status
+file contains various fields that show the signals
+that a thread is blocking
+.RI ( SigBlk ),
+catching
+.RI ( SigCgt ),
+or ignoring
+.RI ( SigIgn ).
+(The set of signals that are caught or ignored will be the same
+across all threads in a process.)
+Other fields show the set of pending signals that are directed to the thread
+.RI ( SigPnd )
+as well as the set of pending signals that are directed
+to the process as a whole
+.RI ( ShdPnd ).
+The corresponding fields in
+.IR /proc/ pid /status
+show the information for the main thread.
+See
+.BR proc (5)
+for further details.
+.SH BUGS
+There are six signals that can be delivered
+as a consequence of a hardware exception:
+.BR SIGBUS ,
+.BR SIGEMT ,
+.BR SIGFPE ,
+.BR SIGILL ,
+.BR SIGSEGV ,
+and
+.BR SIGTRAP .
+Which of these signals is delivered,
+for any given hardware exception,
+is not documented and does not always make sense.
+.PP
+For example, an invalid memory access that causes delivery of
+.B SIGSEGV
+on one CPU architecture may cause delivery of
+.B SIGBUS
+on another architecture, or vice versa.
+.PP
+For another example, using the x86
+.I int
+instruction with a forbidden argument
+(any number other than 3 or 128)
+causes delivery of
+.BR SIGSEGV ,
+even though
+.B SIGILL
+would make more sense,
+because of how the CPU reports the forbidden operation to the kernel.
+.SH SEE ALSO
+.BR kill (1),
+.BR clone (2),
+.BR getrlimit (2),
+.BR kill (2),
+.BR pidfd_send_signal (2),
+.BR restart_syscall (2),
+.BR rt_sigqueueinfo (2),
+.BR setitimer (2),
+.BR setrlimit (2),
+.BR sgetmask (2),
+.BR sigaction (2),
+.BR sigaltstack (2),
+.BR signal (2),
+.BR signalfd (2),
+.BR sigpending (2),
+.BR sigprocmask (2),
+.BR sigreturn (2),
+.BR sigsuspend (2),
+.BR sigwaitinfo (2),
+.BR abort (3),
+.BR bsd_signal (3),
+.BR killpg (3),
+.BR longjmp (3),
+.BR pthread_sigqueue (3),
+.BR raise (3),
+.BR sigqueue (3),
+.BR sigset (3),
+.BR sigsetops (3),
+.BR sigvec (3),
+.BR sigwait (3),
+.BR strsignal (3),
+.BR swapcontext (3),
+.BR sysv_signal (3),
+.BR core (5),
+.BR proc (5),
+.BR nptl (7),
+.BR pthreads (7),
+.BR sigevent (7)
diff --git a/man7/sock_diag.7 b/man7/sock_diag.7
new file mode 100644
index 0000000..adf47b7
--- /dev/null
+++ b/man7/sock_diag.7
@@ -0,0 +1,825 @@
+.\" Copyright (c) 2016 Pavel Emelyanov <xemul@virtuozzo.com>
+.\" Copyright (c) 2016 Dmitry V. Levin <ldv@altlinux.org>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.TH sock_diag 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+sock_diag \- obtaining information about sockets
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <linux/sock_diag.h>
+.BR "#include <linux/unix_diag.h>" " /* for UNIX domain sockets */"
+.BR "#include <linux/inet_diag.h>" " /* for IPv4 and IPv6 sockets */"
+.PP
+.BI "diag_socket = socket(AF_NETLINK, " socket_type ", NETLINK_SOCK_DIAG);"
+.fi
+.SH DESCRIPTION
+The sock_diag netlink subsystem provides a mechanism for obtaining
+information about sockets of various address families from the kernel.
+This subsystem can be used to obtain information about individual
+sockets or request a list of sockets.
+.PP
+In the request, the caller can specify additional information it would
+like to obtain about the socket, for example, memory information or
+information specific to the address family.
+.PP
+When requesting a list of sockets, the caller can specify filters that
+would be applied by the kernel to select a subset of sockets to report.
+For now, there is only the ability to filter sockets by state (connected,
+listening, and so on).
+.PP
+Note that sock_diag reports only those sockets that have a name;
+that is, either sockets bound explicitly with
+.BR bind (2)
+or sockets that were automatically bound to an address (e.g., by
+.BR connect (2)).
+This is the same set of sockets that is available via
+.IR /proc/net/unix ,
+.IR /proc/net/tcp ,
+.IR /proc/net/udp ,
+and so on.
+.\"
+.SS Request
+The request starts with a
+.I "struct nlmsghdr"
+header described in
+.BR netlink (7)
+with
+.I nlmsg_type
+field set to
+.BR SOCK_DIAG_BY_FAMILY .
+It is followed by a header specific to the address family that starts with
+a common part shared by all address families:
+.PP
+.in +4n
+.EX
+struct sock_diag_req {
+ __u8 sdiag_family;
+ __u8 sdiag_protocol;
+};
+.EE
+.in
+.PP
+The fields of this structure are as follows:
+.TP
+.I sdiag_family
+An address family.
+It should be set to the appropriate
+.B AF_*
+constant.
+.TP
+.I sdiag_protocol
+Depends on
+.IR sdiag_family .
+It should be set to the appropriate
+.B IPPROTO_*
+constant for
+.B AF_INET
+and
+.BR AF_INET6 ,
+and to 0 otherwise.
+.PP
+If the
+.I nlmsg_flags
+field of the
+.I "struct nlmsghdr"
+header has the
+.B NLM_F_DUMP
+flag set, it means that a list of sockets is being requested;
+otherwise it is a query about an individual socket.
+.\"
+.SS Response
+The response starts with a
+.I "struct nlmsghdr"
+header and is followed by an array of objects specific to the address family.
+The array is to be accessed with the standard
+.B NLMSG_*
+macros from the
+.BR netlink (3)
+API.
+.PP
+Each object is the NLA (netlink attributes) list that is to be accessed
+with the
+.B RTA_*
+macros from the
+.BR rtnetlink (3)
+API.
+.\"
+.SS UNIX domain sockets
+For UNIX domain sockets the request is represented in the following structure:
+.PP
+.in +4n
+.EX
+struct unix_diag_req {
+ __u8 sdiag_family;
+ __u8 sdiag_protocol;
+ __u16 pad;
+ __u32 udiag_states;
+ __u32 udiag_ino;
+ __u32 udiag_show;
+ __u32 udiag_cookie[2];
+};
+.EE
+.in
+.PP
+The fields of this structure are as follows:
+.TP
+.I sdiag_family
+The address family; it should be set to
+.BR AF_UNIX .
+.PP
+.I sdiag_protocol
+.PD 0
+.TP
+.PD
+.I pad
+These fields should be set to 0.
+.TP
+.I udiag_states
+This is a bit mask that defines a filter of socket states.
+Only those sockets whose states are in this mask will be reported.
+Ignored when querying for an individual socket.
+Supported values are:
+.PP
+.RS 12
+1 <<
+.B TCP_ESTABLISHED
+.PP
+1 <<
+.B TCP_LISTEN
+.RE
+.TP
+.I udiag_ino
+This is an inode number when querying for an individual socket.
+Ignored when querying for a list of sockets.
+.TP
+.I udiag_show
+This is a set of flags defining what kind of information to report.
+Each requested kind of information is reported back as a netlink
+attribute as described below:
+.RS
+.TP
+.B UDIAG_SHOW_NAME
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_NAME .
+The payload associated with this attribute is the pathname to which
+the socket was bound (a sequence of bytes up to
+.B UNIX_PATH_MAX
+length).
+.TP
+.B UDIAG_SHOW_VFS
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_VFS .
+The payload associated with this attribute is represented in the following
+structure:
+.IP
+.in +4n
+.EX
+struct unix_diag_vfs {
+ __u32 udiag_vfs_dev;
+ __u32 udiag_vfs_ino;
+};
+.EE
+.in
+.IP
+The fields of this structure are as follows:
+.RS
+.TP
+.I udiag_vfs_dev
+The device number of the corresponding on-disk socket inode.
+.TP
+.I udiag_vfs_ino
+The inode number of the corresponding on-disk socket inode.
+.RE
+.TP
+.B UDIAG_SHOW_PEER
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_PEER .
+The payload associated with this attribute is a __u32 value
+which is the peer's inode number.
+This attribute is reported for connected sockets only.
+.TP
+.B UDIAG_SHOW_ICONS
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_ICONS .
+The payload associated with this attribute is an array of __u32 values
+which are inode numbers of sockets that have passed the
+.BR connect (2)
+call, but haven't been processed with
+.BR accept (2)
+yet.
+This attribute is reported for listening sockets only.
+.TP
+.B UDIAG_SHOW_RQLEN
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_RQLEN .
+The payload associated with this attribute is represented in the following
+structure:
+.IP
+.in +4n
+.EX
+struct unix_diag_rqlen {
+ __u32 udiag_rqueue;
+ __u32 udiag_wqueue;
+};
+.EE
+.in
+.IP
+The fields of this structure are as follows:
+.RS
+.TP
+.I udiag_rqueue
+For listening sockets:
+the number of pending connections.
+The length of the array associated with the
+.B UNIX_DIAG_ICONS
+response attribute is equal to this value.
+.IP
+For established sockets:
+the amount of data in the incoming queue.
+.TP
+.I udiag_wqueue
+For listening sockets:
+the backlog length, which equals the value passed as the second argument to
+.BR listen (2).
+.IP
+For established sockets:
+the amount of memory available for sending.
+.RE
+.TP
+.B UDIAG_SHOW_MEMINFO
+The attribute reported in answer to this request is
+.BR UNIX_DIAG_MEMINFO .
+The payload associated with this attribute is an array of __u32 values
+described below in the subsection "Socket memory information".
+.PP
+The following attributes are reported back without any specific request:
+.TP
+.B UNIX_DIAG_SHUTDOWN
+The payload associated with this attribute is a __u8 value which represents
+bits of
+.BR shutdown (2)
+state.
+.RE
+.TP
+.I udiag_cookie
+This is an array of opaque identifiers that could be used along with
+.I udiag_ino
+to specify an individual socket.
+It is ignored when querying for a list
+of sockets, as well as when all its elements are set to \-1.
+.PP
+The response to a query for UNIX domain sockets is represented as an array of
+.PP
+.in +4n
+.EX
+struct unix_diag_msg {
+ __u8 udiag_family;
+ __u8 udiag_type;
+ __u8 udiag_state;
+ __u8 pad;
+ __u32 udiag_ino;
+ __u32 udiag_cookie[2];
+};
+.EE
+.in
+.PP
+followed by netlink attributes.
+.PP
+The fields of this structure are as follows:
+.TP
+.I udiag_family
+This field has the same meaning as in
+.IR "struct unix_diag_req" .
+.TP
+.I udiag_type
+This is set to one of
+.BR SOCK_DGRAM ,
+.BR SOCK_STREAM ,
+or
+.BR SOCK_SEQPACKET .
+.TP
+.I udiag_state
+This is set to one of
+.B TCP_LISTEN
+or
+.BR TCP_ESTABLISHED .
+.TP
+.I pad
+This field is set to 0.
+.TP
+.I udiag_ino
+This is the socket inode number.
+.TP
+.I udiag_cookie
+This is an array of opaque identifiers that could be used in subsequent
+queries.
+.\"
+.SS IPv4 and IPv6 sockets
+For IPv4 and IPv6 sockets,
+the request is represented in the following structure:
+.PP
+.in +4n
+.EX
+struct inet_diag_req_v2 {
+ __u8 sdiag_family;
+ __u8 sdiag_protocol;
+ __u8 idiag_ext;
+ __u8 pad;
+ __u32 idiag_states;
+ struct inet_diag_sockid id;
+};
+.EE
+.in
+.PP
+where
+.I "struct inet_diag_sockid"
+is defined as follows:
+.PP
+.in +4n
+.EX
+struct inet_diag_sockid {
+ __be16 idiag_sport;
+ __be16 idiag_dport;
+ __be32 idiag_src[4];
+ __be32 idiag_dst[4];
+ __u32 idiag_if;
+ __u32 idiag_cookie[2];
+};
+.EE
+.in
+.PP
+The fields of
+.I "struct inet_diag_req_v2"
+are as follows:
+.TP
+.I sdiag_family
+This should be set to either
+.B AF_INET
+or
+.B AF_INET6
+for IPv4 or IPv6 sockets respectively.
+.TP
+.I sdiag_protocol
+This should be set to one of
+.BR IPPROTO_TCP ,
+.BR IPPROTO_UDP ,
+or
+.BR IPPROTO_UDPLITE .
+.TP
+.I idiag_ext
+This is a set of flags defining what kind of extended information to report.
+Each requested kind of information is reported back as a netlink attribute
+as described below:
+.RS
+.TP
+.B INET_DIAG_TOS
+The payload associated with this attribute is a __u8 value
+which is the TOS of the socket.
+.TP
+.B INET_DIAG_TCLASS
+The payload associated with this attribute is a __u8 value
+which is the TClass of the socket.
+IPv6 sockets only.
+For LISTEN and CLOSE sockets, this is followed by the
+.B INET_DIAG_SKV6ONLY
+attribute with associated __u8 payload value meaning whether the socket
+is IPv6-only or not.
+.TP
+.B INET_DIAG_MEMINFO
+The payload associated with this attribute is represented in the following
+structure:
+.IP
+.in +4n
+.EX
+struct inet_diag_meminfo {
+ __u32 idiag_rmem;
+ __u32 idiag_wmem;
+ __u32 idiag_fmem;
+ __u32 idiag_tmem;
+};
+.EE
+.in
+.IP
+The fields of this structure are as follows:
+.RS
+.TP 12
+.I idiag_rmem
+The amount of data in the receive queue.
+.TP
+.I idiag_wmem
+The amount of data that is queued by TCP but not yet sent.
+.TP
+.I idiag_fmem
+The amount of memory scheduled for future use (TCP only).
+.TP
+.I idiag_tmem
+The amount of data in the send queue.
+.RE
+.TP
+.B INET_DIAG_SKMEMINFO
+The payload associated with this attribute is an array of __u32 values
+described below in the subsection "Socket memory information".
+.TP
+.B INET_DIAG_INFO
+The payload associated with this attribute is specific to the address family.
+For TCP sockets, it is an object of type
+.IR "struct tcp_info" .
+.TP
+.B INET_DIAG_CONG
+The payload associated with this attribute is a string that describes the
+congestion control algorithm used.
+For TCP sockets only.
+.RE
+.TP
+.I pad
+This should be set to 0.
+.TP
+.I idiag_states
+This is a bit mask that defines a filter of socket states.
+Only those sockets whose states are in this mask will be reported.
+Ignored when querying for an individual socket.
+.TP
+.I id
+This is a socket ID object that is used in dump requests, in queries
+about individual sockets, and is reported back in each response.
+Unlike UNIX domain sockets, IPv4 and IPv6 sockets are identified
+using addresses and ports.
+All values are in network byte order.
+.PP
+The fields of
+.I "struct inet_diag_sockid"
+are as follows:
+.TP
+.I idiag_sport
+The source port.
+.TP
+.I idiag_dport
+The destination port.
+.TP
+.I idiag_src
+The source address.
+.TP
+.I idiag_dst
+The destination address.
+.TP
+.I idiag_if
+The interface number the socket is bound to.
+.TP
+.I idiag_cookie
+This is an array of opaque identifiers that could be used along with
+other fields of this structure to specify an individual socket.
+It is ignored when querying for a list of sockets, as well as
+when all its elements are set to \-1.
+.PP
+The response to a query for IPv4 or IPv6 sockets is represented as an array of
+.PP
+.in +4n
+.EX
+struct inet_diag_msg {
+ __u8 idiag_family;
+ __u8 idiag_state;
+ __u8 idiag_timer;
+ __u8 idiag_retrans;
+\&
+ struct inet_diag_sockid id;
+\&
+ __u32 idiag_expires;
+ __u32 idiag_rqueue;
+ __u32 idiag_wqueue;
+ __u32 idiag_uid;
+ __u32 idiag_inode;
+};
+.EE
+.in
+.PP
+followed by netlink attributes.
+.PP
+The fields of this structure are as follows:
+.TP
+.I idiag_family
+This is the same field as in
+.IR "struct inet_diag_req_v2" .
+.TP
+.I idiag_state
+This denotes socket state as in
+.IR "struct inet_diag_req_v2" .
+.TP
+.I idiag_timer
+For TCP sockets, this field describes the type of timer that is currently
+active for the socket.
+It is set to one of the following constants:
+.IP
+.PD 0
+.RS 12
+.TP
+.B 0
+no timer is active
+.TP
+.B 1
+a retransmit timer
+.TP
+.B 2
+a keep-alive timer
+.TP
+.B 3
+a TIME_WAIT timer
+.TP
+.B 4
+a zero window probe timer
+.RE
+.PD
+.IP
+For non-TCP sockets, this field is set to 0.
+.TP
+.I idiag_retrans
+For
+.I idiag_timer
+values 1, 2, and 4, this field contains the number of retransmits.
+For other
+.I idiag_timer
+values, this field is set to 0.
+.TP
+.I idiag_expires
+For TCP sockets that have an active timer, this field describes its expiration
+time in milliseconds.
+For other sockets, this field is set to 0.
+.TP
+.I idiag_rqueue
+For listening sockets:
+the number of pending connections.
+.IP
+For other sockets:
+the amount of data in the incoming queue.
+.TP
+.I idiag_wqueue
+For listening sockets:
+the backlog length.
+.IP
+For other sockets:
+the amount of memory available for sending.
+.TP
+.I idiag_uid
+This is the socket owner UID.
+.TP
+.I idiag_inode
+This is the socket inode number.
+.\"
+.SS Socket memory information
+The payload associated with
+.B UNIX_DIAG_MEMINFO
+and
+.B INET_DIAG_SKMEMINFO
+netlink attributes is an array of the following __u32 values:
+.TP
+.B SK_MEMINFO_RMEM_ALLOC
+The amount of data in the receive queue.
+.TP
+.B SK_MEMINFO_RCVBUF
+The receive socket buffer as set by
+.BR SO_RCVBUF .
+.TP
+.B SK_MEMINFO_WMEM_ALLOC
+The amount of data in the send queue.
+.TP
+.B SK_MEMINFO_SNDBUF
+The send socket buffer as set by
+.BR SO_SNDBUF .
+.TP
+.B SK_MEMINFO_FWD_ALLOC
+The amount of memory scheduled for future use (TCP only).
+.TP
+.B SK_MEMINFO_WMEM_QUEUED
+The amount of data queued by TCP, but not yet sent.
+.TP
+.B SK_MEMINFO_OPTMEM
+The amount of memory allocated for the socket's service needs (e.g., socket
+filter).
+.TP
+.B SK_MEMINFO_BACKLOG
+The number of packets in the backlog (not yet processed).
+.SH VERSIONS
+.B NETLINK_INET_DIAG
+was introduced in Linux 2.6.14 and supported
+.B AF_INET
+and
+.B AF_INET6
+sockets only.
+In Linux 3.3, it was renamed to
+.B NETLINK_SOCK_DIAG
+and extended to support
+.B AF_UNIX
+sockets.
+.PP
+.B UNIX_DIAG_MEMINFO
+and
+.B INET_DIAG_SKMEMINFO
+were introduced in Linux 3.6.
+.SH STANDARDS
+Linux.
+.SH EXAMPLES
+The following example program prints inode number, peer's inode number,
+and name of all UNIX domain sockets in the current namespace.
+.PP
+.EX
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+\&
+static int
+send_query(int fd)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct
+ {
+ struct nlmsghdr nlh;
+ struct unix_diag_req udr;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP
+ },
+ .udr = {
+ .sdiag_family = AF_UNIX,
+ .udiag_states = \-1,
+ .udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_PEER
+ }
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+\&
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+\&
+ perror("sendmsg");
+ return \-1;
+ }
+\&
+ return 0;
+ }
+}
+\&
+static int
+print_diag(const struct unix_diag_msg *diag, unsigned int len)
+{
+ if (len < NLMSG_LENGTH(sizeof(*diag))) {
+ fputs("short response\en", stderr);
+ return \-1;
+ }
+ if (diag\->udiag_family != AF_UNIX) {
+ fprintf(stderr, "unexpected family %u\en", diag\->udiag_family);
+ return \-1;
+ }
+\&
+ unsigned int rta_len = len \- NLMSG_LENGTH(sizeof(*diag));
+ unsigned int peer = 0;
+ size_t path_len = 0;
+ char path[sizeof(((struct sockaddr_un *) 0)\->sun_path) + 1];
+\&
+ for (struct rtattr *attr = (struct rtattr *) (diag + 1);
+ RTA_OK(attr, rta_len); attr = RTA_NEXT(attr, rta_len)) {
+ switch (attr\->rta_type) {
+ case UNIX_DIAG_NAME:
+ if (!path_len) {
+ path_len = RTA_PAYLOAD(attr);
+ if (path_len > sizeof(path) \- 1)
+ path_len = sizeof(path) \- 1;
+ memcpy(path, RTA_DATA(attr), path_len);
+ path[path_len] = \[aq]\e0\[aq];
+ }
+ break;
+\&
+ case UNIX_DIAG_PEER:
+ if (RTA_PAYLOAD(attr) >= sizeof(peer))
+ peer = *(unsigned int *) RTA_DATA(attr);
+ break;
+ }
+ }
+\&
+ printf("inode=%u", diag\->udiag_ino);
+\&
+ if (peer)
+ printf(", peer=%u", peer);
+\&
+ if (path_len)
+ printf(", name=%s%s", *path ? "" : "@",
+ *path ? path : path + 1);
+\&
+ putchar(\[aq]\en\[aq]);
+ return 0;
+}
+\&
+static int
+receive_responses(int fd)
+{
+ long buf[8192 / sizeof(long)];
+ struct sockaddr_nl nladdr;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)
+ };
+ int flags = 0;
+\&
+ for (;;) {
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+\&
+ ssize_t ret = recvmsg(fd, &msg, flags);
+\&
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+\&
+ perror("recvmsg");
+ return \-1;
+ }
+ if (ret == 0)
+ return 0;
+\&
+ if (nladdr.nl_family != AF_NETLINK) {
+ fputs("!AF_NETLINK\en", stderr);
+ return \-1;
+ }
+\&
+ const struct nlmsghdr *h = (struct nlmsghdr *) buf;
+\&
+ if (!NLMSG_OK(h, ret)) {
+ fputs("!NLMSG_OK\en", stderr);
+ return \-1;
+ }
+\&
+ for (; NLMSG_OK(h, ret); h = NLMSG_NEXT(h, ret)) {
+ if (h\->nlmsg_type == NLMSG_DONE)
+ return 0;
+\&
+ if (h\->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *err = NLMSG_DATA(h);
+\&
+ if (h\->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) {
+ fputs("NLMSG_ERROR\en", stderr);
+ } else {
+ errno = \-err\->error;
+ perror("NLMSG_ERROR");
+ }
+\&
+ return \-1;
+ }
+\&
+ if (h\->nlmsg_type != SOCK_DIAG_BY_FAMILY) {
+ fprintf(stderr, "unexpected nlmsg_type %u\en",
+ (unsigned) h\->nlmsg_type);
+ return \-1;
+ }
+\&
+ if (print_diag(NLMSG_DATA(h), h\->nlmsg_len))
+ return \-1;
+ }
+ }
+}
+\&
+int
+main(void)
+{
+ int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+\&
+ if (fd < 0) {
+ perror("socket");
+ return 1;
+ }
+\&
+ int ret = send_query(fd) || receive_responses(fd);
+\&
+ close(fd);
+ return ret;
+}
+.EE
+.SH SEE ALSO
+.BR netlink (3),
+.BR rtnetlink (3),
+.BR netlink (7),
+.BR tcp (7)
diff --git a/man7/socket.7 b/man7/socket.7
new file mode 100644
index 0000000..2cc24d9
--- /dev/null
+++ b/man7/socket.7
@@ -0,0 +1,1266 @@
+'\" t
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\" and copyright (c) 1999 Matthew Wilcox.
+.\"
+.\" 2002-10-30, Michael Kerrisk, <mtk.manpages@gmail.com>
+.\" Added description of SO_ACCEPTCONN
+.\" 2004-05-20, aeb, added SO_RCVTIMEO/SO_SNDTIMEO text.
+.\" Modified, 27 May 2004, Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Added notes on capability requirements
+.\" A few small grammar fixes
+.\" 2010-06-13 Jan Engelhardt <jengelh@medozas.de>
+.\" Documented SO_DOMAIN and SO_PROTOCOL.
+.\"
+.\" FIXME
+.\" The following are not yet documented:
+.\"
+.\" SO_PEERNAME (2.4?)
+.\" get only
+.\" Seems to do something similar to getpeername(), but then
+.\" why is it necessary / how does it differ?
+.\"
+.\" SO_TIMESTAMPING (2.6.30)
+.\" Documentation/networking/timestamping.txt
+.\" commit cb9eff097831007afb30d64373f29d99825d0068
+.\" Author: Patrick Ohly <patrick.ohly@intel.com>
+.\"
+.\" SO_WIFI_STATUS (3.3)
+.\" commit 6e3e939f3b1bf8534b32ad09ff199d88800835a0
+.\" Author: Johannes Berg <johannes.berg@intel.com>
+.\" Also: SCM_WIFI_STATUS
+.\"
+.\" SO_NOFCS (3.4)
+.\" commit 3bdc0eba0b8b47797f4a76e377dd8360f317450f
+.\" Author: Ben Greear <greearb@candelatech.com>
+.\"
+.\" SO_GET_FILTER (3.8)
+.\" commit a8fc92778080c845eaadc369a0ecf5699a03bef0
+.\" Author: Pavel Emelyanov <xemul@parallels.com>
+.\"
+.\" SO_MAX_PACING_RATE (3.13)
+.\" commit 62748f32d501f5d3712a7c372bbb92abc7c62bc7
+.\" Author: Eric Dumazet <edumazet@google.com>
+.\"
+.\" SO_BPF_EXTENSIONS (3.14)
+.\" commit ea02f9411d9faa3553ed09ce0ec9f00ceae9885e
+.\" Author: Michal Sekletar <msekleta@redhat.com>
+.\"
+.TH socket 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+socket \- Linux socket interface
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.PP
+.IB sockfd " = socket(int " socket_family ", int " socket_type ", int " protocol );
+.fi
+.SH DESCRIPTION
+This manual page describes the Linux networking socket layer user
+interface.
+The BSD compatible sockets
+are the uniform interface
+between the user process and the network protocol stacks in the kernel.
+The protocol modules are grouped into
+.I protocol families
+such as
+.BR AF_INET ", " AF_IPX ", and " AF_PACKET ,
+and
+.I socket types
+such as
+.B SOCK_STREAM
+or
+.BR SOCK_DGRAM .
+See
+.BR socket (2)
+for more information on families and types.
+.SS Socket-layer functions
+These functions are used by the user process to send or receive packets
+and to do other socket operations.
+For more information, see their respective manual pages.
+.PP
+.BR socket (2)
+creates a socket,
+.BR connect (2)
+connects a socket to a remote socket address,
+the
+.BR bind (2)
+function binds a socket to a local socket address,
+.BR listen (2)
+tells the socket that new connections shall be accepted, and
+.BR accept (2)
+is used to get a new socket with a new incoming connection.
+.BR socketpair (2)
+returns two connected anonymous sockets (implemented only for a few
+local families like
+.BR AF_UNIX ).
+.PP
+.BR send (2),
+.BR sendto (2),
+and
+.BR sendmsg (2)
+send data over a socket, and
+.BR recv (2),
+.BR recvfrom (2),
+.BR recvmsg (2)
+receive data from a socket.
+.BR poll (2)
+and
+.BR select (2)
+wait for arriving data or a readiness to send data.
+In addition, the standard I/O operations like
+.BR write (2),
+.BR writev (2),
+.BR sendfile (2),
+.BR read (2),
+and
+.BR readv (2)
+can be used to read and write data.
+.PP
+.BR getsockname (2)
+returns the local socket address and
+.BR getpeername (2)
+returns the remote socket address.
+.BR getsockopt (2)
+and
+.BR setsockopt (2)
+are used to set or get socket layer or protocol options.
+.BR ioctl (2)
+can be used to set or read some other options.
+.PP
+.BR close (2)
+is used to close a socket.
+.BR shutdown (2)
+closes parts of a full-duplex socket connection.
+.PP
+Seeking, or calling
+.BR pread (2)
+or
+.BR pwrite (2)
+with a nonzero position is not supported on sockets.
+.PP
+It is possible to do nonblocking I/O on sockets by setting the
+.B O_NONBLOCK
+flag on a socket file descriptor using
+.BR fcntl (2).
+Then all operations that would block will (usually)
+return with
+.B EAGAIN
+(operation should be retried later);
+.BR connect (2)
+will return
+.B EINPROGRESS
+error.
+The user can then wait for various events via
+.BR poll (2)
+or
+.BR select (2).
+.TS
+tab(:) allbox;
+c s s
+l l lx.
+I/O events
+Event:Poll flag:Occurrence
+Read:POLLIN:T{
+New data arrived.
+T}
+Read:POLLIN:T{
+A connection setup has been completed
+(for connection-oriented sockets).
+T}
+Read:POLLHUP:T{
+A disconnection request has been initiated by the other end.
+T}
+Read:POLLHUP:T{
+A connection is broken (only for connection-oriented protocols).
+When the socket is written to,
+.B SIGPIPE
+is also sent.
+T}
+Write:POLLOUT:T{
+Socket has enough send buffer space for writing new data.
+T}
+Read/Write:T{
+POLLIN |
+.br
+POLLOUT
+T}:T{
+An outgoing
+.BR connect (2)
+finished.
+T}
+Read/Write:POLLERR:T{
+An asynchronous error occurred.
+T}
+Read/Write:POLLHUP:T{
+The other end has shut down one direction.
+T}
+Exception:POLLPRI:T{
+Urgent data arrived.
+.B SIGURG
+is sent then.
+T}
+.\" FIXME . The following is not true currently:
+.\" It is no I/O event when the connection
+.\" is broken from the local end using
+.\" .BR shutdown (2)
+.\" or
+.\" .BR close (2).
+.TE
+.PP
+An alternative to
+.BR poll (2)
+and
+.BR select (2)
+is to let the kernel inform the application about events
+via a
+.B SIGIO
+signal.
+For that the
+.B O_ASYNC
+flag must be set on a socket file descriptor via
+.BR fcntl (2)
+and a valid signal handler for
+.B SIGIO
+must be installed via
+.BR sigaction (2).
+See the
+.I Signals
+discussion below.
+.SS Socket address structures
+Each socket domain has its own format for socket addresses,
+with a domain-specific address structure.
+Each of these structures begins with an
+integer "family" field (typed as
+.IR sa_family_t )
+that indicates the type of the address structure.
+This allows
+the various system calls (e.g.,
+.BR connect (2),
+.BR bind (2),
+.BR accept (2),
+.BR getsockname (2),
+.BR getpeername (2)),
+which are generic to all socket domains,
+to determine the domain of a particular socket address.
+.PP
+To allow any type of socket address to be passed to
+interfaces in the sockets API,
+the type
+.I struct sockaddr
+is defined.
+The purpose of this type is purely to allow casting of
+domain-specific socket address types to a "generic" type,
+so as to avoid compiler warnings about type mismatches in
+calls to the sockets API.
+.PP
+In addition, the sockets API provides the data type
+.IR "struct sockaddr_storage" .
+This type
+is suitable to accommodate all supported domain-specific socket
+address structures; it is large enough and is aligned properly.
+(In particular, it is large enough to hold
+IPv6 socket addresses.)
+The structure includes the following field, which can be used to identify
+the type of socket address actually stored in the structure:
+.PP
+.in +4n
+.EX
+ sa_family_t ss_family;
+.EE
+.in
+.PP
+The
+.I sockaddr_storage
+structure is useful in programs that must handle socket addresses
+in a generic way
+(e.g., programs that must deal with both IPv4 and IPv6 socket addresses).
+.SS Socket options
+The socket options listed below can be set by using
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2)
+with the socket level set to
+.B SOL_SOCKET
+for all sockets.
+Unless otherwise noted,
+.I optval
+is a pointer to an
+.IR int .
+.\" FIXME .
+.\" In the list below, the text used to describe argument types
+.\" for each socket option should be more consistent
+.\"
+.\" SO_ACCEPTCONN is in POSIX.1-2001, and its origin is explained in
+.\" W R Stevens, UNPv1
+.TP
+.B SO_ACCEPTCONN
+Returns a value indicating whether or not this socket has been marked
+to accept connections with
+.BR listen (2).
+The value 0 indicates that this is not a listening socket,
+the value 1 indicates that this is a listening socket.
+This socket option is read-only.
+.TP
+.BR SO_ATTACH_FILTER " (since Linux 2.2), " SO_ATTACH_BPF " (since Linux 3.19)"
+Attach a classic BPF
+.RB ( SO_ATTACH_FILTER )
+or an extended BPF
+.RB ( SO_ATTACH_BPF )
+program to the socket for use as a filter of incoming packets.
+A packet will be dropped if the filter program returns zero.
+If the filter program returns a
+nonzero value which is less than the packet's data length,
+the packet will be truncated to the length returned.
+If the value returned by the filter is greater than or equal to the
+packet's data length, the packet is allowed to proceed unmodified.
+.IP
+The argument for
+.B SO_ATTACH_FILTER
+is a
+.I sock_fprog
+structure, defined in
+.IR <linux/filter.h> :
+.IP
+.in +4n
+.EX
+struct sock_fprog {
+ unsigned short len;
+ struct sock_filter *filter;
+};
+.EE
+.in
+.IP
+The argument for
+.B SO_ATTACH_BPF
+is a file descriptor returned by the
+.BR bpf (2)
+system call and must refer to a program of type
+.BR BPF_PROG_TYPE_SOCKET_FILTER .
+.IP
+These options may be set multiple times for a given socket,
+each time replacing the previous filter program.
+The classic and extended versions may be called on the same socket,
+but the previous filter will always be replaced such that a socket
+never has more than one filter defined.
+.IP
+Both classic and extended BPF are explained in the kernel source file
+.IR Documentation/networking/filter.txt .
+.TP
+.BR SO_ATTACH_REUSEPORT_CBPF ", " SO_ATTACH_REUSEPORT_EBPF
+For use with the
+.B SO_REUSEPORT
+option, these options allow the user to set a classic BPF
+.RB ( SO_ATTACH_REUSEPORT_CBPF )
+or an extended BPF
+.RB ( SO_ATTACH_REUSEPORT_EBPF )
+program which defines how packets are assigned to
+the sockets in the reuseport group (that is, all sockets which have
+.B SO_REUSEPORT
+set and are using the same local address to receive packets).
+.IP
+The BPF program must return an index between 0 and N\-1 representing
+the socket which should receive the packet
+(where N is the number of sockets in the group).
+If the BPF program returns an invalid index,
+socket selection will fall back to the plain
+.B SO_REUSEPORT
+mechanism.
+.IP
+Sockets are numbered in the order in which they are added to the group
+(that is, the order of
+.BR bind (2)
+calls for UDP sockets or the order of
+.BR listen (2)
+calls for TCP sockets).
+New sockets added to a reuseport group will inherit the BPF program.
+When a socket is removed from a reuseport group (via
+.BR close (2)),
+the last socket in the group will be moved into the closed socket's
+position.
+.IP
+These options may be set repeatedly at any time on any socket in the group
+to replace the current BPF program used by all sockets in the group.
+.IP
+.B SO_ATTACH_REUSEPORT_CBPF
+takes the same argument type as
+.B SO_ATTACH_FILTER
+and
+.B SO_ATTACH_REUSEPORT_EBPF
+takes the same argument type as
+.BR SO_ATTACH_BPF .
+.IP
+UDP support for this feature is available since Linux 4.5;
+TCP support is available since Linux 4.6.
+.TP
+.B SO_BINDTODEVICE
+Bind this socket to a particular device like \[lq]eth0\[rq],
+as specified in the passed interface name.
+If the
+name is an empty string or the option length is zero, the socket device
+binding is removed.
+The passed option is a variable-length null-terminated
+interface name string with the maximum size of
+.BR IFNAMSIZ .
+If a socket is bound to an interface,
+only packets received from that particular interface are processed by the
+socket.
+Note that this works only for some socket types, particularly
+.B AF_INET
+sockets.
+It is not supported for packet sockets (use normal
+.BR bind (2)
+there).
+.IP
+Before Linux 3.8,
+this socket option could be set, but could not be retrieved with
+.BR getsockopt (2).
+Since Linux 3.8, it is readable.
+The
+.I optlen
+argument should contain the buffer size available
+to receive the device name and is recommended to be
+.B IFNAMSIZ
+bytes.
+The real device name length is reported back in the
+.I optlen
+argument.
+.TP
+.B SO_BROADCAST
+Set or get the broadcast flag.
+When enabled, datagram sockets are allowed to send
+packets to a broadcast address.
+This option has no effect on stream-oriented sockets.
+.TP
+.B SO_BSDCOMPAT
+Enable BSD bug-to-bug compatibility.
+This is used by the UDP protocol module in Linux 2.0 and 2.2.
+If enabled, ICMP errors received for a UDP socket will not be passed
+to the user program.
+In later kernel versions, support for this option has been phased out:
+Linux 2.4 silently ignores it, and Linux 2.6 generates a kernel warning
+(printk()) if a program uses this option.
+Linux 2.0 also enabled BSD bug-to-bug compatibility
+options (random header changing, skipping of the broadcast flag) for raw
+sockets with this option, but that was removed in Linux 2.2.
+.TP
+.B SO_DEBUG
+Enable socket debugging.
+Allowed only for processes with the
+.B CAP_NET_ADMIN
+capability or an effective user ID of 0.
+.TP
+.BR SO_DETACH_FILTER " (since Linux 2.2), " SO_DETACH_BPF " (since Linux 3.19)"
+These two options, which are synonyms,
+may be used to remove the classic or extended BPF
+program attached to a socket with either
+.B SO_ATTACH_FILTER
+or
+.BR SO_ATTACH_BPF .
+The option value is ignored.
+.TP
+.BR SO_DOMAIN " (since Linux 2.6.32)"
+Retrieves the socket domain as an integer, returning a value such as
+.BR AF_INET6 .
+See
+.BR socket (2)
+for details.
+This socket option is read-only.
+.TP
+.B SO_ERROR
+Get and clear the pending socket error.
+This socket option is read-only.
+Expects an integer.
+.TP
+.B SO_DONTROUTE
+Don't send via a gateway, send only to directly connected hosts.
+The same effect can be achieved by setting the
+.B MSG_DONTROUTE
+flag on a socket
+.BR send (2)
+operation.
+Expects an integer boolean flag.
+.TP
+.BR SO_INCOMING_CPU " (gettable since Linux 3.19, settable since Linux 4.4)"
+.\" getsockopt 2c8c56e15df3d4c2af3d656e44feb18789f75837
+.\" setsockopt 70da268b569d32a9fddeea85dc18043de9d89f89
+Sets or gets the CPU affinity of a socket.
+Expects an integer flag.
+.IP
+.in +4n
+.EX
+int cpu = 1;
+setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu,
+ sizeof(cpu));
+.EE
+.in
+.IP
+Because all of the packets for a single stream
+(i.e., all packets for the same 4-tuple)
+arrive on the single RX queue that is associated with a particular CPU,
+the typical use case is to employ one listening process per RX queue,
+with the incoming flow being handled by a listener
+on the same CPU that is handling the RX queue.
+This provides optimal NUMA behavior and keeps CPU caches hot.
+.\"
+.\" From an email conversation with Eric Dumazet:
+.\" >> Note that setting the option is not supported if SO_REUSEPORT is used.
+.\" >
+.\" > Please define "not supported". Does this yield an API diagnostic?
+.\" > If so, what is it?
+.\" >
+.\" >> Socket will be selected from an array, either by a hash or BPF program
+.\" >> that has no access to this information.
+.\" >
+.\" > Sorry -- I'm lost here. How does this comment relate to the proposed
+.\" > man page text above?
+.\"
+.\" Simply that :
+.\"
+.\" If an application uses both SO_INCOMING_CPU and SO_REUSEPORT, then
+.\" SO_REUSEPORT logic, selecting the socket to receive the packet, ignores
+.\" SO_INCOMING_CPU setting.
+.TP
+.BR SO_INCOMING_NAPI_ID " (gettable since Linux 4.12)"
+.\" getsockopt 6d4339028b350efbf87c61e6d9e113e5373545c9
+Returns a system-level unique ID called NAPI ID that is associated
+with a RX queue on which the last packet associated with that
+socket is received.
+.IP
+This can be used by an application to split the incoming flows among worker
+threads based on the RX queue on which the packets associated with the
+flows are received.
+It allows each worker thread to be associated with
+a NIC HW receive queue and service all the connection
+requests received on that RX queue.
+This mapping between an app thread and
+a HW NIC queue streamlines the
+flow of data from the NIC to the application.
+.TP
+.B SO_KEEPALIVE
+Enable sending of keep-alive messages on connection-oriented sockets.
+Expects an integer boolean flag.
+.TP
+.B SO_LINGER
+Sets or gets the
+.B SO_LINGER
+option.
+The argument is a
+.I linger
+structure.
+.IP
+.in +4n
+.EX
+struct linger {
+ int l_onoff; /* linger active */
+ int l_linger; /* how many seconds to linger for */
+};
+.EE
+.in
+.IP
+When enabled, a
+.BR close (2)
+or
+.BR shutdown (2)
+will not return until all queued messages for the socket have been
+successfully sent or the linger timeout has been reached.
+Otherwise,
+the call returns immediately and the closing is done in the background.
+When the socket is closed as part of
+.BR exit (2),
+it always lingers in the background.
+.TP
+.B SO_LOCK_FILTER
+.\" commit d59577b6ffd313d0ab3be39cb1ab47e29bdc9182
+When set, this option will prevent
+changing the filters associated with the socket.
+These filters include any set using the socket options
+.BR SO_ATTACH_FILTER ,
+.BR SO_ATTACH_BPF ,
+.BR SO_ATTACH_REUSEPORT_CBPF ,
+and
+.BR SO_ATTACH_REUSEPORT_EBPF .
+.IP
+The typical use case is for a privileged process to set up a raw socket
+(an operation that requires the
+.B CAP_NET_RAW
+capability), apply a restrictive filter, set the
+.B SO_LOCK_FILTER
+option,
+and then either drop its privileges or pass the socket file descriptor
+to an unprivileged process via a UNIX domain socket.
+.IP
+Once the
+.B SO_LOCK_FILTER
+option has been enabled, attempts to change or remove the filter
+attached to a socket, or to disable the
+.B SO_LOCK_FILTER
+option will fail with the error
+.BR EPERM .
+.TP
+.BR SO_MARK " (since Linux 2.6.25)"
+.\" commit 4a19ec5800fc3bb64e2d87c4d9fdd9e636086fe0
+.\" and 914a9ab386a288d0f22252fc268ecbc048cdcbd5
+Set the mark for each packet sent through this socket
+(similar to the netfilter MARK target but socket-based).
+Changing the mark can be used for mark-based
+routing without netfilter or for packet filtering.
+Setting this option requires the
+.B CAP_NET_ADMIN
+capability.
+.TP
+.B SO_OOBINLINE
+If this option is enabled,
+out-of-band data is directly placed into the receive data stream.
+Otherwise, out-of-band data is passed only when the
+.B MSG_OOB
+flag is set during receiving.
+.\" don't document it because it can do too much harm.
+.\".B SO_NO_CHECK
+.\" The kernel has support for the SO_NO_CHECK socket
+.\" option (boolean: 0 == default, calculate checksum on xmit,
+.\" 1 == do not calculate checksum on xmit).
+.\" Additional note from Andi Kleen on SO_NO_CHECK (2010-08-30)
+.\" On Linux UDP checksums are essentially free and there's no reason
+.\" to turn them off and it would disable another safety line.
+.\" That is why I didn't document the option.
+.TP
+.B SO_PASSCRED
+Enable or disable the receiving of the
+.B SCM_CREDENTIALS
+control message.
+For more information, see
+.BR unix (7).
+.TP
+.B SO_PASSSEC
+Enable or disable the receiving of the
+.B SCM_SECURITY
+control message.
+For more information, see
+.BR unix (7).
+.TP
+.BR SO_PEEK_OFF " (since Linux 3.4)"
+.\" commit ef64a54f6e558155b4f149bb10666b9e914b6c54
+This option, which is currently supported only for
+.BR unix (7)
+sockets, sets the value of the "peek offset" for the
+.BR recv (2)
+system call when used with the
+.B MSG_PEEK
+flag.
+.IP
+When this option is set to a negative value
+(it is set to \-1 for all new sockets),
+traditional behavior is provided:
+.BR recv (2)
+with the
+.B MSG_PEEK
+flag will peek data from the front of the queue.
+.IP
+When the option is set to a value greater than or equal to zero,
+then the next peek at data queued in the socket will occur at
+the byte offset specified by the option value.
+At the same time, the "peek offset" will be
+incremented by the number of bytes that were peeked from the queue,
+so that a subsequent peek will return the next data in the queue.
+.IP
+If data is removed from the front of the queue via a call to
+.BR recv (2)
+(or similar) without the
+.B MSG_PEEK
+flag, the "peek offset" will be decreased by the number of bytes removed.
+In other words, receiving data without the
+.B MSG_PEEK
+flag will cause the "peek offset" to be adjusted to maintain
+the correct relative position in the queued data,
+so that a subsequent peek will retrieve the data that would have been
+retrieved had the data not been removed.
+.IP
+For datagram sockets, if the "peek offset" points to the middle of a packet,
+the data returned will be marked with the
+.B MSG_TRUNC
+flag.
+.IP
+The following example serves to illustrate the use of
+.BR SO_PEEK_OFF .
+Suppose a stream socket has the following queued input data:
+.IP
+.in +4n
+.EX
+aabbccddeeff
+.EE
+.in
+.IP
+The following sequence of
+.BR recv (2)
+calls would have the effect noted in the comments:
+.IP
+.in +4n
+.EX
+int ov = 4; // Set peek offset to 4
+setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &ov, sizeof(ov));
+\&
+recv(fd, buf, 2, MSG_PEEK); // Peeks "cc"; offset set to 6
+recv(fd, buf, 2, MSG_PEEK); // Peeks "dd"; offset set to 8
+recv(fd, buf, 2, 0); // Reads "aa"; offset set to 6
+recv(fd, buf, 2, MSG_PEEK); // Peeks "ee"; offset set to 8
+.EE
+.in
+.TP
+.B SO_PEERCRED
+Return the credentials of the peer process connected to this socket.
+For further details, see
+.BR unix (7).
+.TP
+.BR SO_PEERSEC " (since Linux 2.6.2)"
+Return the security context of the peer socket connected to this socket.
+For further details, see
+.BR unix (7)
+and
+.BR ip (7).
+.TP
+.B SO_PRIORITY
+Set the protocol-defined priority for all packets to be sent on
+this socket.
+Linux uses this value to order the networking queues:
+packets with a higher priority may be processed first depending
+on the selected device queueing discipline.
+.\" For
+.\" .BR ip (7),
+.\" this also sets the IP type-of-service (TOS) field for outgoing packets.
+Setting a priority outside the range 0 to 6 requires the
+.B CAP_NET_ADMIN
+capability.
+.TP
+.BR SO_PROTOCOL " (since Linux 2.6.32)"
+Retrieves the socket protocol as an integer, returning a value such as
+.BR IPPROTO_SCTP .
+See
+.BR socket (2)
+for details.
+This socket option is read-only.
+.TP
+.B SO_RCVBUF
+Sets or gets the maximum socket receive buffer in bytes.
+The kernel doubles this value (to allow space for bookkeeping overhead)
+when it is set using
+.\" Most (all?) other implementations do not do this -- MTK, Dec 05
+.BR setsockopt (2),
+and this doubled value is returned by
+.BR getsockopt (2).
+.\" The following thread on LKML is quite informative:
+.\" getsockopt/setsockopt with SO_RCVBUF and SO_SNDBUF "non-standard" behavior
+.\" 17 July 2012
+.\" http://thread.gmane.org/gmane.linux.kernel/1328935
+The default value is set by the
+.I /proc/sys/net/core/rmem_default
+file, and the maximum allowed value is set by the
+.I /proc/sys/net/core/rmem_max
+file.
+The minimum (doubled) value for this option is 256.
+.TP
+.BR SO_RCVBUFFORCE " (since Linux 2.6.14)"
+Using this socket option, a privileged
+.RB ( CAP_NET_ADMIN )
+process can perform the same task as
+.BR SO_RCVBUF ,
+but the
+.I rmem_max
+limit can be overridden.
+.TP
+.BR SO_RCVLOWAT " and " SO_SNDLOWAT
+Specify the minimum number of bytes in the buffer until the socket layer
+will pass the data to the protocol
+.RB ( SO_SNDLOWAT )
+or the user on receiving
+.RB ( SO_RCVLOWAT ).
+These two values are initialized to 1.
+.B SO_SNDLOWAT
+is not changeable on Linux
+.RB ( setsockopt (2)
+fails with the error
+.BR ENOPROTOOPT ).
+.B SO_RCVLOWAT
+is changeable
+only since Linux 2.4.
+.IP
+Before Linux 2.6.28
+.\" Tested on kernel 2.6.14 -- mtk, 30 Nov 05
+.BR select (2),
+.BR poll (2),
+and
+.BR epoll (7)
+did not respect the
+.B SO_RCVLOWAT
+setting on Linux,
+and indicated a socket as readable when even a single byte of data
+was available.
+A subsequent read from the socket would then block until
+.B SO_RCVLOWAT
+bytes are available.
+Since Linux 2.6.28,
+.\" commit c7004482e8dcb7c3c72666395cfa98a216a4fb70
+.BR select (2),
+.BR poll (2),
+and
+.BR epoll (7)
+indicate a socket as readable only if at least
+.B SO_RCVLOWAT
+bytes are available.
+.TP
+.BR SO_RCVTIMEO " and " SO_SNDTIMEO
+.\" Not implemented in Linux 2.0.
+.\" Implemented in Linux 2.1.11 for getsockopt: always return a zero struct.
+.\" Implemented in Linux 2.3.41 for setsockopt, and actually used.
+Specify the receiving or sending timeouts until reporting an error.
+The argument is a
+.IR "struct timeval" .
+If an input or output function blocks for this period of time, and
+data has been sent or received, the return value of that function
+will be the amount of data transferred; if no data has been transferred
+and the timeout has been reached, then \-1 is returned with
+.I errno
+set to
+.B EAGAIN
+or
+.BR EWOULDBLOCK ,
+.\" in fact to EAGAIN
+or
+.B EINPROGRESS
+(for
+.BR connect (2))
+just as if the socket was specified to be nonblocking.
+If the timeout is set to zero (the default),
+then the operation will never time out.
+Timeouts only have effect for system calls that perform socket I/O (e.g.,
+.BR accept (2),
+.BR connect (2),
+.BR read (2),
+.BR recvmsg (2),
+.BR send (2),
+.BR sendmsg (2));
+timeouts have no effect for
+.BR select (2),
+.BR poll (2),
+.BR epoll_wait (2),
+and so on.
+.TP
+.B SO_REUSEADDR
+.\" commit c617f398edd4db2b8567a28e899a88f8f574798d
+.\" https://lwn.net/Articles/542629/
+Indicates that the rules used in validating addresses supplied in a
+.BR bind (2)
+call should allow reuse of local addresses.
+For
+.B AF_INET
+sockets this
+means that a socket may bind, except when there
+is an active listening socket bound to the address.
+When the listening socket is bound to
+.B INADDR_ANY
+with a specific port then it is not possible
+to bind to this port for any local address.
+Argument is an integer boolean flag.
+.TP
+.BR SO_REUSEPORT " (since Linux 3.9)"
+Permits multiple
+.B AF_INET
+or
+.B AF_INET6
+sockets to be bound to an identical socket address.
+This option must be set on each socket (including the first socket)
+prior to calling
+.BR bind (2)
+on the socket.
+To prevent port hijacking,
+all of the processes binding to the same address must have the same
+effective UID.
+This option can be employed with both TCP and UDP sockets.
+.IP
+For TCP sockets, this option allows
+.BR accept (2)
+load distribution in a multi-threaded server to be improved by
+using a distinct listener socket for each thread.
+This provides improved load distribution as compared
+to traditional techniques such as using a single
+.BR accept (2)ing
+thread that distributes connections,
+or having multiple threads that compete to
+.BR accept (2)
+from the same socket.
+.IP
+For UDP sockets,
+the use of this option can provide better distribution
+of incoming datagrams to multiple processes (or threads) as compared
+to the traditional technique of having multiple processes
+compete to receive datagrams on the same socket.
+.TP
+.BR SO_RXQ_OVFL " (since Linux 2.6.33)"
+.\" commit 3b885787ea4112eaa80945999ea0901bf742707f
+Indicates that an unsigned 32-bit value ancillary message (cmsg)
+should be attached to received skbs indicating
+the number of packets dropped by the socket since its creation.
+.TP
+.BR SO_SELECT_ERR_QUEUE " (since Linux 3.10)"
+.\" commit 7d4c04fc170087119727119074e72445f2bb192b
+.\" Author: Keller, Jacob E <jacob.e.keller@intel.com>
+When this option is set on a socket,
+an error condition on a socket causes notification also via the
+.I exceptfds
+set of
+.BR select (2).
+Similarly,
+.BR poll (2)
+also returns a
+.B POLLPRI
+whenever a
+.B POLLERR
+event is returned.
+.\" It does not affect wake up.
+.IP
+Background: this option was added when waking up on an error condition
+occurred only via the
+.I readfds
+and
+.I writefds
+sets of
+.BR select (2).
+The option was added to allow monitoring for error conditions via the
+.I exceptfds
+argument without simultaneously having to receive notifications (via
+.IR readfds )
+for regular data that can be read from the socket.
+After changes in Linux 4.16,
+.\" commit 6e5d58fdc9bedd0255a8
+.\" ("skbuff: Fix not waking applications when errors are enqueued")
+the use of this flag to achieve the desired notifications
+is no longer necessary.
+This option is nevertheless retained for backwards compatibility.
+.TP
+.B SO_SNDBUF
+Sets or gets the maximum socket send buffer in bytes.
+The kernel doubles this value (to allow space for bookkeeping overhead)
+when it is set using
+.\" Most (all?) other implementations do not do this -- MTK, Dec 05
+.\" See also the comment to SO_RCVBUF (17 Jul 2012 LKML mail)
+.BR setsockopt (2),
+and this doubled value is returned by
+.BR getsockopt (2).
+The default value is set by the
+.I /proc/sys/net/core/wmem_default
+file and the maximum allowed value is set by the
+.I /proc/sys/net/core/wmem_max
+file.
+The minimum (doubled) value for this option is 2048.
+.TP
+.BR SO_SNDBUFFORCE " (since Linux 2.6.14)"
+Using this socket option, a privileged
+.RB ( CAP_NET_ADMIN )
+process can perform the same task as
+.BR SO_SNDBUF ,
+but the
+.I wmem_max
+limit can be overridden.
+.TP
+.B SO_TIMESTAMP
+Enable or disable the receiving of the
+.B SO_TIMESTAMP
+control message.
+The timestamp control message is sent with level
+.B SOL_SOCKET
+and a
+.I cmsg_type
+of
+.BR SCM_TIMESTAMP .
+The
+.I cmsg_data
+field is a
+.I "struct timeval"
+indicating the
+reception time of the last packet passed to the user in this call.
+See
+.BR cmsg (3)
+for details on control messages.
+.TP
+.BR SO_TIMESTAMPNS " (since Linux 2.6.22)"
+.\" commit 92f37fd2ee805aa77925c1e64fd56088b46094fc
+Enable or disable the receiving of the
+.B SO_TIMESTAMPNS
+control message.
+The timestamp control message is sent with level
+.B SOL_SOCKET
+and a
+.I cmsg_type
+of
+.BR SCM_TIMESTAMPNS .
+The
+.I cmsg_data
+field is a
+.I "struct timespec"
+indicating the
+reception time of the last packet passed to the user in this call.
+The clock used for the timestamp is
+.BR CLOCK_REALTIME .
+See
+.BR cmsg (3)
+for details on control messages.
+.IP
+A socket cannot mix
+.B SO_TIMESTAMP
+and
+.BR SO_TIMESTAMPNS :
+the two modes are mutually exclusive.
+.TP
+.B SO_TYPE
+Gets the socket type as an integer (e.g.,
+.BR SOCK_STREAM ).
+This socket option is read-only.
+.TP
+.BR SO_BUSY_POLL " (since Linux 3.11)"
+Sets the approximate time in microseconds to busy poll on a blocking receive
+when there is no data.
+Increasing this value requires
+.BR CAP_NET_ADMIN .
+The default for this option is controlled by the
+.I /proc/sys/net/core/busy_read
+file.
+.IP
+The value in the
+.I /proc/sys/net/core/busy_poll
+file determines how long
+.BR select (2)
+and
+.BR poll (2)
+will busy poll when they operate on sockets with
+.B SO_BUSY_POLL
+set and no events to report are found.
+.IP
+In both cases,
+busy polling will only be done when the socket last received data
+from a network device that supports this option.
+.IP
+While busy polling may improve latency of some applications,
+care must be taken when using it since this will increase
+both CPU utilization and power usage.
+.SS Signals
+When writing onto a connection-oriented socket that has been shut down
+(by the local or the remote end)
+.B SIGPIPE
+is sent to the writing process and
+.B EPIPE
+is returned.
+The signal is not sent when the write call
+specified the
+.B MSG_NOSIGNAL
+flag.
+.PP
+When requested with the
+.B FIOSETOWN
+.BR fcntl (2)
+or
+.B SIOCSPGRP
+.BR ioctl (2),
+.B SIGIO
+is sent when an I/O event occurs.
+It is possible to use
+.BR poll (2)
+or
+.BR select (2)
+in the signal handler to find out which socket the event occurred on.
+An alternative (in Linux 2.2) is to set a real-time signal using the
+.B F_SETSIG
+.BR fcntl (2);
+the handler of the real-time signal will be called with
+the file descriptor in the
+.I si_fd
+field of its
+.IR siginfo_t .
+See
+.BR fcntl (2)
+for more information.
+.PP
+Under some circumstances (e.g., multiple processes accessing a
+single socket), the condition that caused the
+.B SIGIO
+may have already disappeared when the process reacts to the signal.
+If this happens, the process should wait again because Linux
+will resend the signal later.
+.\" .SS Ancillary messages
+.SS /proc interfaces
+The core socket networking parameters can be accessed
+via files in the directory
+.IR /proc/sys/net/core/ .
+.TP
+.I rmem_default
+contains the default setting in bytes of the socket receive buffer.
+.TP
+.I rmem_max
+contains the maximum socket receive buffer size in bytes which a user may
+set by using the
+.B SO_RCVBUF
+socket option.
+.TP
+.I wmem_default
+contains the default setting in bytes of the socket send buffer.
+.TP
+.I wmem_max
+contains the maximum socket send buffer size in bytes which a user may
+set by using the
+.B SO_SNDBUF
+socket option.
+.TP
+.IR message_cost " and " message_burst
+configure the token bucket filter used to load limit warning messages
+caused by external network events.
+.TP
+.I netdev_max_backlog
+Maximum number of packets in the global input queue.
+.TP
+.I optmem_max
+Maximum length of ancillary data and user control data like the iovecs
+per socket.
+.\" netdev_fastroute is not documented because it is experimental
+.SS Ioctls
+These operations can be accessed using
+.BR ioctl (2):
+.PP
+.in +4n
+.EX
+.IB error " = ioctl(" ip_socket ", " ioctl_type ", " &value_result ");"
+.EE
+.in
+.TP
+.B SIOCGSTAMP
+Return a
+.I struct timeval
+with the receive timestamp of the last packet passed to the user.
+This is useful for accurate round trip time measurements.
+See
+.BR setitimer (2)
+for a description of
+.IR "struct timeval" .
+.\"
+This ioctl should be used only if the socket options
+.B SO_TIMESTAMP
+and
+.B SO_TIMESTAMPNS
+are not set on the socket.
+Otherwise, it returns the timestamp of the
+last packet that was received while
+.B SO_TIMESTAMP
+and
+.B SO_TIMESTAMPNS
+were not set, or it fails if no such packet has been received,
+(i.e.,
+.BR ioctl (2)
+returns \-1 with
+.I errno
+set to
+.BR ENOENT ).
+.TP
+.B SIOCSPGRP
+Set the process or process group that is to receive
+.B SIGIO
+or
+.B SIGURG
+signals when I/O becomes possible or urgent data is available.
+The argument is a pointer to a
+.IR pid_t .
+For further details, see the description of
+.B F_SETOWN
+in
+.BR fcntl (2).
+.TP
+.B FIOASYNC
+Change the
+.B O_ASYNC
+flag to enable or disable asynchronous I/O mode of the socket.
+Asynchronous I/O mode means that the
+.B SIGIO
+signal or the signal set with
+.B F_SETSIG
+is raised when a new I/O event occurs.
+.IP
+Argument is an integer boolean flag.
+(This operation is synonymous with the use of
+.BR fcntl (2)
+to set the
+.B O_ASYNC
+flag.)
+.\"
+.TP
+.B SIOCGPGRP
+Get the current process or process group that receives
+.B SIGIO
+or
+.B SIGURG
+signals,
+or 0
+when none is set.
+.PP
+Valid
+.BR fcntl (2)
+operations:
+.TP
+.B FIOGETOWN
+The same as the
+.B SIOCGPGRP
+.BR ioctl (2).
+.TP
+.B FIOSETOWN
+The same as the
+.B SIOCSPGRP
+.BR ioctl (2).
+.SH VERSIONS
+.B SO_BINDTODEVICE
+was introduced in Linux 2.0.30.
+.B SO_PASSCRED
+is new in Linux 2.2.
+The
+.I /proc
+interfaces were introduced in Linux 2.2.
+.B SO_RCVTIMEO
+and
+.B SO_SNDTIMEO
+are supported since Linux 2.3.41.
+Earlier, timeouts were fixed to
+a protocol-specific setting, and could not be read or written.
+.SH NOTES
+Linux assumes that half of the send/receive buffer is used for internal
+kernel structures; thus the values in the corresponding
+.I /proc
+files are twice what can be observed on the wire.
+.PP
+Linux will allow port reuse only with the
+.B SO_REUSEADDR
+option
+when this option was set both in the previous program that performed a
+.BR bind (2)
+to the port and in the program that wants to reuse the port.
+This differs from some implementations (e.g., FreeBSD)
+where only the later program needs to set the
+.B SO_REUSEADDR
+option.
+Typically this difference is invisible, since, for example, a server
+program is designed to always set this option.
+.\" .SH AUTHORS
+.\" This man page was written by Andi Kleen.
+.SH SEE ALSO
+.BR wireshark (1),
+.BR bpf (2),
+.BR connect (2),
+.BR getsockopt (2),
+.BR setsockopt (2),
+.BR socket (2),
+.BR pcap (3),
+.BR address_families (7),
+.BR capabilities (7),
+.BR ddp (7),
+.BR ip (7),
+.BR ipv6 (7),
+.BR packet (7),
+.BR tcp (7),
+.BR udp (7),
+.BR unix (7),
+.BR tcpdump (8)
diff --git a/man7/spufs.7 b/man7/spufs.7
new file mode 100644
index 0000000..fc3b424
--- /dev/null
+++ b/man7/spufs.7
@@ -0,0 +1,767 @@
+.\" Copyright (c) International Business Machines Corp., 2006
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" HISTORY:
+.\" 2005-09-28, created by Arnd Bergmann <arndb@de.ibm.com>,
+.\" Mark Nutter <mnutter@us.ibm.com> and
+.\" Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
+.\" 2006-06-16, revised by Eduardo M. Fleury <efleury@br.ibm.com>
+.\" 2007-07-10, quite a lot of polishing by mtk
+.\" 2007-09-28, updates for newer kernels by Jeremy Kerr <jk@ozlabs.org>
+.\"
+.TH spufs 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+spufs \- SPU filesystem
+.SH DESCRIPTION
+The SPU filesystem is used on PowerPC machines that implement the
+Cell Broadband Engine Architecture in order to access Synergistic
+Processor Units (SPUs).
+.PP
+The filesystem provides a name space similar to POSIX shared
+memory or message queues.
+Users that have write permissions
+on the filesystem can use
+.BR spu_create (2)
+to establish SPU contexts under the
+.B spufs
+root directory.
+.PP
+Every SPU context is represented by a directory containing
+a predefined set of files.
+These files can be
+used for manipulating the state of the logical SPU.
+Users can change permissions on the files, but can't
+add or remove files.
+.SS Mount options
+.TP
+.B uid=<uid>
+Set the user owning the mount point; the default is 0 (root).
+.TP
+.B gid=<gid>
+Set the group owning the mount point; the default is 0 (root).
+.TP
+.B mode=<mode>
+Set the mode of the top-level directory in
+.BR spufs ,
+as an octal mode string.
+The default is 0775.
+.SS Files
+The files in
+.B spufs
+mostly follow the standard behavior for regular system calls like
+.BR read (2)
+or
+.BR write (2),
+but often support only a subset of the operations
+supported on regular filesystems.
+This list details the supported
+operations and the deviations from the standard behavior described
+in the respective man pages.
+.PP
+All files that support the
+.BR read (2)
+operation also support
+.BR readv (2)
+and all files that support the
+.BR write (2)
+operation also support
+.BR writev (2).
+All files support the
+.BR access (2)
+and
+.BR stat (2)
+family of operations, but for the latter call,
+the only fields of the returned
+.I stat
+structure that contain reliable information are
+.IR st_mode ,
+.IR st_nlink ,
+.IR st_uid ,
+and
+.IR st_gid .
+.PP
+All files support the
+.BR chmod (2)/\c
+.BR fchmod (2)
+and
+.BR chown (2)/\c
+.BR fchown (2)
+operations, but will not be able to grant permissions that contradict
+the possible operations (e.g., read access on the
+.I wbox
+file).
+.PP
+The current set of files is:
+.TP
+.I /capabilities
+Contains a comma-delimited string representing the capabilities of this
+SPU context.
+Possible capabilities are:
+.RS
+.TP
+.B sched
+This context may be scheduled.
+.TP
+.B step
+This context can be run in single-step mode, for debugging.
+.PP
+New capabilities flags may be added in the future.
+.RE
+.TP
+.I /mem
+The contents of the local storage memory of the SPU.
+This can be accessed like a regular shared memory
+file and contains both code and data in the address
+space of the SPU.
+The possible operations on an open
+.I mem
+file are:
+.RS
+.TP
+.BR read "(2), " pread "(2), " write "(2), " pwrite "(2), " lseek (2)
+These operate as usual, with the exception that
+.BR lseek (2),
+.BR write (2),
+and
+.BR pwrite (2)
+are not supported beyond the end of the file.
+The file size
+is the size of the local storage of the SPU,
+which is normally 256 kilobytes.
+.TP
+.BR mmap (2)
+Mapping
+.I mem
+into the process address space provides access to the SPU local
+storage within the process address space.
+Only
+.B MAP_SHARED
+mappings are allowed.
+.RE
+.TP
+.I /regs
+Contains the saved general-purpose registers of the SPU context.
+This file contains the 128-bit values of each register,
+from register 0 to register 127, in order.
+This allows the general-purpose registers to be
+inspected for debugging.
+.IP
+Reading from or writing to this file requires that the context is
+scheduled out, so use of this file is not recommended in normal
+program operation.
+.IP
+The
+.I regs
+file is not present on contexts that have been created with the
+.B SPU_CREATE_NOSCHED
+flag.
+.TP
+.I /mbox
+The first SPU-to-CPU communication mailbox.
+This file is read-only and can be read in units of 4 bytes.
+The file can be used only in nonblocking mode \- even
+.BR poll (2)
+cannot be used to block on this file.
+The only possible operation on an open
+.I mbox
+file is:
+.RS
+.TP
+.BR read (2)
+If
+.I count
+is smaller than four,
+.BR read (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+If there is no data available in the mailbox (i.e., the SPU has not
+sent a mailbox message), the return value is set to \-1 and
+.I errno
+is set to
+.BR EAGAIN .
+When data
+has been read successfully, four bytes are placed in
+the data buffer and the value four is returned.
+.RE
+.TP
+.I /ibox
+The second SPU-to-CPU communication mailbox.
+This file is similar to the first mailbox file, but can be read
+in blocking I/O mode, thus calling
+.BR read (2)
+on an open
+.I ibox
+file will block until the SPU has written data to its interrupt mailbox
+channel (unless the file has been opened with
+.BR O_NONBLOCK ,
+see below).
+Also,
+.BR poll (2)
+and similar system calls can be used to monitor for the presence
+of mailbox data.
+.IP
+The possible operations on an open
+.I ibox
+file are:
+.RS
+.TP
+.BR read (2)
+If
+.I count
+is smaller than four,
+.BR read (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+If there is no data available in the mailbox and the file
+descriptor has been opened with
+.BR O_NONBLOCK ,
+the return value is set to \-1 and
+.I errno
+is set to
+.BR EAGAIN .
+.IP
+If there is no data available in the mailbox and the file
+descriptor has been opened without
+.BR O_NONBLOCK ,
+the call will
+block until the SPU writes to its interrupt mailbox channel.
+When data has been read successfully, four bytes are placed in
+the data buffer and the value four is returned.
+.TP
+.BR poll (2)
+A poll on the
+.I ibox
+file returns
+.I "(POLLIN | POLLRDNORM)"
+whenever data is available for reading.
+.RE
+.TP
+.I /wbox
+The CPU-to-SPU communication mailbox.
+It is write-only and can be written in units of four bytes.
+If the mailbox is full,
+.BR write (2)
+will block, and
+.BR poll (2)
+can be used to block until the mailbox is available for writing again.
+The possible operations on an open
+.I wbox
+file are:
+.RS
+.TP
+.BR write (2)
+If
+.I count
+is smaller than four,
+.BR write (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+If there is no space available in the mailbox and the file
+descriptor has been opened with
+.BR O_NONBLOCK ,
+the return
+value is set to \-1 and
+.I errno
+is set to
+.BR EAGAIN .
+.IP
+If there is no space available in the mailbox and the file
+descriptor has been opened without
+.BR O_NONBLOCK ,
+the call will block until the SPU reads from its
+PPE (PowerPC Processing Element)
+mailbox channel.
+When data has been written successfully,
+the system call returns four as its function result.
+.TP
+.BR poll (2)
+A poll on the
+.I wbox
+file returns
+.I "(POLLOUT | POLLWRNORM)"
+whenever space is available for writing.
+.RE
+.TP
+.IR /mbox_stat ", " /ibox_stat ", " /wbox_stat
+These are read-only files that contain the length of the current
+queue of each mailbox\[em]that is, how many words can be read from
+.IR mbox " or " ibox
+or how many words can be written to
+.I wbox
+without blocking.
+The files can be read only in four-byte units and return
+a big-endian binary integer number.
+The only possible operation on an open
+.I *box_stat
+file is:
+.RS
+.TP
+.BR read (2)
+If
+.I count
+is smaller than four,
+.BR read (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+Otherwise, a four-byte value is placed in the data buffer.
+This value is the number of elements that can be read from (for
+.I mbox_stat
+and
+.IR ibox_stat )
+or written to (for
+.IR wbox_stat )
+the respective mailbox without blocking or returning an
+.B EAGAIN
+error.
+.RE
+.TP
+.IR /npc ", " /decr ", " /decr_status ", " /spu_tag_mask ", " \
+/event_mask ", " /event_status ", " /srr0 ", " /lslr
+Internal registers of the SPU.
+These files contain an ASCII string
+representing the hex value of the specified register.
+Reads and writes on these
+files (except for
+.IR npc ,
+see below) require that the SPU context be scheduled out,
+so frequent access to
+these files is not recommended for normal program operation.
+.IP
+The contents of these files are:
+.RS
+.TP 16
+.I npc
+Next Program Counter \- valid only when the SPU is in a stopped state.
+.TP
+.I decr
+SPU Decrementer
+.TP
+.I decr_status
+Decrementer Status
+.TP
+.I spu_tag_mask
+MFC tag mask for SPU DMA
+.TP
+.I event_mask
+Event mask for SPU interrupts
+.TP
+.I event_status
+Number of SPU events pending (read-only)
+.TP
+.I srr0
+Interrupt Return address register
+.TP
+.I lslr
+Local Store Limit Register
+.RE
+.IP
+The possible operations on these files are:
+.RS
+.TP
+.BR read (2)
+Reads the current register value.
+If the register value is larger than the buffer passed to the
+.BR read (2)
+system call, subsequent reads will continue reading from the same
+buffer, until the end of the buffer is reached.
+.IP
+When a complete string has been read, all subsequent read operations
+will return zero bytes and a new file descriptor needs to be opened
+to read a new value.
+.TP
+.BR write (2)
+A
+.BR write (2)
+operation on the file sets the register to the
+value given in the string.
+The string is parsed from the beginning
+until the first nonnumeric character or the end of the buffer.
+Subsequent writes to the same file descriptor overwrite the
+previous setting.
+.IP
+Except for the
+.I npc
+file, these files are not present on contexts that have been created with
+the
+.B SPU_CREATE_NOSCHED
+flag.
+.RE
+.TP
+.I /fpcr
+This file provides access to the Floating Point Status and
+Control Register (fpcr) as a binary, four-byte file.
+The operations on the
+.I fpcr
+file are:
+.RS
+.TP
+.BR read (2)
+If
+.I count
+is smaller than four,
+.BR read (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+Otherwise, a four-byte value is placed in the data buffer;
+this is the current value of the
+.I fpcr
+register.
+.TP
+.BR write (2)
+If
+.I count
+is smaller than four,
+.BR write (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+Otherwise, a four-byte value is copied from the data buffer,
+updating the value of the
+.I fpcr
+register.
+.RE
+.TP
+.IR /signal1 ", " /signal2
+The files provide access to the two signal notification channels
+of an SPU.
+These are read-write files that operate on four-byte words.
+Writing to one of these files triggers an interrupt on the SPU.
+The value written to the signal files can
+be read from the SPU through a channel read or from
+host user space through the file.
+After the value has been read by the SPU, it is reset to zero.
+The possible operations on an open
+.I signal1
+or
+.I signal2
+file are:
+.RS
+.TP
+.BR read (2)
+If
+.I count
+is smaller than four,
+.BR read (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+Otherwise, a four-byte value is placed in the data buffer;
+this is the current value of the specified signal notification
+register.
+.TP
+.BR write (2)
+If
+.I count
+is smaller than four,
+.BR write (2)
+returns \-1 and sets
+.I errno
+to
+.BR EINVAL .
+Otherwise, a four-byte value is copied from the data buffer,
+updating the value of the specified signal notification
+register.
+The signal notification register will either be replaced with
+the input data or will be updated to the bitwise OR operation
+of the old value and the input data, depending on the contents
+of the
+.I signal1_type
+or
+.I signal2_type
+files respectively.
+.RE
+.TP
+.IR /signal1_type ", " /signal2_type
+These two files change the behavior of the
+.I signal1
+and
+.I signal2
+notification files.
+They contain a numeric ASCII string which is read
+as either "1" or "0".
+In mode 0 (overwrite), the hardware replaces the contents
+of the signal channel with the data that is written to it.
+In mode 1 (logical OR), the hardware accumulates the bits
+that are subsequently written to it.
+The possible operations on an open
+.I signal1_type
+or
+.I signal2_type
+file are:
+.RS
+.TP
+.BR read (2)
+When the count supplied to the
+.BR read (2)
+call is shorter than the required length for the digit (plus a newline
+character), subsequent reads from the same file descriptor will
+complete the string.
+When a complete string has been read, all subsequent read operations
+will return zero bytes and a new file descriptor needs to be opened
+to read the value again.
+.TP
+.BR write (2)
+A
+.BR write (2)
+operation on the file sets the register to the
+value given in the string.
+The string is parsed from the beginning
+until the first nonnumeric character or the end of the buffer.
+Subsequent writes to the same file descriptor overwrite the
+previous setting.
+.RE
+.TP
+.IR /mbox_info ", " /ibox_info ", " /wbox_info ", " /dma_info ", " /proxydma_info
+Read-only files that contain the saved state of the SPU mailboxes and
+DMA queues.
+This allows the SPU status to be inspected, mainly for debugging.
+The
+.I mbox_info
+and
+.I ibox_info
+files each contain the four-byte mailbox message that has been written
+by the SPU.
+If no message has been written to these mailboxes, then
+the contents of these files are undefined.
+The
+.IR mbox_stat ,
+.IR ibox_stat ,
+and
+.I wbox_stat
+files contain the available message count.
+.IP
+The
+.I wbox_info
+file contains an array of four-byte mailbox messages, which have been
+sent to the SPU.
+With current CBEA machines, the array is four items in
+length, so up to 4 * 4 = 16 bytes can be read from this file.
+If any mailbox queue entry is empty,
+then the bytes read at the corresponding location are undefined.
+.IP
+The
+.I dma_info
+file contains the contents of the SPU MFC DMA queue, represented as the
+following structure:
+.IP
+.in +4n
+.EX
+struct spu_dma_info {
+ uint64_t dma_info_type;
+ uint64_t dma_info_mask;
+ uint64_t dma_info_status;
+ uint64_t dma_info_stall_and_notify;
+ uint64_t dma_info_atomic_command_status;
+ struct mfc_cq_sr dma_info_command_data[16];
+};
+.EE
+.in
+.IP
+The last member of this data structure is the actual DMA queue,
+containing 16 entries.
+The
+.I mfc_cq_sr
+structure is defined as:
+.IP
+.in +4n
+.EX
+struct mfc_cq_sr {
+ uint64_t mfc_cq_data0_RW;
+ uint64_t mfc_cq_data1_RW;
+ uint64_t mfc_cq_data2_RW;
+ uint64_t mfc_cq_data3_RW;
+};
+.EE
+.in
+.IP
+The
+.I proxydma_info
+file contains similar information, but describes the proxy DMA queue
+(i.e., DMAs initiated by entities outside the SPU) instead.
+The file is in the following format:
+.IP
+.in +4n
+.EX
+struct spu_proxydma_info {
+ uint64_t proxydma_info_type;
+ uint64_t proxydma_info_mask;
+ uint64_t proxydma_info_status;
+ struct mfc_cq_sr proxydma_info_command_data[8];
+};
+.EE
+.in
+.IP
+Accessing these files requires that the SPU context be scheduled out,
+so frequent use can be inefficient.
+These files should not be used for normal program operation.
+.IP
+These files are not present on contexts that have been created with the
+.B SPU_CREATE_NOSCHED
+flag.
+.TP
+.I /cntl
+This file provides access to the SPU Run Control and SPU status
+registers, as an ASCII string.
+The following operations are supported:
+.RS
+.TP
+.BR read (2)
+Reads from the
+.I cntl
+file will return an ASCII string with the hex
+value of the SPU Status register.
+.TP
+.BR write (2)
+Writes to the
+.I cntl
+file will set the context's SPU Run Control register.
+.RE
+.TP
+.I /mfc
+Provides access to the Memory Flow Controller of the SPU.
+Reading from the file returns the contents of the
+SPU's MFC Tag Status register, and
+writing to the file initiates a DMA from the MFC.
+The following operations are supported:
+.RS
+.TP
+.BR write (2)
+Writes to this file need to be in the format of an MFC DMA command,
+defined as follows:
+.IP
+.in +4n
+.EX
+struct mfc_dma_command {
+ int32_t pad; /* reserved */
+ uint32_t lsa; /* local storage address */
+ uint64_t ea; /* effective address */
+ uint16_t size; /* transfer size */
+ uint16_t tag; /* command tag */
+ uint16_t class; /* class ID */
+ uint16_t cmd; /* command opcode */
+};
+.EE
+.in
+.IP
+Writes are required to be exactly
+.I sizeof(struct mfc_dma_command)
+bytes in size.
+The command will be sent to the SPU's MFC proxy queue, and the
+tag stored in the kernel (see below).
+.TP
+.BR read (2)
+Reads the contents of the tag status register.
+If the file is opened in blocking mode (i.e., without
+.BR O_NONBLOCK ),
+then the read will block until a
+DMA tag (as performed by a previous write) is complete.
+In nonblocking mode,
+the MFC tag status register will be returned without waiting.
+.TP
+.BR poll (2)
+Calling
+.BR poll (2)
+on the
+.I mfc
+file will block until a new DMA can be
+started (by checking for
+.BR POLLOUT )
+or until a previously started DMA
+(by checking for
+.BR POLLIN )
+has been completed.
+.RE
+.TP
+.I /mss
+Provides access to the MFC MultiSource Synchronization (MSS) facility.
+By
+.BR mmap (2)-ing
+this file, processes can access the MSS area of the SPU.
+.IP
+The following operations are supported:
+.RS
+.TP
+.BR mmap (2)
+Mapping
+.B mss
+into the process address space gives access to the SPU MSS area
+within the process address space.
+Only
+.B MAP_SHARED
+mappings are allowed.
+.RE
+.TP
+.I /psmap
+Provides access to the whole problem-state mapping of the SPU.
+Applications can use this area to interface to the SPU, rather than
+writing to individual register files in
+.BR spufs .
+.IP
+The following operations are supported:
+.RS
+.TP
+.BR mmap (2)
+Mapping
+.B psmap
+gives a process a direct map of the SPU problem state area.
+Only
+.B MAP_SHARED
+mappings are supported.
+.RE
+.TP
+.I /phys\-id
+Read-only file containing the physical SPU number that the SPU context
+is running on.
+When the context is not running, this file contains the
+string "\-1".
+.IP
+The physical SPU number is given by an ASCII hex string.
+.TP
+.I /object\-id
+Allows applications to store (or retrieve) a single 64-bit ID into the
+context.
+This ID is later used by profiling tools to uniquely identify
+the context.
+.RS
+.TP
+.BR write (2)
+By writing an ASCII hex value into this file, applications can set the
+object ID of the SPU context.
+Any previous value of the object ID is overwritten.
+.TP
+.BR read (2)
+Reading this file gives an ASCII hex string representing the object ID
+for this SPU context.
+.RE
+.SH EXAMPLES
+To automatically
+.BR mount (8)
+the SPU filesystem when booting, at the location
+.I /spu
+chosen by the user, put this line into the
+.BR fstab (5)
+configuration file:
+.EX
+none /spu spufs gid=spu 0 0
+.EE
+.\" .SH AUTHORS
+.\" Arnd Bergmann <arndb@de.ibm.com>, Mark Nutter <mnutter@us.ibm.com>,
+.\" Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, Jeremy Kerr <jk@ozlabs.org>
+.SH SEE ALSO
+.BR close (2),
+.BR spu_create (2),
+.BR spu_run (2),
+.BR capabilities (7)
+.PP
+.I The Cell Broadband Engine Architecture (CBEA) specification
diff --git a/man7/standards.7 b/man7/standards.7
new file mode 100644
index 0000000..c1df6f8
--- /dev/null
+++ b/man7/standards.7
@@ -0,0 +1,303 @@
+.\" Copyright (c) 2006, Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH standards 7 2023-03-13 "Linux man-pages 6.05.01"
+.SH NAME
+standards \- C and UNIX Standards
+.SH DESCRIPTION
+The STANDARDS section that appears in many manual pages identifies
+various standards to which the documented interface conforms.
+The following list briefly describes these standards.
+.TP
+.B V7
+Version 7 (also known as Seventh Edition) UNIX,
+released by AT&T/Bell Labs in 1979.
+After this point, UNIX systems diverged into two main dialects:
+BSD and System V.
+.TP
+.B 4.2BSD
+This is an implementation standard defined by the 4.2 release
+of the
+.IR "Berkeley Software Distribution" ,
+released by the University of California at Berkeley.
+This was the first Berkeley release that contained a TCP/IP
+stack and the sockets API.
+4.2BSD was released in 1983.
+.IP
+Earlier major BSD releases included
+.I 3BSD
+(1980),
+.I 4BSD
+(1980),
+and
+.I 4.1BSD
+(1981).
+.TP
+.B 4.3BSD
+The successor to 4.2BSD, released in 1986.
+.TP
+.B 4.4BSD
+The successor to 4.3BSD, released in 1993.
+This was the last major Berkeley release.
+.TP
+.B System V
+This is an implementation standard defined by AT&T's milestone 1983
+release of its commercial System V (five) operating system.
+The previous major AT&T release was
+.IR "System III" ,
+released in 1981.
+.TP
+.B System V release 2 (SVr2)
+This was the next System V release, made in 1985.
+SVr2 was formally described in the
+.I "System V Interface Definition version 1"
+.RI ( "SVID 1" )
+published in 1985.
+.TP
+.B System V release 3 (SVr3)
+This was the successor to SVr2, released in 1986.
+This release was formally described in the
+.I "System V Interface Definition version 2"
+.RI ( "SVID 2" ).
+.TP
+.B System V release 4 (SVr4)
+This was the successor to SVr3, released in 1989.
+This version of System V is described in the "Programmer's Reference
+Manual: Operating System API (Intel processors)" (Prentice-Hall
+1992, ISBN 0-13-951294-2).
+This release was formally described in the
+.I "System V Interface Definition version 3"
+.RI ( "SVID 3" ),
+and is considered the definitive System V release.
+.TP
+.B SVID 4
+System V Interface Definition version 4, issued in 1995.
+Available online at
+.UR http://www.sco.com\:/developers\:/devspecs/
+.UE .
+.TP
+.B C89
+This was the first C language standard, ratified by ANSI
+(American National Standards Institute) in 1989
+.RI ( X3.159-1989 ).
+Sometimes this is known as
+.IR "ANSI C" ,
+but since C99 is also an
+ANSI standard, this term is ambiguous.
+This standard was also ratified by
+ISO (International Organization for Standardization) in 1990
+.RI ( "ISO/IEC 9899:1990" ),
+and is thus occasionally referred to as
+.IR "ISO C90" .
+.TP
+.B C99
+This revision of the C language standard was ratified by ISO in 1999
+.RI ( "ISO/IEC 9899:1999" ).
+Available online at
+.UR http://www.open\-std.org\:/jtc1\:/sc22\:/wg14\:/www\:/standards
+.UE .
+.TP
+.B C11
+This revision of the C language standard was ratified by ISO in 2011
+.RI ( "ISO/IEC 9899:2011" ).
+.TP
+.B LFS
+The Large File Summit specification, completed in 1996.
+This specification defined mechanisms that allowed 32-bit systems
+to support the use of large files (i.e., 64-bit file offsets).
+See
+.UR https://www.opengroup.org\:/platform\:/lfs.html
+.UE .
+.TP
+.B POSIX.1-1988
+This was the first POSIX standard,
+ratified by IEEE as IEEE Std 1003.1-1988,
+and subsequently adopted (with minor revisions) as an ISO standard in 1990.
+The term "POSIX" was coined by Richard Stallman.
+.TP
+.B POSIX.1-1990
+"Portable Operating System Interface for Computing Environments".
+IEEE 1003.1-1990 part 1, ratified by ISO in 1990
+.RI ( "ISO/IEC 9945-1:1990" ).
+.TP
+.B POSIX.2
+IEEE Std 1003.2-1992,
+describing commands and utilities, ratified by ISO in 1993
+.RI ( "ISO/IEC 9945-2:1993" ).
+.TP
+.BR POSIX.1b " (formerly known as \fIPOSIX.4\fP)"
+IEEE Std 1003.1b-1993,
+describing real-time facilities
+for portable operating systems, ratified by ISO in 1996
+.RI ( "ISO/IEC 9945-1:1996" ).
+.TP
+.BR POSIX.1c " (formerly known as \fIPOSIX.4a\fP)"
+IEEE Std 1003.1c-1995, which describes the POSIX threads interfaces.
+.TP
+.B POSIX.1d
+IEEE Std 1003.1d-1999, which describes additional real-time extensions.
+.TP
+.B POSIX.1g
+IEEE Std 1003.1g-2000, which describes networking APIs (including sockets).
+.TP
+.B POSIX.1j
+IEEE Std 1003.1j-2000, which describes advanced real-time extensions.
+.TP
+.B POSIX.1-1996
+A 1996 revision of POSIX.1 which incorporated POSIX.1b and POSIX.1c.
+.TP
+.B XPG3
+Released in 1989, this was the first release of the X/Open
+Portability Guide to be based on a POSIX standard (POSIX.1-1988).
+This multivolume guide was developed by the X/Open Group,
+a multivendor consortium.
+.TP
+.B XPG4
+A revision of the X/Open Portability Guide, released in 1992.
+This revision incorporated POSIX.2.
+.TP
+.B XPG4v2
+A 1994 revision of XPG4.
+This is also referred to as
+.IR "Spec 1170" ,
+where 1170 referred to the number of interfaces
+defined by this standard.
+.TP
+.B "SUS (SUSv1)"
+Single UNIX Specification.
+This was a repackaging of XPG4v2 and other X/Open standards
+(X/Open Curses Issue 4 version 2,
+X/Open Networking Service (XNS) Issue 4).
+Systems conforming to this standard can be branded
+.IR "UNIX 95" .
+.TP
+.B SUSv2
+Single UNIX Specification version 2.
+Sometimes also referred to (incorrectly) as
+.IR XPG5 .
+This standard appeared in 1997.
+Systems conforming to this standard can be branded
+.IR "UNIX 98" .
+See also
+.UR http://www.unix.org\:/version2/
+.UE .
+.TP
+.B POSIX.1-2001, SUSv3
+This was a 2001 revision and consolidation of the
+POSIX.1, POSIX.2, and SUS standards into a single document,
+conducted under the auspices of the Austin Group
+.UR http://www.opengroup.org\:/austin/
+.UE .
+The standard is available online at
+.UR http://www.unix.org\:/version3/
+.UE .
+.IP
+The standard defines two levels of conformance:
+.IR "POSIX Conformance" ,
+which is a baseline set of interfaces required of a conforming system;
+and
+.IR "XSI Conformance" ,
+which additionally mandates a set of interfaces
+(the "XSI extension") which are only optional for POSIX conformance.
+XSI-conformant systems can be branded
+.IR "UNIX 03" .
+.IP
+The POSIX.1-2001 document is broken into four parts:
+.IP
+.BR XBD :
+Definitions, terms, and concepts, header file specifications.
+.IP
+.BR XSH :
+Specifications of functions (i.e., system calls and library
+functions in actual implementations).
+.IP
+.BR XCU :
+Specifications of commands and utilities
+(i.e., the area formerly described by POSIX.2).
+.IP
+.BR XRAT :
+Informative text on the other parts of the standard.
+.IP
+POSIX.1-2001 is aligned with C99, so that all of the
+library functions standardized in C99 are also
+standardized in POSIX.1-2001.
+.IP
+The Single UNIX Specification version 3 (SUSv3) comprises the
+Base Specifications containing XBD, XSH, XCU, and XRAT as above,
+plus X/Open Curses Issue 4 version 2 as an extra volume that is
+not in POSIX.1-2001.
+.IP
+Two Technical Corrigenda (minor fixes and improvements)
+of the original 2001 standard have occurred:
+TC1 in 2003
+and TC2 in 2004.
+.TP
+.B POSIX.1-2008, SUSv4
+Work on the next revision of POSIX.1/SUS was completed and
+ratified in 2008.
+The standard is available online at
+.UR http://www.unix.org\:/version4/
+.UE .
+.IP
+The changes in this revision are not as large as those
+that occurred for POSIX.1-2001/SUSv3,
+but a number of new interfaces are added
+and various details of existing specifications are modified.
+Many of the interfaces that were optional in
+POSIX.1-2001 become mandatory in the 2008 revision of the standard.
+A few interfaces that are present in POSIX.1-2001 are marked
+as obsolete in POSIX.1-2008, or removed from the standard altogether.
+.IP
+The revised standard is structured in the same way as its predecessor.
+The Single UNIX Specification version 4 (SUSv4) comprises the
+Base Specifications containing XBD, XSH, XCU, and XRAT,
+plus X/Open Curses Issue 7 as an extra volume that is
+not in POSIX.1-2008.
+.IP
+Again there are two levels of conformance: the baseline
+.IR "POSIX Conformance" ,
+and
+.IR "XSI Conformance" ,
+which mandates an additional set of interfaces
+beyond those in the base specification.
+.IP
+In general, where the STANDARDS section of a manual page
+lists POSIX.1-2001, it can be assumed that the interface also
+conforms to POSIX.1-2008, unless otherwise noted.
+.IP
+Technical Corrigendum 1 (minor fixes and improvements)
+of this standard was released in 2013.
+.IP
+Technical Corrigendum 2 of this standard was released in 2016.
+.IP
+Further information can be found on the Austin Group web site,
+.UR http://www.opengroup.org\:/austin/
+.UE .
+.TP
+.B SUSv4 2016 edition
+This is equivalent to POSIX.1-2008, with the addition of
+Technical Corrigenda 1 and 2 and the XCurses specification.
+.TP
+.B POSIX.1-2017
+This revision of POSIX is technically identical to POSIX.1-2008 with
+Technical Corrigenda 1 and 2 applied.
+.TP
+.B SUSv4 2018 edition
+This is equivalent to POSIX.1-2017, with the addition of
+the XCurses specification.
+.PP
+The interfaces documented in POSIX.1/SUS are available as
+manual pages under sections 0p (header files), 1p (commands),
+and 3p (functions);
+thus one can write "man 3p open".
+.SH SEE ALSO
+.BR getconf (1),
+.BR confstr (3),
+.BR pathconf (3),
+.BR sysconf (3),
+.BR attributes (7),
+.BR feature_test_macros (7),
+.BR libc (7),
+.BR posixoptions (7),
+.BR system_data_types (7)
diff --git a/man7/string_copying.7 b/man7/string_copying.7
new file mode 100644
index 0000000..814eabd
--- /dev/null
+++ b/man7/string_copying.7
@@ -0,0 +1,816 @@
+.\" Copyright 2022 Alejandro Colomar <alx@kernel.org>
+.\"
+.\" SPDX-License-Identifier: BSD-3-Clause
+.\"
+.TH string_copying 7 2023-07-29 "Linux man-pages 6.05.01"
+.\" ----- NAME :: -----------------------------------------------------/
+.SH NAME
+stpcpy,
+strcpy, strcat,
+stpecpy,
+strlcpy, strlcat,
+stpncpy,
+strncpy,
+zustr2ustp, zustr2stp,
+strncat,
+ustpcpy, ustr2stp
+\- copying strings and character sequences
+.\" ----- SYNOPSIS :: -------------------------------------------------/
+.SH SYNOPSIS
+.\" ----- SYNOPSIS :: (Null-terminated) strings -----------------------/
+.SS Strings
+.nf
+// Chain-copy a string.
+.BI "char *stpcpy(char *restrict " dst ", const char *restrict " src );
+.PP
+// Copy/catenate a string.
+.BI "char *strcpy(char *restrict " dst ", const char *restrict " src );
+.BI "char *strcat(char *restrict " dst ", const char *restrict " src );
+.PP
+// Chain-copy a string with truncation.
+.BI "char *stpecpy(char *" dst ", char " end "[0], const char *restrict " src );
+.PP
+// Copy/catenate a string with truncation.
+.BI "size_t strlcpy(char " dst "[restrict ." sz "], \
+const char *restrict " src ,
+.BI " size_t " sz );
+.BI "size_t strlcat(char " dst "[restrict ." sz "], \
+const char *restrict " src ,
+.BI " size_t " sz );
+.fi
+.\" ----- SYNOPSIS :: Null-padded character sequences --------/
+.SS Null-padded character sequences
+.nf
+// Zero a fixed-width buffer, and
+// copy a string into a character sequence with truncation.
+.BI "char *stpncpy(char " dst "[restrict ." sz "], \
+const char *restrict " src ,
+.BI " size_t " sz );
+.PP
+// Zero a fixed-width buffer, and
+// copy a string into a character sequence with truncation.
+.BI "char *strncpy(char " dst "[restrict ." sz "], \
+const char *restrict " src ,
+.BI " size_t " sz );
+.PP
+// Chain-copy a null-padded character sequence into a character sequence.
+.BI "char *zustr2ustp(char *restrict " dst ", \
+const char " src "[restrict ." sz ],
+.BI " size_t " sz );
+.PP
+// Chain-copy a null-padded character sequence into a string.
+.BI "char *zustr2stp(char *restrict " dst ", \
+const char " src "[restrict ." sz ],
+.BI " size_t " sz );
+.PP
+// Catenate a null-padded character sequence into a string.
+.BI "char *strncat(char *restrict " dst ", const char " src "[restrict ." sz ],
+.BI " size_t " sz );
+.fi
+.\" ----- SYNOPSIS :: Measured character sequences --------------------/
+.SS Measured character sequences
+.nf
+// Chain-copy a measured character sequence.
+.BI "char *ustpcpy(char *restrict " dst ", \
+const char " src "[restrict ." len ],
+.BI " size_t " len );
+.PP
+// Chain-copy a measured character sequence into a string.
+.BI "char *ustr2stp(char *restrict " dst ", \
+const char " src "[restrict ." len ],
+.BI " size_t " len );
+.fi
+.SH DESCRIPTION
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: -----------------/
+.SS Terms (and abbreviations)
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: string (str) ----/
+.TP
+.IR "string " ( str )
+is a sequence of zero or more non-null characters followed by a null byte.
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: null-padded character seq
+.TP
+.I character sequence
+is a sequence of zero or more non-null characters.
+A program should never use a character sequence where a string is required.
+However, with appropriate care,
+a string can be used in the place of a character sequence.
+.RS
+.TP
+.IR "null-padded character sequence " ( zustr )
+Character sequences can be contained in fixed-width buffers,
+which contain padding null bytes after the character sequence,
+to fill the rest of the buffer
+without affecting the character sequence;
+however, those padding null bytes are not part of the character sequence.
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: measured character sequence
+.TP
+.IR "measured character sequence " ( ustr )
+Character sequence delimited by its length.
+It may be a slice of a larger character sequence,
+or even of a string.
+.RE
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: length (len) ----/
+.TP
+.IR "length " ( len )
+is the number of non-null characters in a string or character sequence.
+It is the return value of
+.I strlen(str)
+and of
+.IR "strnlen(ustr, sz)" .
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: size (sz) -------/
+.TP
+.IR "size " ( sz )
+refers to the entire buffer
+where the string or character sequence is contained.
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: end -------------/
+.TP
+.I end
+is the name of a pointer to one past the last element of a buffer.
+It is equivalent to
+.IR &str[sz] .
+It is used as a sentinel value,
+to be able to truncate strings or character sequences
+instead of overrunning the containing buffer.
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: copy ------------/
+.TP
+.I copy
+This term is used when
+the writing starts at the first element pointed to by
+.IR dst .
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: catenate --------/
+.TP
+.I catenate
+This term is used when
+a function first finds the terminating null byte in
+.IR dst ,
+and then starts writing at that position.
+.\" ----- DESCRIPTION :: Terms (and abbreviations) :: chain -----------/
+.TP
+.I chain
+This term is used when
+it's the programmer who provides
+a pointer to the terminating null byte in the string
+.I dst
+(or one after the last character in a character sequence),
+and the function starts writing at that location.
+The function returns
+a pointer to the new location of the terminating null byte
+(or one after the last character in a character sequence)
+after the call,
+so that the programmer can use it to chain such calls.
+.\" ----- DESCRIPTION :: Copy, catenate, and chain-copy ---------------/
+.SS Copy, catenate, and chain-copy
+Originally,
+there was a distinction between functions that copy and those that catenate.
+However, newer functions that copy while allowing chaining
+cover both use cases with a single API.
+They are also algorithmically faster,
+since they don't need to search for
+the terminating null byte of the existing string.
+However, functions that catenate have a much simpler use,
+so if performance is not important,
+it can make sense to use them for improving readability.
+.PP
+The pointer returned by functions that allow chaining
+is a byproduct of the copy operation,
+so it has no performance costs.
+Functions that return such a pointer,
+and thus can be chained,
+have names of the form
+.RB * stp *(),
+since it's common to name the pointer just
+.IR p .
+.PP
+Chain-copying functions that truncate
+should accept a pointer to the end of the destination buffer,
+and have names of the form
+.RB * stpe *().
+This allows not having to recalculate the remaining size after each call.
+.\" ----- DESCRIPTION :: Truncate or not? -----------------------------/
+.SS Truncate or not?
+The first thing to note is that programmers should be careful with buffers,
+so they always have the correct size,
+and truncation is not necessary.
+.PP
+In most cases,
+truncation is not desired,
+and it is simpler to just do the copy.
+Simpler code is safer code.
+Programming against programming mistakes by adding more code
+just adds more points where mistakes can be made.
+.PP
+Nowadays,
+compilers can detect most programmer errors with features like
+compiler warnings,
+static analyzers, and
+.B \%_FORTIFY_SOURCE
+(see
+.BR ftm (7)).
+Keeping the code simple
+helps these overflow-detection features be more precise.
+.PP
+When validating user input,
+however,
+it makes sense to truncate.
+Remember to check the return value of such function calls.
+.PP
+Functions that truncate:
+.IP \[bu] 3
+.BR stpecpy (3)
+is the most efficient string copy function that performs truncation.
+It requires checking for truncation only once, after all chained calls.
+.IP \[bu]
+.BR strlcpy (3bsd)
+and
+.BR strlcat (3bsd)
+are similar, but less efficient when chained.
+.IP \[bu]
+.BR stpncpy (3)
+and
+.BR strncpy (3)
+also truncate, but they write null-padded character sequences
+rather than strings.
+.\" ----- DESCRIPTION :: Null-padded character sequences --------------/
+.SS Null-padded character sequences
+For historic reasons,
+some standard APIs,
+such as
+.BR utmpx (5),
+use null-padded character sequences in fixed-width buffers.
+To interface with them,
+specialized functions need to be used.
+.PP
+To copy strings into them, use
+.BR stpncpy (3).
+.PP
+To copy from an unterminated string within a fixed-width buffer into a string,
+ignoring any trailing null bytes in the source fixed-width buffer,
+you should use
+.BR zustr2stp (3)
+or
+.BR strncat (3).
+.PP
+To copy from an unterminated string within a fixed-width buffer
+into a character sequence,
+ignoring any trailing null bytes in the source fixed-width buffer,
+you should use
+.BR zustr2ustp (3).
+.\" ----- DESCRIPTION :: Measured character sequences -----------------/
+.SS Measured character sequences
+The simplest character sequence copying function is
+.BR mempcpy (3).
+It requires always knowing the length of your character sequences,
+for which structures can be used.
+It makes the code much faster,
+since you always know the length of your character sequences,
+and can do the minimal copies and length measurements.
+.BR mempcpy (3)
+copies character sequences,
+so you need to explicitly set the terminating null byte if you need a string.
+.PP
+However,
+for keeping type safety,
+it's good to add a wrapper that uses
+.I char\~*
+instead of
+.IR void\~* :
+.BR ustpcpy (3).
+.PP
+In programs that make considerable use of strings or character sequences,
+and need the best performance,
+using overlapping character sequences can make a big difference.
+It allows holding subsequences of a larger character sequence,
+while not duplicating memory
+nor using time to do a copy.
+.PP
+However, this is delicate,
+since it requires using character sequences.
+C library APIs use strings,
+so programs that use character sequences
+will have to take care of differentiating strings from character sequences.
+.PP
+To copy a measured character sequence, use
+.BR ustpcpy (3).
+.PP
+To copy a measured character sequence into a string, use
+.BR ustr2stp (3).
+.PP
+Because these functions ask for the length,
+and a string is by nature composed of a character sequence of the same length
+plus a terminating null byte,
+a string is also accepted as input.
+.\" ----- DESCRIPTION :: String vs character sequence -----------------/
+.SS String vs character sequence
+Some functions only operate on strings.
+Those require that the input
+.I src
+is a string,
+and guarantee an output string
+(even when truncation occurs).
+Functions that catenate
+also require that
+.I dst
+holds a string before the call.
+List of functions:
+.IP \[bu] 3
+.PD 0
+.BR stpcpy (3)
+.IP \[bu]
+.BR strcpy (3),
+.BR strcat (3)
+.IP \[bu]
+.BR stpecpy (3)
+.IP \[bu]
+.BR strlcpy (3bsd),
+.BR strlcat (3bsd)
+.PD
+.PP
+Other functions require an input string,
+but create a character sequence as output.
+These functions have confusing names,
+and have a long history of misuse.
+List of functions:
+.IP \[bu] 3
+.PD 0
+.BR stpncpy (3)
+.IP \[bu]
+.BR strncpy (3)
+.PD
+.PP
+Other functions operate on an input character sequence,
+and create an output string.
+Functions that catenate
+also require that
+.I dst
+holds a string before the call.
+.BR strncat (3)
+has an even more misleading name than the functions above.
+List of functions:
+.IP \[bu] 3
+.PD 0
+.BR zustr2stp (3)
+.IP \[bu]
+.BR strncat (3)
+.IP \[bu]
+.BR ustr2stp (3)
+.PD
+.PP
+Other functions operate on an input character sequence
+to create an output character sequence.
+List of functions:
+.IP \[bu] 3
+.PD 0
+.BR ustpcpy (3)
+.IP \[bu]
+.BR zustr2ustp (3)
+.PD
+.\" ----- DESCRIPTION :: Functions :: ---------------------------------/
+.SS Functions
+.\" ----- DESCRIPTION :: Functions :: stpcpy(3) -----------------------/
+.TP
+.BR stpcpy (3)
+This function copies the input string into a destination string.
+The programmer is responsible for allocating a buffer large enough.
+It returns a pointer suitable for chaining.
+.\" ----- DESCRIPTION :: Functions :: strcpy(3), strcat(3) ------------/
+.TP
+.BR strcpy (3)
+.TQ
+.BR strcat (3)
+These functions copy and catenate the input string into a destination string.
+The programmer is responsible for allocating a buffer large enough.
+The return value is useless.
+.IP
+.BR stpcpy (3)
+is a faster alternative to these functions.
+.\" ----- DESCRIPTION :: Functions :: stpecpy(3) ----------------------/
+.TP
+.BR stpecpy (3)
+This function copies the input string into a destination string.
+If the destination buffer,
+limited by a pointer to its end,
+isn't large enough to hold the copy,
+the resulting string is truncated
+(but it is guaranteed to be null-terminated).
+It returns a pointer suitable for chaining.
+Truncation needs to be detected only once after the last chained call.
+.IP
+This function is not provided by any library;
+see EXAMPLES for a reference implementation.
+.\" ----- DESCRIPTION :: Functions :: strlcpy(3bsd), strlcat(3bsd) ----/
+.TP
+.BR strlcpy (3bsd)
+.TQ
+.BR strlcat (3bsd)
+These functions copy and catenate the input string into a destination string.
+If the destination buffer,
+limited by its size,
+isn't large enough to hold the copy,
+the resulting string is truncated
+(but it is guaranteed to be null-terminated).
+They return the length of the total string they tried to create.
+.IP
+.BR stpecpy (3)
+is a simpler alternative to these functions.
+.\" ----- DESCRIPTION :: Functions :: stpncpy(3) ----------------------/
+.TP
+.BR stpncpy (3)
+This function copies the input string into
+a destination null-padded character sequence in a fixed-width buffer.
+If the destination buffer,
+limited by its size,
+isn't large enough to hold the copy,
+the resulting character sequence is truncated.
+Since it creates a character sequence,
+it doesn't need to write a terminating null byte.
+It's impossible to distinguish truncation by the result of the call,
+from a character sequence that just fits the destination buffer;
+truncation should be detected by
+comparing the length of the input string
+with the size of the destination buffer.
+.\" ----- DESCRIPTION :: Functions :: strncpy(3) ----------------------/
+.TP
+.BR strncpy (3)
+This function is identical to
+.BR stpncpy (3)
+except for the useless return value.
+.IP
+.BR stpncpy (3)
+is a more useful alternative to this function.
+.\" ----- DESCRIPTION :: Functions :: zustr2ustp(3) --------------------/
+.TP
+.BR zustr2ustp (3)
+This function copies the input character sequence,
+contained in a null-padded fixed-width buffer,
+into a destination character sequence.
+The programmer is responsible for allocating a buffer large enough.
+It returns a pointer suitable for chaining.
+.IP
+A truncating version of this function doesn't exist,
+since the size of the original character sequence is always known,
+so it wouldn't be very useful.
+.IP
+This function is not provided by any library;
+see EXAMPLES for a reference implementation.
+.\" ----- DESCRIPTION :: Functions :: zustr2stp(3) --------------------/
+.TP
+.BR zustr2stp (3)
+This function copies the input character sequence,
+contained in a null-padded fixed-width buffer,
+into a destination string.
+The programmer is responsible for allocating a buffer large enough.
+It returns a pointer suitable for chaining.
+.IP
+A truncating version of this function doesn't exist,
+since the size of the original character sequence is always known,
+so it wouldn't be very useful.
+.IP
+This function is not provided by any library;
+see EXAMPLES for a reference implementation.
+.\" ----- DESCRIPTION :: Functions :: strncat(3) ----------------------/
+.TP
+.BR strncat (3)
+Do not confuse this function with
+.BR strncpy (3);
+they are not related at all.
+.IP
+This function catenates the input character sequence,
+contained in a null-padded fixed-width buffer,
+into a destination string.
+The programmer is responsible for allocating a buffer large enough.
+The return value is useless.
+.IP
+.BR zustr2stp (3)
+is a faster alternative to this function.
+.\" ----- DESCRIPTION :: Functions :: ustpcpy(3) ----------------------/
+.TP
+.BR ustpcpy (3)
+This function copies the input character sequence,
+limited by its length,
+into a destination character sequence.
+The programmer is responsible for allocating a buffer large enough.
+It returns a pointer suitable for chaining.
+.\" ----- DESCRIPTION :: Functions :: ustr2stp(3) ---------------------/
+.TP
+.BR ustr2stp (3)
+This function copies the input character sequence,
+limited by its length,
+into a destination string.
+The programmer is responsible for allocating a buffer large enough.
+It returns a pointer suitable for chaining.
+.\" ----- RETURN VALUE :: ---------------------------------------------/
+.SH RETURN VALUE
+The following functions return
+a pointer to the terminating null byte in the destination string.
+.IP \[bu] 3
+.PD 0
+.BR stpcpy (3)
+.IP \[bu]
+.BR ustr2stp (3)
+.IP \[bu]
+.BR zustr2stp (3)
+.PD
+.PP
+The following function returns
+a pointer to the terminating null byte in the destination string,
+except when truncation occurs;
+if truncation occurs,
+it returns a pointer to the end of the destination buffer.
+.IP \[bu] 3
+.BR stpecpy (3)
+.PP
+The following function returns
+a pointer to one after the last character
+in the destination character sequence;
+if truncation occurs,
+that pointer is equivalent to
+a pointer to the end of the destination buffer.
+.IP \[bu] 3
+.BR stpncpy (3)
+.PP
+The following functions return
+a pointer to one after the last character
+in the destination character sequence.
+.IP \[bu] 3
+.PD 0
+.BR zustr2ustp (3)
+.IP \[bu]
+.BR ustpcpy (3)
+.PD
+.PP
+The following functions return
+the length of the total string that they tried to create
+(as if truncation didn't occur).
+.IP \[bu] 3
+.BR strlcpy (3bsd),
+.BR strlcat (3bsd)
+.PP
+The following functions return the
+.I dst
+pointer,
+which is useless.
+.IP \[bu] 3
+.PD 0
+.BR strcpy (3),
+.BR strcat (3)
+.IP \[bu]
+.BR strncpy (3)
+.IP \[bu]
+.BR strncat (3)
+.PD
+.\" ----- NOTES :: strscpy(9) -----------------------------------------/
+.SH NOTES
+The Linux kernel has an internal function for copying strings,
+which is similar to
+.BR stpecpy (3),
+except that it can't be chained:
+.TP
+.BR strscpy (9)
+This function copies the input string into a destination string.
+If the destination buffer,
+limited by its size,
+isn't large enough to hold the copy,
+the resulting string is truncated
+(but it is guaranteed to be null-terminated).
+It returns the length of the destination string, or
+.B \-E2BIG
+on truncation.
+.IP
+.BR stpecpy (3)
+is a simpler and faster alternative to this function.
+.\" ----- CAVEATS :: --------------------------------------------------/
+.SH CAVEATS
+Don't mix chain calls to truncating and non-truncating functions.
+It is conceptually wrong
+unless you know that the first part of a copy will always fit.
+Anyway, the performance difference will probably be negligible,
+so it will probably be more clear if you use consistent semantics:
+either truncating or non-truncating.
+Calling a non-truncating function after a truncating one is necessarily wrong.
+.\" ----- BUGS :: -----------------------------------------------------/
+.SH BUGS
+All catenation functions share the same performance problem:
+.UR https://www.joelonsoftware.com/\:2001/12/11/\:back\-to\-basics/
+Shlemiel the painter
+.UE .
+.\" ----- EXAMPLES :: -------------------------------------------------/
+.SH EXAMPLES
+The following are examples of correct use of each of these functions.
+.\" ----- EXAMPLES :: stpcpy(3) ---------------------------------------/
+.TP
+.BR stpcpy (3)
+.EX
+p = buf;
+p = stpcpy(p, "Hello ");
+p = stpcpy(p, "world");
+p = stpcpy(p, "!");
+len = p \- buf;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: strcpy(3), strcat(3) ----------------------------/
+.TP
+.BR strcpy (3)
+.TQ
+.BR strcat (3)
+.EX
+strcpy(buf, "Hello ");
+strcat(buf, "world");
+strcat(buf, "!");
+len = strlen(buf);
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: stpecpy(3) --------------------------------------/
+.TP
+.BR stpecpy (3)
+.EX
+end = buf + sizeof(buf);
+p = buf;
+p = stpecpy(p, end, "Hello ");
+p = stpecpy(p, end, "world");
+p = stpecpy(p, end, "!");
+if (p == end) {
+ p\-\-;
+ goto toolong;
+}
+len = p \- buf;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: strlcpy(3bsd), strlcat(3bsd) --------------------/
+.TP
+.BR strlcpy (3bsd)
+.TQ
+.BR strlcat (3bsd)
+.EX
+if (strlcpy(buf, "Hello ", sizeof(buf)) >= sizeof(buf))
+ goto toolong;
+if (strlcat(buf, "world", sizeof(buf)) >= sizeof(buf))
+ goto toolong;
+len = strlcat(buf, "!", sizeof(buf));
+if (len >= sizeof(buf))
+ goto toolong;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: strscpy(9) --------------------------------------/
+.TP
+.BR strscpy (9)
+.EX
+len = strscpy(buf, "Hello world!", sizeof(buf));
+if (len == \-E2BIG)
+ goto toolong;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: stpncpy(3) --------------------------------------/
+.TP
+.BR stpncpy (3)
+.EX
+p = stpncpy(buf, "Hello world!", sizeof(buf));
+if (sizeof(buf) < strlen("Hello world!"))
+ goto toolong;
+len = p \- buf;
+for (size_t i = 0; i < sizeof(buf); i++)
+ putchar(buf[i]);
+.EE
+.\" ----- EXAMPLES :: strncpy(3) --------------------------------------/
+.TP
+.BR strncpy (3)
+.EX
+strncpy(buf, "Hello world!", sizeof(buf));
+if (sizeof(buf) < strlen("Hello world!"))
+ goto toolong;
+len = strnlen(buf, sizeof(buf));
+for (size_t i = 0; i < sizeof(buf); i++)
+ putchar(buf[i]);
+.EE
+.\" ----- EXAMPLES :: zustr2ustp(3) -----------------------------------/
+.TP
+.BR zustr2ustp (3)
+.EX
+p = buf;
+p = zustr2ustp(p, "Hello ", 6);
+p = zustr2ustp(p, "world", 42); // Padding null bytes ignored.
+p = zustr2ustp(p, "!", 1);
+len = p \- buf;
+printf("%.*s\en", (int) len, buf);
+.EE
+.\" ----- EXAMPLES :: zustr2stp(3) ------------------------------------/
+.TP
+.BR zustr2stp (3)
+.EX
+p = buf;
+p = zustr2stp(p, "Hello ", 6);
+p = zustr2stp(p, "world", 42); // Padding null bytes ignored.
+p = zustr2stp(p, "!", 1);
+len = p \- buf;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: strncat(3) --------------------------------------/
+.TP
+.BR strncat (3)
+.EX
+buf[0] = \[aq]\e0\[aq]; // There's no 'cpy' function to this 'cat'.
+strncat(buf, "Hello ", 6);
+strncat(buf, "world", 42); // Padding null bytes ignored.
+strncat(buf, "!", 1);
+len = strlen(buf);
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: ustpcpy(3) --------------------------------------/
+.TP
+.BR ustpcpy (3)
+.EX
+p = buf;
+p = ustpcpy(p, "Hello ", 6);
+p = ustpcpy(p, "world", 5);
+p = ustpcpy(p, "!", 1);
+len = p \- buf;
+printf("%.*s\en", (int) len, buf);
+.EE
+.\" ----- EXAMPLES :: ustr2stp(3) -------------------------------------/
+.TP
+.BR ustr2stp (3)
+.EX
+p = buf;
+p = ustr2stp(p, "Hello ", 6);
+p = ustr2stp(p, "world", 5);
+p = ustr2stp(p, "!", 1);
+len = p \- buf;
+puts(buf);
+.EE
+.\" ----- EXAMPLES :: Implementations :: ------------------------------/
+.SS Implementations
+Here are reference implementations for functions not provided by libc.
+.PP
+.in +4n
+.EX
+/* This code is in the public domain. */
+\&
+.\" ----- EXAMPLES :: Implementations :: stpecpy(3) -------------------/
+char *
+.IR stpecpy "(char *dst, char end[0], const char *restrict src)"
+{
+ char *p;
+\&
+ if (dst == NULL)
+ return NULL;
+ if (dst == end)
+ return end;
+\&
+ p = memccpy(dst, src, \[aq]\e0\[aq], end \- dst);
+ if (p != NULL)
+ return p \- 1;
+\&
+ /* truncation detected */
+ end[\-1] = \[aq]\e0\[aq];
+ return end;
+}
+\&
+.\" ----- EXAMPLES :: Implementations :: zustr2ustp(3) ----------------/
+char *
+.IR zustr2ustp "(char *restrict dst, const char *restrict src, size_t sz)"
+{
+ return ustpcpy(dst, src, strnlen(src, sz));
+}
+\&
+.\" ----- EXAMPLES :: Implementations :: zustr2stp(3) -----------------/
+char *
+.IR zustr2stp "(char *restrict dst, const char *restrict src, size_t sz)"
+{
+ char *p;
+\&
+ p = zustr2ustp(dst, src, sz);
+ *p = \[aq]\e0\[aq];
+\&
+ return p;
+}
+\&
+.\" ----- EXAMPLES :: Implementations :: ustpcpy(3) -------------------/
+char *
+.IR ustpcpy "(char *restrict dst, const char *restrict src, size_t len)"
+{
+ return mempcpy(dst, src, len);
+}
+\&
+.\" ----- EXAMPLES :: Implementations :: ustr2stp(3) ------------------/
+char *
+.IR ustr2stp "(char *restrict dst, const char *restrict src, size_t len)"
+{
+ char *p;
+\&
+ p = ustpcpy(dst, src, len);
+ *p = \[aq]\e0\[aq];
+\&
+ return p;
+}
+.EE
+.in
+.\" ----- SEE ALSO :: -------------------------------------------------/
+.SH SEE ALSO
+.BR bzero (3),
+.BR memcpy (3),
+.BR memccpy (3),
+.BR mempcpy (3),
+.BR stpcpy (3),
+.BR strlcpy (3bsd),
+.BR strncat (3),
+.BR stpncpy (3),
+.BR string (3)
diff --git a/man7/suffixes.7 b/man7/suffixes.7
new file mode 100644
index 0000000..5e970f4
--- /dev/null
+++ b/man7/suffixes.7
@@ -0,0 +1,265 @@
+'\" t
+.\" Copyright (c) 1993 by Thomas Koenig (ig25@rz.uni-karlsruhe.de)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Modified Sat Jul 24 17:35:15 1993 by Rik Faith <faith@cs.unc.edu>
+.\" Modified Sun Feb 19 22:02:32 1995 by Rik Faith <faith@cs.unc.edu>
+.\" Modified Tue Oct 22 23:28:12 1996 by Eric S. Raymond <esr@thyrsus.com>
+.\" Modified Sun Jan 26 21:56:56 1997 by Ralph Schleicher
+.\" <rs@purple.UL.BaWue.DE>
+.\" Modified Mon Jun 16 20:24:58 1997 by Nicolás Lichtmaier <nick@debian.org>
+.\" Modified Sun Oct 18 22:11:28 1998 by Joseph S. Myers <jsm28@cam.ac.uk>
+.\" Modified Mon Nov 16 17:24:47 1998 by Andries Brouwer <aeb@cwi.nl>
+.\" Modified Thu Nov 16 23:28:25 2000 by David A. Wheeler
+.\" <dwheeler@dwheeler.com>
+.\"
+.TH SUFFIXES 7 2023-03-17 "Linux man-pages 6.05.01"
+.SH NAME
+suffixes \- list of file suffixes
+.SH DESCRIPTION
+It is customary to indicate the contents of a file with the file suffix,
+which (typically) consists of a period, followed by one or more letters.
+Many standard utilities, such as compilers, use this to recognize the type of
+file they are dealing with.
+The
+.BR make (1)
+utility is driven by rules based on file suffix.
+.PP
+Following is a list of suffixes which are likely to be found on a
+Linux system.
+.PP
+.TS
+l | l
+_ | _
+lI | l .
+Suffix File type
+\&,v files for RCS (Revision Control System)
+\&- backup file
+\&.C C++ source code, equivalent to \fI.cc\fP
+\&.F Fortran source with \fBcpp\fP(1) directives
+\& or file compressed using freeze
+\&.S assembler source with \fBcpp\fP(1) directives
+\&.Y file compressed using yabba
+\&.Z file compressed using \fBcompress\fP(1)
+\&.[0\-9]+gf TeX generic font files
+\&.[0\-9]+pk TeX packed font files
+\&.[1\-9] manual page for the corresponding section
+\&.[1\-9][a\-z]	manual page for section plus subsection
+\&.a static object code library
+\&.ad X application default resource file
+\&.ada Ada source (may be body, spec, or combination)
+\&.adb Ada body source
+\&.ads Ada spec source
+\&.afm PostScript font metrics
+\&.al Perl autoload file
+\&.am \fBautomake\fP(1) input file
+\&.arc \fBarc\fP(1) archive
+\&.arj \fBarj\fP(1) archive
+\&.asc PGP ASCII-armored data
+\&.asm (GNU) assembler source file
+\&.au Audio sound file
+\&.aux LaTeX auxiliary file
+\&.avi (msvideo) movie
+\&.awk AWK language program
+\&.b LILO boot loader image
+\&.bak backup file
+\&.bash \fBbash\fP(1) shell script
+\&.bb basic block list data produced by
+\& gcc \-ftest\-coverage
+\&.bbg basic block graph data produced by
+\& gcc \-ftest\-coverage
+\&.bbl BibTeX output
+\&.bdf X font file
+\&.bib TeX bibliographic database, BibTeX input
+\&.bm bitmap source
+\&.bmp bitmap
+\&.bz2 file compressed using \fBbzip2\fP(1)
+\&.c C source
+\&.cat message catalog files
+\&.cc C++ source
+\&.cf configuration file
+\&.cfg configuration file
+\&.cgi WWW content generating script or program
+\&.cls LaTeX Class definition
+\&.class Java compiled byte-code
+\&.conf configuration file
+\&.config configuration file
+\&.cpp equivalent to \fI.cc\fR
+\&.csh \fBcsh\fP(1) shell script
+\&.cxx equivalent to \fI.cc\fR
+\&.dat data file
+\&.deb Debian software package
+\&.def Modula-2 source for definition modules
+\&.def other definition files
+\&.desc initial part of mail message unpacked with
+\& \fBmunpack\fP(1)
+\&.diff file differences (\fBdiff\fP(1) command output)
+\&.dir dbm data base directory file
+\&.doc documentation file
+\&.dsc Debian Source Control (source package)
+\&.dtx LaTeX package source file
+\&.dvi TeX's device independent output
+\&.el Emacs-Lisp source
+\&.elc compiled Emacs-Lisp source
+\&.eps encapsulated PostScript
+\&.exp Expect source code
+\&.f Fortran source
+\&.f77 Fortran 77 source
+\&.f90 Fortran 90 source
+\&.fas precompiled Common-Lisp
+\&.fi Fortran include files
+\&.fig FIG image file (used by \fBxfig\fP(1))
+\&.fmt TeX format file
+\&.gif	CompuServe Graphics Interchange Format
+\&.gmo GNU format message catalog
+\&.gsf Ghostscript fonts
+\&.gz file compressed using \fBgzip\fP(1)
+\&.h C or C++ header files
+\&.help help file
+\&.hf equivalent to \fI.help\fP
+\&.hlp equivalent to \fI.help\fP
+\&.htm poor man's \fI.html\fP
+\&.html HTML document used with the World Wide Web
+\&.hqx 7-bit encoded Macintosh file
+\&.i C source after preprocessing
+\&.icon bitmap source
+\&.idx reference or datum-index file for hypertext
+\& or database system
+\&.image bitmap source
+\&.in configuration template, especially for GNU Autoconf
+\&.info files for the Emacs info browser
+\&.info-[0\-9]+ split info files
+\&.ins LaTeX package install file for docstrip
+\&.itcl itcl source code;
+\& itcl ([incr Tcl]) is an OO extension of tcl
+\&.java a Java source file
+\&.jpeg Joint Photographic Experts Group format
+\&.jpg poor man's \fI.jpeg\fP
+\&.js JavaScript source code
+\&.jsx JSX (JavaScript XML-like extension) source code
+\&.kmap \fBlyx\fP(1) keymap
+\&.l equivalent to \fI.lex\fP or \fI.lisp\fP
+\&.lex \fBlex\fP(1) or \fBflex\fP(1) files
+\&.lha lharc archive
+\&.lib Common-Lisp library
+\&.lisp Lisp source
+\&.ln files for use with \fBlint\fP(1)
+\&.log log file, in particular produced by TeX
+\&.lsm Linux Software Map entry
+\&.lsp Common-Lisp source
+\&.lzh lharc archive
+\&.m Objective-C source code
+\&.m4 \fBm4\fP(1) source
+\&.mac macro files for various programs
+\&.man manual page (usually source rather than formatted)
+\&.map map files for various programs
+\&.me Nroff source using the me macro package
+\&.mf Metafont (font generator for TeX) source
+\&.mgp MagicPoint file
+\&.mm	sources for \fBgroff\fP(1) in mm format
+\&.mo Message catalog binary file
+\&.mod Modula-2 source for implementation modules
+\&.mov (quicktime) movie
+\&.mp Metapost source
+\&.mp2 MPEG Layer 2 (audio) file
+\&.mp3 MPEG Layer 3 (audio) file
+\&.mpeg movie file
+\&.o object file
+\&.old old or backup file
+\&.orig backup (original) version of a file, from \fBpatch\fP(1)
+\&.out output file, often executable program (a.out)
+\&.p Pascal source
+\&.pag dbm data base data file
+\&.patch file differences for \fBpatch\fP(1)
+\&.pbm portable bitmap format
+\&.pcf X11 font files
+\&.pdf	Adobe Portable Document Format
+\& (use Acrobat/\fBacroread\fP or \fBxpdf\fP)
+\&.perl Perl source (see .ph, .pl, and .pm)
+\&.pfa PostScript font definition files, ASCII format
+\&.pfb PostScript font definition files, binary format
+\&.pgm portable greymap format
+\&.pgp PGP binary data
+\&.ph Perl header file
+\&.php PHP program file
+\&.php3 PHP3 program file
+\&.pid File to store daemon PID (e.g., crond.pid)
+\&.pl TeX property list file or Perl library file
+\&.pm Perl module
+\&.png Portable Network Graphics file
+\&.po Message catalog source
+\&.pod \fBperldoc\fP(1) file
+\&.ppm portable pixmap format
+\&.pr bitmap source
+\&.ps PostScript file
+\&.py Python source
+\&.pyc compiled python
+\&.qt quicktime movie
+\&.r RATFOR source (obsolete)
+\&.rej patches that \fBpatch\fP(1) couldn't apply
+\&.rpm RPM software package
+\&.rtf Rich Text Format file
+\&.rules rules for something
+\&.s assembler source
+\&.sa stub libraries for a.out shared libraries
+\&.sc \fBsc\fP(1) spreadsheet commands
+\&.scm Scheme source code
+\&.sed sed source file
+\&.sgml SGML source file
+\&.sh \fBsh\fP(1) scripts
+\&.shar archive created by the \fBshar\fP(1) utility
+\&.shtml HTML using Server Side Includes
+\&.so Shared library or dynamically loadable object
+\&.sql SQL source
+\&.sqml SQML schema or query program
+\&.sty LaTeX style files
+\&.sym Modula-2 compiled definition modules
+\&.tar archive created by the \fBtar\fP(1) utility
+\&.tar.Z tar(1) archive compressed with \fBcompress\fP(1)
+\&.tar.bz2 tar(1) archive compressed with \fBbzip2\fP(1)
+\&.tar.gz tar(1) archive compressed with \fBgzip\fP(1)
+\&.taz tar(1) archive compressed with \fBcompress\fP(1)
+\&.tcl tcl source code
+\&.tex TeX or LaTeX source
+\&.texi equivalent to \fI.texinfo\fP
+\&.texinfo Texinfo documentation source
+\&.text text file
+\&.tfm TeX font metric file
+\&.tgz tar archive compressed with \fBgzip\fP(1)
+\&.tif poor man's \fI.tiff\fP
+\&.tiff Tagged Image File Format
+\&.tk tcl/tk script
+\&.tmp temporary file
+\&.tmpl template files
+\&.ts TypeScript source code
+\&.tsx TypeScript with JSX source code (\fI.ts\fP + \fI.jsx\fP)
+\&.txt equivalent to \fI.text\fP
+\&.uu equivalent to \fI.uue\fP
+\&.uue binary file encoded with \fBuuencode\fP(1)
+\&.vf TeX virtual font file
+\&.vpl TeX virtual property list file
+\&.w	Silvio Levy's CWEB
+\&.wav wave sound file
+\&.web Donald Knuth's WEB
+\&.wml Source file for Web Meta Language
+\&.xbm X11 bitmap source
+\&.xcf GIMP graphic
+\&.xml	eXtensible Markup Language file
+\&.xpm X11 pixmap source
+\&.xs Perl xsub file produced by h2xs
+\&.xsl XSL stylesheet
+\&.y \fByacc\fP(1) or \fBbison\fP(1) (parser generator) files
+\&.z File compressed using \fBpack\fP(1) (or an old \fBgzip\fP(1))
+\&.zip \fBzip\fP(1) archive
+\&.zoo \fBzoo\fP(1) archive
+\&\[ti] Emacs or \fBpatch\fP(1) backup file
+\&rc startup (`run control') file, e.g., \fI.newsrc\fP
+.TE
+.SH STANDARDS
+General UNIX conventions.
+.SH BUGS
+This list is not exhaustive.
+.SH SEE ALSO
+.BR file (1),
+.BR make (1)
diff --git a/man7/svipc.7 b/man7/svipc.7
new file mode 100644
index 0000000..cc543f5
--- /dev/null
+++ b/man7/svipc.7
@@ -0,0 +1 @@
+.so man7/sysvipc.7
diff --git a/man7/symlink.7 b/man7/symlink.7
new file mode 100644
index 0000000..9c238b2
--- /dev/null
+++ b/man7/symlink.7
@@ -0,0 +1,564 @@
+.\" Copyright (c) 1992, 1993, 1994
+.\" The Regents of the University of California. All rights reserved.
+.\" and Copyright (C) 2008, 2014 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: BSD-3-Clause
+.\"
+.\" @(#)symlink.7 8.3 (Berkeley) 3/31/94
+.\" $FreeBSD: src/bin/ln/symlink.7,v 1.30 2005/02/13 22:25:09 ru Exp $
+.\"
+.\" 2008-06-11, mtk, Taken from FreeBSD 6.2 and heavily edited for
+.\" specific Linux details, improved readability, and man-pages style.
+.\"
+.TH symlink 7 2023-04-03 "Linux man-pages 6.05.01"
+.SH NAME
+symlink \- symbolic link handling
+.SH DESCRIPTION
+Symbolic links are files that act as pointers to other files.
+To understand their behavior, you must first understand how hard links
+work.
+.PP
+A hard link to a file is indistinguishable from the original file because
+it is a reference to the object underlying the original filename.
+(To be precise: each of the hard links to a file is a reference to
+the same
+.IR "inode number" ,
+where an inode number is an index into the inode table,
+which contains metadata about all files on a filesystem.
+See
+.BR stat (2).)
+Changes to a file are independent of the name used to reference the file.
+Hard links may not refer to directories
+(to prevent the possibility of loops within the filesystem tree,
+which would confuse many programs)
+and may not refer to files on different filesystems
+(because inode numbers are not unique across filesystems).
+.PP
+A symbolic link is a special type of file whose contents are a string
+that is the pathname of another file, the file to which the link refers.
+(The contents of a symbolic link can be read using
+.BR readlink (2).)
+In other words, a symbolic link is a pointer to another name,
+and not to an underlying object.
+For this reason, symbolic links may refer to directories and may cross
+filesystem boundaries.
+.PP
+There is no requirement that the pathname referred to by a symbolic link
+should exist.
+A symbolic link that refers to a pathname that does not exist is said
+to be a
+.IR "dangling link" .
+.PP
+Because a symbolic link and its referenced object coexist in the filesystem
+name space, confusion can arise in distinguishing between the link itself
+and the referenced object.
+On historical systems,
+commands and system calls adopted their own link-following
+conventions in a somewhat ad-hoc fashion.
+Rules for a more uniform approach,
+as they are implemented on Linux and other systems,
+are outlined here.
+It is important that site-local applications also conform to these rules,
+so that the user interface can be as consistent as possible.
+.\"
+.SS Magic links
+There is a special class of symbolic-link-like objects
+known as "magic links", which
+can be found in certain pseudofilesystems such as
+.BR proc (5)
+(examples include
+.IR /proc/ pid /exe
+and
+.IR /proc/ pid /fd/ *).
+Unlike normal symbolic links, magic links are not resolved through
+pathname-expansion, but instead act as direct references to the kernel's own
+representation of a file handle.
+As such, these magic links allow users to
+access files which cannot be referenced with normal paths (such as unlinked
+files still referenced by a running program).
+.PP
+Because they can bypass ordinary
+.BR mount_namespaces (7)-based
+restrictions,
+magic links have been used as attack vectors in various exploits.
+.\"
+.SS Symbolic link ownership, permissions, and timestamps
+The owner and group of an existing symbolic link can be changed
+using
+.BR lchown (2).
+The ownership of a symbolic link matters
+when the link is being removed or renamed in a directory that
+has the sticky bit set (see
+.BR inode (7)),
+and when the
+.I fs.protected_symlinks
+sysctl is set (see
+.BR proc (5)).
+.PP
+The last access and last modification timestamps
+of a symbolic link can be changed using
+.BR utimensat (2)
+or
+.BR lutimes (3).
+.PP
+.\" Linux does not currently implement an lchmod(2).
+On Linux, the permissions of an ordinary symbolic link are not used in any
+operations; the permissions are always 0777 (read, write, and execute for all
+user categories), and can't be changed.
+.PP
+However, magic links do not follow this rule.
+They can have a non-0777 mode,
+though this mode is not currently used in any permission checks.
+.\" .PP
+.\" The
+.\" 4.4BSD
+.\" system differs from historical
+.\" 4BSD
+.\" systems in that the system call
+.\" .BR chown (2)
+.\" has been changed to follow symbolic links.
+.\" The
+.\" .BR lchown (2)
+.\" system call was added later when the limitations of the new
+.\" .BR chown (2)
+.\" became apparent.
+.SS Obtaining a file descriptor that refers to a symbolic link
+Using the combination of the
+.B O_PATH
+and
+.B O_NOFOLLOW
+flags to
+.BR open (2)
+yields a file descriptor that can be passed as the
+.I dirfd
+argument in system calls such as
+.BR fstatat (2),
+.BR fchownat (2),
+.BR fchmodat (2),
+.BR linkat (2),
+and
+.BR readlinkat (2),
+in order to operate on the symbolic link itself
+(rather than the file to which it refers).
+.PP
+By default
+(i.e., if the
+.B AT_SYMLINK_FOLLOW
+flag is not specified), if
+.BR name_to_handle_at (2)
+is applied to a symbolic link, it yields a handle for the symbolic link
+(rather than the file to which it refers).
+One can then obtain a file descriptor for the symbolic link
+(rather than the file to which it refers)
+by specifying the
+.B O_PATH
+flag in a subsequent call to
+.BR open_by_handle_at (2).
+Again, that file descriptor can be used in the
+aforementioned system calls to operate on the symbolic link itself.
+.SS Handling of symbolic links by system calls and commands
+Symbolic links are handled either by operating on the link itself,
+or by operating on the object referred to by the link.
+In the latter case,
+an application or system call is said to
+.I follow
+the link.
+Symbolic links may refer to other symbolic links,
+in which case the links are dereferenced until an object that is
+not a symbolic link is found,
+a symbolic link that refers to a file which does not exist is found,
+or a loop is detected.
+(Loop detection is done by placing an upper limit on the number of
+links that may be followed, and an error results if this limit is
+exceeded.)
+.PP
+There are three separate areas that need to be discussed.
+They are as follows:
+.IP \[bu] 3
+Symbolic links used as filename arguments for system calls.
+.IP \[bu]
+Symbolic links specified as command-line arguments to utilities that
+are not traversing a file tree.
+.IP \[bu]
+Symbolic links encountered by utilities that are traversing a file tree
+(either specified on the command line or encountered as part of the
+file hierarchy walk).
+.PP
+Before describing the treatment of symbolic links by system calls and commands,
+we require some terminology.
+Given a pathname of the form
+.IR a/b/c ,
+the part preceding the final slash (i.e.,
+.IR a/b )
+is called the
+.I dirname
+component, and the part following the final slash (i.e.,
+.IR c )
+is called the
+.I basename
+component.
+.\"
+.SS Treatment of symbolic links in system calls
+The first area is symbolic links used as filename arguments for
+system calls.
+.PP
+The treatment of symbolic links within a pathname passed to
+a system call is as follows:
+.IP (1) 5
+Within the dirname component of a pathname,
+symbolic links are followed in nearly every system call.
+(This is also true for commands.)
+The one exception is
+.BR openat2 (2),
+which provides flags that can be used to explicitly
+prevent following of symbolic links in the dirname component.
+.IP (2)
+Except as noted below,
+all system calls follow symbolic links
+in the basename component of a pathname.
+For example, if there were a symbolic link
+.I slink
+which pointed to a file named
+.IR afile ,
+the system call
+.I "open(""slink"", ...\&)"
+would return a file descriptor referring to the file
+.IR afile .
+.PP
+Various system calls do not follow links in
+the basename component of a pathname,
+and operate on the symbolic link itself.
+They are:
+.BR lchown (2),
+.BR lgetxattr (2),
+.BR llistxattr (2),
+.BR lremovexattr (2),
+.BR lsetxattr (2),
+.BR lstat (2),
+.BR readlink (2),
+.BR rename (2),
+.BR rmdir (2),
+and
+.BR unlink (2).
+.PP
+Certain other system calls optionally follow symbolic links
+in the basename component of a pathname.
+They are:
+.BR faccessat (2),
+.\" Maybe one day: .BR fchmodat (2)
+.BR fchownat (2),
+.BR fstatat (2),
+.BR linkat (2),
+.BR name_to_handle_at (2),
+.BR open (2),
+.BR openat (2),
+.BR open_by_handle_at (2),
+and
+.BR utimensat (2);
+see their manual pages for details.
+Because
+.BR remove (3)
+calls
+.BR unlink (2)
+for anything that is not a directory,
+that library function also does not follow symbolic links.
+When
+.BR rmdir (2)
+is applied to a symbolic link, it fails with the error
+.BR ENOTDIR .
+.PP
+.BR link (2)
+warrants special discussion.
+POSIX.1-2001 specifies that
+.BR link (2)
+should dereference
+.I oldpath
+if it is a symbolic link.
+However, Linux does not do this.
+(By default, Solaris is the same,
+but the POSIX.1-2001 specified behavior can be obtained with
+suitable compiler options.)
+POSIX.1-2008 changed the specification to allow
+either behavior in an implementation.
+.SS Commands not traversing a file tree
+The second area is symbolic links, specified as command-line
+filename arguments, to commands which are not traversing a file tree.
+.PP
+Except as noted below, commands follow symbolic links named as
+command-line arguments.
+For example, if there were a symbolic link
+.I slink
+which pointed to a file named
+.IR afile ,
+the command
+.I "cat slink"
+would display the contents of the file
+.IR afile .
+.PP
+It is important to realize that this rule includes commands which may
+optionally traverse file trees; for example, the command
+.I "chown file"
+is included in this rule, while the command
+.IR "chown\ \-R file" ,
+which performs a tree traversal, is not.
+(The latter is described in the third area, below.)
+.PP
+If it is explicitly intended that the command operate on the symbolic
+link instead of following the symbolic link\[em]for example, it is desired that
+.I "chown slink"
+change the ownership of the file that
+.I slink
+is, whether it is a symbolic link or not\[em]then the
+.I \-h
+option should be used.
+In the above example,
+.I "chown root slink"
+would change the ownership of the file referred to by
+.IR slink ,
+while
+.I "chown\ \-h root slink"
+would change the ownership of
+.I slink
+itself.
+.PP
+There are some exceptions to this rule:
+.IP \[bu] 3
+The
+.BR mv (1)
+and
+.BR rm (1)
+commands do not follow symbolic links named as arguments,
+but respectively attempt to rename and delete them.
+(Note, if the symbolic link references a file via a relative path,
+moving it to another directory may very well cause it to stop working,
+since the path may no longer be correct.)
+.IP \[bu]
+The
+.BR ls (1)
+command is also an exception to this rule.
+For compatibility with historic systems (when
+.BR ls (1)
+is not doing a tree walk\[em]that is, the
+.I \-R
+option is not specified),
+the
+.BR ls (1)
+command follows symbolic links named as arguments if the
+.I \-H
+or
+.I \-L
+option is specified,
+or if the
+.IR \-F ,
+.IR \-d ,
+or
+.I \-l
+options are not specified.
+(The
+.BR ls (1)
+command is the only command where the
+.I \-H
+and
+.I \-L
+options affect its behavior even though it is not doing a walk of
+a file tree.)
+.IP \[bu]
+The
+.BR file (1)
+command is also an exception to this rule.
+The
+.BR file (1)
+command does not follow symbolic links named as arguments by default.
+The
+.BR file (1)
+command does follow symbolic links named as arguments if the
+.I \-L
+option is specified.
+.\"
+.\"The 4.4BSD system differs from historical 4BSD systems in that the
+.\".BR chown (1)
+.\"and
+.\".BR chgrp (1)
+.\"commands follow symbolic links specified on the command line.
+.SS Commands traversing a file tree
+The following commands either optionally or always traverse file trees:
+.BR chgrp (1),
+.BR chmod (1),
+.BR chown (1),
+.BR cp (1),
+.BR du (1),
+.BR find (1),
+.BR ls (1),
+.BR pax (1),
+.BR rm (1),
+and
+.BR tar (1).
+.PP
+It is important to realize that the following rules apply equally to
+symbolic links encountered during the file tree traversal and symbolic
+links listed as command-line arguments.
+.PP
+The \fIfirst rule\fP applies to symbolic links that reference files other
+than directories.
+Operations that apply to symbolic links are performed on the links
+themselves, but otherwise the links are ignored.
+.PP
+The command
+.I "rm\ \-r slink directory"
+will remove
+.IR slink ,
+as well as any symbolic links encountered in the tree traversal of
+.IR directory ,
+because symbolic links may be removed.
+In no case will
+.BR rm (1)
+affect the file referred to by
+.IR slink .
+.PP
+The \fIsecond rule\fP applies to symbolic links that refer to directories.
+Symbolic links that refer to directories are never followed by default.
+This is often referred to as a "physical" walk, as opposed to a "logical"
+walk (where symbolic links that refer to directories are followed).
+.PP
+Certain conventions are (should be) followed as consistently as
+possible by commands that perform file tree walks:
+.IP \[bu] 3
+A command can be made to follow
+any symbolic links named on the command line,
+regardless of the type of file they reference, by specifying the
+.I \-H
+(for "half-logical") flag.
+This flag is intended to make the command-line name space look
+like the logical name space.
+(Note, for commands that do not always do file tree traversals, the
+.I \-H
+flag will be ignored if the
+.I \-R
+flag is not also specified.)
+.IP
+For example, the command
+.I "chown\ \-HR user slink"
+will traverse the file hierarchy rooted in the file pointed to by
+.IR slink .
+Note, the
+.I \-H
+is not the same as the previously discussed
+.I \-h
+flag.
+The
+.I \-H
+flag causes symbolic links specified on the command line to be
+dereferenced for the purposes of both the action to be performed
+and the tree walk, and it is as if the user had specified the
+name of the file to which the symbolic link pointed.
+.IP \[bu]
+A command can be made to
+follow any symbolic links named on the command line,
+as well as any symbolic links encountered during the traversal,
+regardless of the type of file they reference, by specifying the
+.I \-L
+(for "logical") flag.
+This flag is intended to make the entire name space look like
+the logical name space.
+(Note, for commands that do not always do file tree traversals, the
+.I \-L
+flag will be ignored if the
+.I \-R
+flag is not also specified.)
+.IP
+For example, the command
+.I "chown\ \-LR user slink"
+will change the owner of the file referred to by
+.IR slink .
+If
+.I slink
+refers to a directory,
+.B chown
+will traverse the file hierarchy rooted in the directory that it
+references.
+In addition, if any symbolic links are encountered in any file tree that
+.B chown
+traverses, they will be treated in the same fashion as
+.IR slink .
+.IP \[bu]
+A command can be made to
+provide the default behavior by specifying the
+.I \-P
+(for "physical") flag.
+This flag is intended to make the entire name space look like the
+physical name space.
+.PP
+For commands that do not by default do file tree traversals, the
+.IR \-H ,
+.IR \-L ,
+and
+.I \-P
+flags are ignored if the
+.I \-R
+flag is not also specified.
+In addition, you may specify the
+.IR \-H ,
+.IR \-L ,
+and
+.I \-P
+options more than once;
+the last one specified determines the command's behavior.
+This is intended to permit you to alias commands to behave one way
+or the other, and then override that behavior on the command line.
+.PP
+The
+.BR ls (1)
+and
+.BR rm (1)
+commands have exceptions to these rules:
+.IP \[bu] 3
+The
+.BR rm (1)
+command operates on the symbolic link, and not the file it references,
+and therefore never follows a symbolic link.
+The
+.BR rm (1)
+command does not support the
+.IR \-H ,
+.IR \-L ,
+or
+.I \-P
+options.
+.IP \[bu]
+To maintain compatibility with historic systems,
+the
+.BR ls (1)
+command acts a little differently.
+If you do not specify the
+.IR \-F ,
+.IR \-d ,
+or
+.I \-l
+options,
+.BR ls (1)
+will follow symbolic links specified on the command line.
+If the
+.I \-L
+flag is specified,
+.BR ls (1)
+follows all symbolic links,
+regardless of their type,
+whether specified on the command line or encountered in the tree walk.
+.SH SEE ALSO
+.BR chgrp (1),
+.BR chmod (1),
+.BR find (1),
+.BR ln (1),
+.BR ls (1),
+.BR mv (1),
+.BR namei (1),
+.BR rm (1),
+.BR lchown (2),
+.BR link (2),
+.BR lstat (2),
+.BR readlink (2),
+.BR rename (2),
+.BR symlink (2),
+.BR unlink (2),
+.BR utimensat (2),
+.BR lutimes (3),
+.BR path_resolution (7)
diff --git a/man7/system_data_types.7 b/man7/system_data_types.7
new file mode 100644
index 0000000..c4b3925
--- /dev/null
+++ b/man7/system_data_types.7
@@ -0,0 +1,320 @@
+.\" Copyright (c) 2020 by Alejandro Colomar <alx@kernel.org>
+.\" and Copyright (c) 2020 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH system_data_types 7 2023-05-20 "Linux man-pages 6.05.01"
+.SH NAME
+system_data_types \- overview of system data types
+.SH DESCRIPTION
+.\" Layout:
+.\" A list of type names (the struct/union keyword will be omitted).
+.\" Each entry will have the following parts:
+.\" * Include (see NOTES)
+.\"
+.\" * Definition (no "Definition" header)
+.\" Only struct/union types will have definition;
+.\" typedefs will remain opaque.
+.\"
+.\" * Description (no "Description" header)
+.\" A few lines describing the type.
+.\"
+.\" * Versions (optional)
+.\"
+.\" * Conforming to (see NOTES)
+.\" Format: CXY and later; POSIX.1-XXXX and later.
+.\"
+.\" * Notes (optional)
+.\"
+.\" * Bugs (if any)
+.\"
+.\" * See also
+.\"------------------------------------- aiocb ------------------------/
+.\"------------------------------------- blkcnt_t ---------------------/
+.\"------------------------------------- blksize_t --------------------/
+.\"------------------------------------- cc_t -------------------------/
+.\"------------------------------------- clock_t ----------------------/
+.\"------------------------------------- clockid_t --------------------/
+.\"------------------------------------- dev_t ------------------------/
+.\"------------------------------------- div_t ------------------------/
+.\"------------------------------------- double_t ---------------------/
+.\"------------------------------------- fd_set -----------------------/
+.\"------------------------------------- fenv_t -----------------------/
+.\"------------------------------------- fexcept_t --------------------/
+.\"------------------------------------- FILE -------------------------/
+.\"------------------------------------- float_t ----------------------/
+.\"------------------------------------- gid_t ------------------------/
+.\"------------------------------------- id_t -------------------------/
+.\"------------------------------------- imaxdiv_t --------------------/
+.\"------------------------------------- intmax_t ---------------------/
+.\"------------------------------------- intN_t -----------------------/
+.\"------------------------------------- intptr_t ---------------------/
+.\"------------------------------------- lconv ------------------------/
+.\"------------------------------------- ldiv_t -----------------------/
+.\"------------------------------------- lldiv_t ----------------------/
+.\"------------------------------------- mode_t -----------------------/
+.\"------------------------------------- off64_t ----------------------/
+.\"------------------------------------- off_t ------------------------/
+.\"------------------------------------- pid_t ------------------------/
+.\"------------------------------------- ptrdiff_t --------------------/
+.\"------------------------------------- regex_t ----------------------/
+.\"------------------------------------- regmatch_t -------------------/
+.\"------------------------------------- regoff_t ---------------------/
+.\"------------------------------------- sigevent ---------------------/
+.TP
+.I sigevent
+.RS
+.IR Include :
+.IR <signal.h> .
+Alternatively,
+.IR <aio.h> ,
+.IR <mqueue.h> ,
+or
+.IR <time.h> .
+.PP
+.EX
+struct sigevent {
+ int sigev_notify; /* Notification type */
+ int sigev_signo; /* Signal number */
+ union sigval sigev_value; /* Signal value */
+ void (*sigev_notify_function)(union sigval);
+ /* Notification function */
+ pthread_attr_t *sigev_notify_attributes;
+ /* Notification attributes */
+};
+.EE
+.PP
+For further details about this type, see
+.BR sigevent (7).
+.PP
+.IR Versions :
+.I <aio.h>
+and
+.I <time.h>
+define
+.I sigevent
+since POSIX.1-2008.
+.PP
+.IR "Conforming to" :
+POSIX.1-2001 and later.
+.PP
+.IR "See also" :
+.BR timer_create (2),
+.BR getaddrinfo_a (3),
+.BR lio_listio (3),
+.BR mq_notify (3)
+.PP
+See also the
+.I aiocb
+structure in this page.
+.RE
+.\"------------------------------------- siginfo_t --------------------/
+.TP
+.I siginfo_t
+.RS
+.IR Include :
+.IR <signal.h> .
+Alternatively,
+.IR <sys/wait.h> .
+.PP
+.EX
+typedef struct {
+ int si_signo; /* Signal number */
+ int si_code; /* Signal code */
+ pid_t si_pid; /* Sending process ID */
+ uid_t si_uid; /* Real user ID of sending process */
+ void *si_addr; /* Address of faulting instruction */
+ int si_status; /* Exit value or signal */
+ union sigval si_value; /* Signal value */
+} siginfo_t;
+.EE
+.PP
+Information associated with a signal.
+For further details on this structure
+(including additional, Linux-specific fields), see
+.BR sigaction (2).
+.PP
+.IR "Conforming to" :
+POSIX.1-2001 and later.
+.PP
+.IR "See also" :
+.BR pidfd_send_signal (2),
+.BR rt_sigqueueinfo (2),
+.BR sigaction (2),
+.BR sigwaitinfo (2),
+.BR psiginfo (3)
+.RE
+.\"------------------------------------- sigset_t ---------------------/
+.TP
+.I sigset_t
+.RS
+.IR Include :
+.IR <signal.h> .
+Alternatively,
+.IR <spawn.h> ,
+or
+.IR <sys/select.h> .
+.PP
+This is a type that represents a set of signals.
+According to POSIX, this shall be an integer or structure type.
+.PP
+.IR "Conforming to" :
+POSIX.1-2001 and later.
+.PP
+.IR "See also" :
+.BR epoll_pwait (2),
+.BR ppoll (2),
+.BR pselect (2),
+.BR sigaction (2),
+.BR signalfd (2),
+.BR sigpending (2),
+.BR sigprocmask (2),
+.BR sigsuspend (2),
+.BR sigwaitinfo (2),
+.BR signal (7)
+.RE
+.\"------------------------------------- sigval -----------------------/
+.TP
+.I sigval
+.RS
+.IR Include :
+.IR <signal.h> .
+.PP
+.EX
+union sigval {
+ int sival_int; /* Integer value */
+ void *sival_ptr; /* Pointer value */
+};
+.EE
+.PP
+Data passed with a signal.
+.PP
+.IR "Conforming to" :
+POSIX.1-2001 and later.
+.PP
+.IR "See also" :
+.BR pthread_sigqueue (3),
+.BR sigqueue (3),
+.BR sigevent (7)
+.PP
+See also the
+.I sigevent
+structure
+and the
+.I siginfo_t
+type
+in this page.
+.RE
+.\"------------------------------------- size_t -----------------------/
+.\"------------------------------------- sockaddr ---------------------/
+.\"------------------------------------- socklen_t --------------------/
+.\"------------------------------------- ssize_t ----------------------/
+.\"------------------------------------- stat -------------------------/
+.\"------------------------------------- suseconds_t ------------------/
+.\"------------------------------------- time_t -----------------------/
+.\"------------------------------------- timer_t ----------------------/
+.\"------------------------------------- timespec ---------------------/
+.\"------------------------------------- timeval ----------------------/
+.\"------------------------------------- uid_t ----------------------/
+.\"------------------------------------- uintmax_t --------------------/
+.\"------------------------------------- uintN_t ----------------------/
+.\"------------------------------------- uintptr_t --------------------/
+.\"------------------------------------- useconds_t -------------------/
+.\"------------------------------------- va_list ----------------------/
+.\"------------------------------------- void * -----------------------/
+.\"--------------------------------------------------------------------/
+.SH NOTES
+The structures described in this manual page shall contain,
+at least, the members shown in their definition, in no particular order.
+.PP
+Most of the integer types described in this page don't have
+a corresponding length modifier for the
+.BR printf (3)
+and the
+.BR scanf (3)
+families of functions.
+To print a value of an integer type that doesn't have a length modifier,
+it should be converted to
+.I intmax_t
+or
+.I uintmax_t
+by an explicit cast.
+To scan into a variable of an integer type
+that doesn't have a length modifier,
+an intermediate temporary variable of type
+.I intmax_t
+or
+.I uintmax_t
+should be used.
+When copying from the temporary variable to the destination variable,
+the value could overflow.
+If the type has upper and lower limits,
+the user should check that the value is within those limits,
+before actually copying the value.
+The example below shows how these conversions should be done.
+.SS Conventions used in this page
+In "Conforming to" we only concern ourselves with
+C99 and later and POSIX.1-2001 and later.
+Some types may be specified in earlier versions of one of these standards,
+but in the interests of simplicity we omit details from earlier standards.
+.PP
+In "Include", we first note the "primary" header(s) that
+define the type according to either the C or POSIX.1 standards.
+Under "Alternatively", we note additional headers that
+the standards specify shall define the type.
+.SH EXAMPLES
+The program shown below scans from a string and prints a value stored in
+a variable of an integer type that doesn't have a length modifier.
+The appropriate conversions from and to
+.IR intmax_t ,
+and the appropriate range checks,
+are used as explained in the notes section above.
+.PP
+.EX
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+\&
+int
+main (void)
+{
+ static const char *const str = "500000 us in half a second";
+ suseconds_t us;
+ intmax_t tmp;
+\&
+ /* Scan the number from the string into the temporary variable. */
+\&
+ sscanf(str, "%jd", &tmp);
+\&
+ /* Check that the value is within the valid range of suseconds_t. */
+\&
+ if (tmp < \-1 || tmp > 1000000) {
+ fprintf(stderr, "Scanned value outside valid range!\en");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Copy the value to the suseconds_t variable \[aq]us\[aq]. */
+\&
+ us = tmp;
+\&
+ /* Even though suseconds_t can hold the value \-1, this isn\[aq]t
+ a sensible number of microseconds. */
+\&
+ if (us < 0) {
+ fprintf(stderr, "Scanned value shouldn\[aq]t be negative!\en");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Print the value. */
+\&
+ printf("There are %jd microseconds in half a second.\en",
+ (intmax_t) us);
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR feature_test_macros (7),
+.BR standards (7)
diff --git a/man7/sysvipc.7 b/man7/sysvipc.7
new file mode 100644
index 0000000..307292c
--- /dev/null
+++ b/man7/sysvipc.7
@@ -0,0 +1,99 @@
+.\" Copyright 2020 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH sysvipc 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+sysvipc \- System V interprocess communication mechanisms
+.SH DESCRIPTION
+System V IPC is the name given to three interprocess
+communication mechanisms that are widely available on UNIX systems:
+message queues, semaphores, and shared memory.
+.\"
+.SS Message queues
+System V message queues allow data to be exchanged in units called messages.
+Each message can have an associated priority.
+POSIX message queues provide an alternative API for achieving the same result;
+see
+.BR mq_overview (7).
+.PP
+The System V message queue API consists of the following system calls:
+.TP
+.BR msgget (2)
+Create a new message queue or obtain the ID of an existing message queue.
+This call returns an identifier that is used in the remaining APIs.
+.TP
+.BR msgsnd (2)
+Add a message to a queue.
+.TP
+.BR msgrcv (2)
+Remove a message from a queue.
+.TP
+.BR msgctl (2)
+Perform various control operations on a queue, including deletion.
+.\"
+.SS Semaphore sets
+System V semaphores allow processes to synchronize their actions.
+System V semaphores are allocated in groups called sets;
+each semaphore in a set is a counting semaphore.
+POSIX semaphores provide an alternative API for achieving the same result;
+see
+.BR sem_overview (7).
+.PP
+The System V semaphore API consists of the following system calls:
+.TP
+.BR semget (2)
+Create a new set or obtain the ID of an existing set.
+This call returns an identifier that is used in the remaining APIs.
+.TP
+.BR semop (2)
+Perform operations on the semaphores in a set.
+.TP
+.BR semctl (2)
+Perform various control operations on a set, including deletion.
+.\"
+.SS Shared memory segments
+System V shared memory allows processes to share a region of memory
+(a "segment").
+POSIX shared memory is an alternative API for achieving the same result; see
+.BR shm_overview (7).
+.PP
+The System V shared memory API consists of the following system calls:
+.TP
+.BR shmget (2)
+Create a new segment or obtain the ID of an existing segment.
+This call returns an identifier that is used in the remaining APIs.
+.TP
+.BR shmat (2)
+Attach an existing shared memory object into the calling process's
+address space.
+.TP
+.BR shmdt (2)
+Detach a segment from the calling process's address space.
+.TP
+.BR shmctl (2)
+Perform various control operations on a segment, including deletion.
+.\"
+.SS IPC namespaces
+For a discussion of the interaction of System V IPC objects and
+IPC namespaces, see
+.BR ipc_namespaces (7).
+.SH SEE ALSO
+.BR ipcmk (1),
+.BR ipcrm (1),
+.BR ipcs (1),
+.BR lsipc (1),
+.BR ipc (2),
+.BR msgctl (2),
+.BR msgget (2),
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semctl (2),
+.BR semget (2),
+.BR semop (2),
+.BR shmat (2),
+.BR shmctl (2),
+.BR shmdt (2),
+.BR shmget (2),
+.BR ftok (3),
+.BR ipc_namespaces (7)
diff --git a/man7/tcp.7 b/man7/tcp.7
new file mode 100644
index 0000000..aec6b78
--- /dev/null
+++ b/man7/tcp.7
@@ -0,0 +1,1563 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\" and Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Note also that many pieces are drawn from the kernel source file
+.\" Documentation/networking/ip-sysctl.txt.
+.\"
+.\" 2.4 Updates by Nivedita Singhvi 4/20/02 <nivedita@us.ibm.com>.
+.\" Modified, 2004-11-11, Michael Kerrisk and Andries Brouwer
+.\" Updated details of interaction of TCP_CORK and TCP_NODELAY.
+.\"
+.\" 2008-11-21, mtk, many, many updates.
+.\" The descriptions of /proc files and socket options should now
+.\" be more or less up to date and complete as at Linux 2.6.27
+.\" (other than the remaining FIXMEs in the page source below).
+.\"
+.\" FIXME The following need to be documented
+.\" TCP_MD5SIG (2.6.20)
+.\" commit cfb6eeb4c860592edd123fdea908d23c6ad1c7dc
+.\" Author was yoshfuji@linux-ipv6.org
+.\" Needs CONFIG_TCP_MD5SIG
+.\" From net/inet/Kconfig:
+.\" bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)"
+.\" RFC2385 specifies a method of giving MD5 protection to TCP sessions.
+.\" Its main (only?) use is to protect BGP sessions between core routers
+.\" on the Internet.
+.\"
+.\" There is a TCP_MD5SIG option documented in FreeBSD's tcp(4),
+.\" but probably many details are different on Linux
+.\" http://thread.gmane.org/gmane.linux.network/47490
+.\" http://www.daemon-systems.org/man/tcp.4.html
+.\" http://article.gmane.org/gmane.os.netbsd.devel.network/3767/match=tcp_md5sig+freebsd
+.\"
+.\" TCP_COOKIE_TRANSACTIONS (2.6.33)
+.\" commit 519855c508b9a17878c0977a3cdefc09b59b30df
+.\" Author: William Allen Simpson <william.allen.simpson@gmail.com>
+.\" commit e56fb50f2b7958b931c8a2fc0966061b3f3c8f3a
+.\" Author: William Allen Simpson <william.allen.simpson@gmail.com>
+.\"
+.\" REMOVED in Linux 3.10
+.\" commit 1a2c6181c4a1922021b4d7df373bba612c3e5f04
+.\" Author: Christoph Paasch <christoph.paasch@uclouvain.be>
+.\"
+.\" TCP_THIN_LINEAR_TIMEOUTS (2.6.34)
+.\" commit 36e31b0af58728071e8023cf8e20c5166b700717
+.\" Author: Andreas Petlund <apetlund@simula.no>
+.\"
+.\" TCP_THIN_DUPACK (2.6.34)
+.\" commit 7e38017557bc0b87434d184f8804cadb102bb903
+.\" Author: Andreas Petlund <apetlund@simula.no>
+.\"
+.\" TCP_REPAIR (3.5)
+.\" commit ee9952831cfd0bbe834f4a26489d7dce74582e37
+.\" Author: Pavel Emelyanov <xemul@parallels.com>
+.\" See also
+.\" http://criu.org/TCP_connection
+.\" https://lwn.net/Articles/495304/
+.\"
+.\" TCP_REPAIR_QUEUE (3.5)
+.\" commit ee9952831cfd0bbe834f4a26489d7dce74582e37
+.\" Author: Pavel Emelyanov <xemul@parallels.com>
+.\"
+.\" TCP_QUEUE_SEQ (3.5)
+.\" commit ee9952831cfd0bbe834f4a26489d7dce74582e37
+.\" Author: Pavel Emelyanov <xemul@parallels.com>
+.\"
+.\" TCP_REPAIR_OPTIONS (3.5)
+.\" commit b139ba4e90dccbf4cd4efb112af96a5c9e0b098c
+.\" Author: Pavel Emelyanov <xemul@parallels.com>
+.\"
+.\" TCP_FASTOPEN (3.6)
+.\" (Fast Open server side implementation completed in Linux 3.7)
+.\" http://lwn.net/Articles/508865/
+.\"
+.\" TCP_TIMESTAMP (3.9)
+.\" commit 93be6ce0e91b6a94783e012b1857a347a5e6e9f2
+.\" Author: Andrey Vagin <avagin@openvz.org>
+.\"
+.\" TCP_NOTSENT_LOWAT (3.12)
+.\" commit c9bee3b7fdecb0c1d070c7b54113b3bdfb9a3d36
+.\" Author: Eric Dumazet <edumazet@google.com>
+.\"
+.\" TCP_CC_INFO (4.1)
+.\" commit 6e9250f59ef9efb932c84850cd221f22c2a03c4a
+.\" Author: Eric Dumazet <edumazet@google.com>
+.\"
+.\" TCP_SAVE_SYN, TCP_SAVED_SYN (4.2)
+.\" commit cd8ae85299d54155702a56811b2e035e63064d3d
+.\" Author: Eric Dumazet <edumazet@google.com>
+.\"
+.TH tcp 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+tcp \- TCP protocol
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netinet/in.h>
+.B #include <netinet/tcp.h>
+.PP
+.IB tcp_socket " = socket(AF_INET, SOCK_STREAM, 0);"
+.fi
+.SH DESCRIPTION
+This is an implementation of the TCP protocol defined in
+RFC\ 793, RFC\ 1122 and RFC\ 2001 with the NewReno and SACK
+extensions.
+It provides a reliable, stream-oriented,
+full-duplex connection between two sockets on top of
+.BR ip (7),
+for both v4 and v6 versions.
+TCP guarantees that the data arrives in order and
+retransmits lost packets.
+It generates and checks a per-packet checksum to catch
+transmission errors.
+TCP does not preserve record boundaries.
+.PP
+A newly created TCP socket has no remote or local address and is not
+fully specified.
+To create an outgoing TCP connection use
+.BR connect (2)
+to establish a connection to another TCP socket.
+To receive new incoming connections, first
+.BR bind (2)
+the socket to a local address and port and then call
+.BR listen (2)
+to put the socket into the listening state.
+After that a new socket for each incoming connection can be accepted using
+.BR accept (2).
+A socket which has had
+.BR accept (2)
+or
+.BR connect (2)
+successfully called on it is fully specified and may transmit data.
+Data cannot be transmitted on listening or not yet connected sockets.
+.PP
+Linux supports RFC\ 1323 TCP high performance
+extensions.
+These include Protection Against Wrapped
+Sequence Numbers (PAWS), Window Scaling and Timestamps.
+Window scaling allows the use
+of large (> 64\ kB) TCP windows in order to support links with high
+latency or bandwidth.
+To make use of them, the send and receive buffer sizes must be increased.
+They can be set globally with the
+.I /proc/sys/net/ipv4/tcp_wmem
+and
+.I /proc/sys/net/ipv4/tcp_rmem
+files, or on individual sockets by using the
+.B SO_SNDBUF
+and
+.B SO_RCVBUF
+socket options with the
+.BR setsockopt (2)
+call.
+.PP
+The maximum sizes for socket buffers declared via the
+.B SO_SNDBUF
+and
+.B SO_RCVBUF
+mechanisms are limited by the values in the
+.I /proc/sys/net/core/rmem_max
+and
+.I /proc/sys/net/core/wmem_max
+files.
+Note that TCP actually allocates twice the size of
+the buffer requested in the
+.BR setsockopt (2)
+call, and so a succeeding
+.BR getsockopt (2)
+call will not return the same size of buffer as requested in the
+.BR setsockopt (2)
+call.
+TCP uses the extra space for administrative purposes and internal
+kernel structures, and the
+.I /proc
+file values reflect the
+larger sizes compared to the actual TCP windows.
+On individual connections, the socket buffer size must be set prior to the
+.BR listen (2)
+or
+.BR connect (2)
+calls in order to have it take effect.
+See
+.BR socket (7)
+for more information.
+.PP
+TCP supports urgent data.
+Urgent data is used to signal the
+receiver that some important message is part of the data
+stream and that it should be processed as soon as possible.
+To send urgent data specify the
+.B MSG_OOB
+option to
+.BR send (2).
+When urgent data is received, the kernel sends a
+.B SIGURG
+signal to the process or process group that has been set as the
+socket "owner" using the
+.B SIOCSPGRP
+or
+.B FIOSETOWN
+ioctls (or the POSIX.1-specified
+.BR fcntl (2)
+.B F_SETOWN
+operation).
+When the
+.B SO_OOBINLINE
+socket option is enabled, urgent data is put into the normal
+data stream (a program can test for its location using the
+.B SIOCATMARK
+ioctl described below),
+otherwise it can be received only when the
+.B MSG_OOB
+flag is set for
+.BR recv (2)
+or
+.BR recvmsg (2).
+.PP
+When out-of-band data is present,
+.BR select (2)
+indicates the file descriptor as having an exceptional condition and
+.BR poll (2)
+indicates a
+.B POLLPRI
+event.
+.PP
+Linux 2.4 introduced a number of changes for improved
+throughput and scaling, as well as enhanced functionality.
+Some of these features include support for zero-copy
+.BR sendfile (2),
+Explicit Congestion Notification, new
+management of TIME_WAIT sockets, keep-alive socket options
+and support for Duplicate SACK extensions.
+.SS Address formats
+TCP is built on top of IP (see
+.BR ip (7)).
+The address formats defined by
+.BR ip (7)
+apply to TCP.
+TCP supports point-to-point communication only;
+broadcasting and multicasting are not
+supported.
+.SS /proc interfaces
+System-wide TCP parameter settings can be accessed by files in the directory
+.IR /proc/sys/net/ipv4/ .
+In addition, most IP
+.I /proc
+interfaces also apply to TCP; see
+.BR ip (7).
+Variables described as
+.I Boolean
+take an integer value, with a nonzero value ("true") meaning that
+the corresponding option is enabled, and a zero value ("false")
+meaning that the option is disabled.
+.TP
+.IR tcp_abc " (Integer; default: 0; Linux 2.6.15 to Linux 3.8)"
+.\" Since Linux 2.6.15; removed in Linux 3.9
+.\" commit ca2eb5679f8ddffff60156af42595df44a315ef0
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+Control the Appropriate Byte Count (ABC), defined in RFC 3465.
+ABC is a way of increasing the congestion window
+.RI ( cwnd )
+more slowly in response to partial acknowledgements.
+Possible values are:
+.RS
+.TP
+.B 0
+increase
+.I cwnd
+once per acknowledgement (no ABC)
+.TP
+.B 1
+increase
+.I cwnd
+once per acknowledgement of full sized segment
+.TP
+.B 2
+allow increasing
+.I cwnd
+by two if the acknowledgement is
+of two segments, to compensate for delayed acknowledgements.
+.RE
+.TP
+.IR tcp_abort_on_overflow " (Boolean; default: disabled; since Linux 2.4)"
+.\" Since Linux 2.3.41
+Enable resetting connections if the listening service is too
+slow and unable to keep up and accept them.
+The default (disabled) means that if overflow occurred due
+to a burst, the connection will recover.
+Enable this option
+.I only
+if you are really sure that the listening daemon
+cannot be tuned to accept connections faster.
+Enabling this option can harm the clients of your server.
+.TP
+.IR tcp_adv_win_scale " (integer; default: 2; since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+Count buffering overhead as
+.IR "bytes/2\[ha]tcp_adv_win_scale" ,
+if
+.I tcp_adv_win_scale
+is greater than 0; or
+.IR "bytes\-bytes/2\[ha](\-tcp_adv_win_scale)" ,
+if
+.I tcp_adv_win_scale
+is less than or equal to zero.
+.IP
+The socket receive buffer space is shared between the
+application and kernel.
+TCP maintains part of the buffer as
+the TCP window; this is the size of the receive window
+advertised to the other end.
+The rest of the space is used
+as the "application" buffer, used to isolate the network
+from scheduling and application latencies.
+The
+.I tcp_adv_win_scale
+default value of 2 implies that the space
+used for the application buffer is one fourth that of the total.
+.TP
+.IR tcp_allowed_congestion_control " (String; default: see text; since Linux 2.4.20)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+Show/set the congestion control algorithm choices available to unprivileged
+processes (see the description of the
+.B TCP_CONGESTION
+socket option).
+The items in the list are separated by white space and
+terminated by a newline character.
+The list is a subset of those listed in
+.IR tcp_available_congestion_control .
+The default value for this list is "reno" plus the default setting of
+.IR tcp_congestion_control .
+.TP
+.IR tcp_autocorking " (Boolean; default: enabled; since Linux 3.14)"
+.\" commit f54b311142a92ea2e42598e347b84e1655caf8e3
+.\" Text heavily based on Documentation/networking/ip-sysctl.txt
+If this option is enabled, the kernel tries to coalesce small writes
+(from consecutive
+.BR write (2)
+and
+.BR sendmsg (2)
+calls) as much as possible,
+in order to decrease the total number of sent packets.
+Coalescing is done if at least one prior packet for the flow
+is waiting in Qdisc queues or device transmit queue.
+Applications can still use the
+.B TCP_CORK
+socket option to obtain optimal behavior
+when they know how/when to uncork their sockets.
+.TP
+.IR tcp_available_congestion_control " (String; read-only; since Linux 2.4.20)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+Show a list of the congestion-control algorithms
+that are registered.
+The items in the list are separated by white space and
+terminated by a newline character.
+This list is a limiting set for the list in
+.IR tcp_allowed_congestion_control .
+More congestion-control algorithms may be available as modules,
+but not loaded.
+.TP
+.IR tcp_app_win " (integer; default: 31; since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+This variable defines how many
+bytes of the TCP window are reserved for buffering overhead.
+.IP
+max(\fIwindow/2\[ha]tcp_app_win\fP, mss) bytes in the window
+are reserved for the application buffer.
+A value of 0 implies that no amount is reserved.
+.\"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_base_mss " (Integer; default: 512; since Linux 2.6.17)"
+The initial value of
+.I search_low
+to be used by the packetization layer Path MTU discovery (MTU probing).
+If MTU probing is enabled,
+this is the initial MSS used by the connection.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_bic " (Boolean; default: disabled; Linux 2.4.27/2.6.6 to Linux 2.6.13)"
+Enable BIC TCP congestion control algorithm.
+BIC-TCP is a sender-side-only change that ensures a linear RTT
+fairness under large windows while offering both scalability and
+bounded TCP-friendliness.
+The protocol combines two schemes
+called additive increase and binary search increase.
+When the congestion window is large, additive increase with a large
+increment ensures linear RTT fairness as well as good scalability.
+Under small congestion windows, binary search
+increase provides TCP friendliness.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_bic_low_window " (integer; default: 14; Linux 2.4.27/2.6.6 to Linux 2.6.13)"
+Set the threshold window (in packets) where BIC TCP starts to
+adjust the congestion window.
+Below this threshold BIC TCP behaves the same as the default TCP Reno.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_bic_fast_convergence " (Boolean; default: enabled; Linux 2.4.27/2.6.6 to Linux 2.6.13)"
+Force BIC TCP to more quickly respond to changes in congestion window.
+Allows two flows sharing the same connection to converge more rapidly.
+.TP
+.IR tcp_congestion_control " (String; default: see text; since Linux 2.4.13)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+Set the default congestion-control algorithm to be used for new connections.
+The algorithm "reno" is always available,
+but additional choices may be available depending on kernel configuration.
+The default value for this file is set as part of kernel configuration.
+.TP
+.IR tcp_dma_copybreak " (integer; default: 4096; since Linux 2.6.24)"
+Lower limit, in bytes, of the size of socket reads that will be
+offloaded to a DMA copy engine, if one is present in the system
+and the kernel was configured with the
+.B CONFIG_NET_DMA
+option.
+.TP
+.IR tcp_dsack " (Boolean; default: enabled; since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+Enable RFC\ 2883 TCP Duplicate SACK support.
+.TP
+.IR tcp_fastopen " (Bitmask; default: 0x1; since Linux 3.7)"
+Enables RFC\~7413 Fast Open support.
+The flag is used as a bitmap with the following values:
+.RS
+.TP
+.B 0x1
+Enables client side Fast Open support
+.TP
+.B 0x2
+Enables server side Fast Open support
+.TP
+.B 0x4
+Allows client side to transmit data in SYN without Fast Open option
+.TP
+.B 0x200
+Allows server side to accept SYN data without Fast Open option
+.TP
+.B 0x400
+Enables Fast Open on all listeners without
+.B TCP_FASTOPEN
+socket option
+.RE
+.TP
+.IR tcp_fastopen_key " (since Linux 3.7)"
+Set server side RFC\~7413 Fast Open key to generate Fast Open cookie
+when server side Fast Open support is enabled.
+.TP
+.IR tcp_ecn " (Integer; default: see below; since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+Enable RFC\ 3168 Explicit Congestion Notification.
+.IP
+This file can have one of the following values:
+.RS
+.TP
+.B 0
+Disable ECN.
+Neither initiate nor accept ECN.
+This was the default up to and including Linux 2.6.30.
+.TP
+.B 1
+Enable ECN when requested by incoming connections and also
+request ECN on outgoing connection attempts.
+.TP
+.B 2
+.\" commit 255cac91c3c9ce7dca7713b93ab03c75b7902e0e
+Enable ECN when requested by incoming connections,
+but do not request ECN on outgoing connections.
+This value is supported, and is the default, since Linux 2.6.31.
+.RE
+.IP
+When enabled, connectivity to some destinations could be affected
+due to older, misbehaving middle boxes along the path, causing
+connections to be dropped.
+However, to facilitate and encourage deployment with option 1, and
+to work around such buggy equipment, the
+.B tcp_ecn_fallback
+option has been introduced.
+.TP
+.IR tcp_ecn_fallback " (Boolean; default: enabled; since Linux 4.1)"
+.\" commit 492135557dc090a1abb2cfbe1a412757e3ed68ab
+Enable RFC\ 3168, Section 6.1.1.1. fallback.
+When enabled, outgoing ECN-setup SYNs that time out within the
+normal SYN retransmission timeout will be resent with CWR and
+ECE cleared.
+.TP
+.IR tcp_fack " (Boolean; default: enabled; since Linux 2.2)"
+.\" Since Linux 2.1.92
+Enable TCP Forward Acknowledgement support.
+.TP
+.IR tcp_fin_timeout " (integer; default: 60; since Linux 2.2)"
+.\" Since Linux 2.1.53
+This specifies how many seconds to wait for a final FIN packet before the
+socket is forcibly closed.
+This is strictly a violation of the TCP specification,
+but required to prevent denial-of-service attacks.
+In Linux 2.2, the default value was 180.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_frto " (integer; default: see below; since Linux 2.4.21/2.6)"
+.\" Since Linux 2.4.21/2.5.43
+Enable F-RTO, an enhanced recovery algorithm for TCP retransmission
+timeouts (RTOs).
+It is particularly beneficial in wireless environments
+where packet loss is typically due to random radio interference
+rather than intermediate router congestion.
+See RFC 4138 for more details.
+.IP
+This file can have one of the following values:
+.RS
+.TP
+.B 0
+Disabled.
+This was the default up to and including Linux 2.6.23.
+.TP
+.B 1
+The basic version of the F-RTO algorithm is enabled.
+.TP
+.B 2
+.\" commit c96fd3d461fa495400df24be3b3b66f0e0b152f9
+Enable SACK-enhanced F-RTO if flow uses SACK.
+The basic version can also be used when
+SACK is in use, though in that case scenarios exist where F-RTO
+interacts badly with the packet counting of the SACK-enabled TCP flow.
+This value is the default since Linux 2.6.24.
+.RE
+.IP
+Before Linux 2.6.22, this parameter was a Boolean value,
+supporting just values 0 and 1 above.
+.TP
+.IR tcp_frto_response " (integer; default: 0; since Linux 2.6.22)"
+When F-RTO has detected that a TCP retransmission timeout was spurious
+(i.e., the timeout would have been avoided had TCP set a
+longer retransmission timeout),
+TCP has several options concerning what to do next.
+Possible values are:
+.RS
+.TP
+.B 0
+Rate halving based; a smooth and conservative response,
+results in halved congestion window
+.RI ( cwnd )
+and slow-start threshold
+.RI ( ssthresh )
+after one RTT.
+.TP
+.B 1
+Very conservative response; not recommended because even
+though being valid, it interacts poorly with the rest of Linux TCP; halves
+.I cwnd
+and
+.I ssthresh
+immediately.
+.TP
+.B 2
+Aggressive response; undoes congestion-control measures
+that are now known to be unnecessary
+(ignoring the possibility of a lost retransmission that would require
+TCP to be more cautious);
+.I cwnd
+and
+.I ssthresh
+are restored to the values prior to timeout.
+.RE
+.TP
+.IR tcp_keepalive_intvl " (integer; default: 75; since Linux 2.4)"
+.\" Since Linux 2.3.18
+The number of seconds between TCP keep-alive probes.
+.TP
+.IR tcp_keepalive_probes " (integer; default: 9; since Linux 2.2)"
+.\" Since Linux 2.1.43
+The maximum number of TCP keep-alive probes to send
+before giving up and killing the connection if
+no response is obtained from the other end.
+.TP
+.IR tcp_keepalive_time " (integer; default: 7200; since Linux 2.2)"
+.\" Since Linux 2.1.43
+The number of seconds a connection needs to be idle
+before TCP begins sending out keep-alive probes.
+Keep-alives are sent only when the
+.B SO_KEEPALIVE
+socket option is enabled.
+The default value is 7200 seconds (2 hours).
+An idle connection is terminated after
+approximately an additional 11 minutes (9 probes at intervals
+of 75 seconds) when keep-alive is enabled.
+.IP
+Note that underlying connection tracking mechanisms and
+application timeouts may be much shorter.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_low_latency " (Boolean; default: disabled; since Linux 2.4.21/2.6; \
+obsolete since Linux 4.14)"
+.\" Since Linux 2.4.21/2.5.60
+If enabled, the TCP stack makes decisions that prefer lower
+latency as opposed to higher throughput.
+If this option is disabled, then higher throughput is preferred.
+An example of an application where this default should be
+changed would be a Beowulf compute cluster.
+Since Linux 4.14,
+.\" commit b6690b14386698ce2c19309abad3f17656bdfaea
+this file still exists, but its value is ignored.
+.TP
+.IR tcp_max_orphans " (integer; default: see below; since Linux 2.4)"
+.\" Since Linux 2.3.41
+The maximum number of orphaned (not attached to any user file
+handle) TCP sockets allowed in the system.
+When this number is exceeded,
+the orphaned connection is reset and a warning is printed.
+This limit exists only to prevent simple denial-of-service attacks.
+Lowering this limit is not recommended.
+Network conditions might require you to increase the number of
+orphans allowed, but note that each orphan can eat up to \[ti]64\ kB
+of unswappable memory.
+The default initial value is set equal to the kernel parameter NR_FILE.
+This initial default is adjusted depending on the memory in the system.
+.TP
+.IR tcp_max_syn_backlog " (integer; default: see below; since Linux 2.2)"
+.\" Since Linux 2.1.53
+The maximum number of queued connection requests which have
+still not received an acknowledgement from the connecting client.
+If this number is exceeded, the kernel will begin
+dropping requests.
+The default value of 256 is increased to
+1024 when the memory present in the system is adequate or
+greater (>= 128\ MB), and reduced to 128 for those systems with
+very low memory (<= 32\ MB).
+.IP
+Before Linux 2.6.20,
+.\" commit 72a3effaf633bcae9034b7e176bdbd78d64a71db
+it was recommended that if this needed to be increased above 1024,
+the size of the SYNACK hash table
+.RB ( TCP_SYNQ_HSIZE )
+in
+.I include/net/tcp.h
+should be modified to keep
+.IP
+.in +4n
+.EX
+TCP_SYNQ_HSIZE * 16 <= tcp_max_syn_backlog
+.EE
+.in
+.IP
+and the kernel should be
+recompiled.
+In Linux 2.6.20, the fixed-size
+.B TCP_SYNQ_HSIZE
+was removed in favor of dynamic sizing.
+.TP
+.IR tcp_max_tw_buckets " (integer; default: see below; since Linux 2.4)"
+.\" Since Linux 2.3.41
+The maximum number of sockets in TIME_WAIT state allowed in
+the system.
+This limit exists only to prevent simple denial-of-service attacks.
+The default value of NR_FILE*2 is adjusted
+depending on the memory in the system.
+If this number is
+exceeded, the socket is closed and a warning is printed.
+.TP
+.IR tcp_moderate_rcvbuf " (Boolean; default: enabled; since Linux 2.4.17/2.6.7)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+If enabled, TCP performs receive buffer auto-tuning,
+attempting to automatically size the buffer (no greater than
+.IR tcp_rmem[2] )
+to match the size required by the path for full throughput.
+.TP
+.IR tcp_mem " (since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+This is a vector of 3 integers: [low, pressure, high].
+These bounds, measured in units of the system page size,
+are used by TCP to track its memory usage.
+The defaults are calculated at boot time from the amount of
+available memory.
+(TCP can only use
+.I "low memory"
+for this, which is limited to around 900 megabytes on 32-bit systems.
+64-bit systems do not suffer this limitation.)
+.RS
+.TP
+.I low
+TCP doesn't regulate its memory allocation when the number
+of pages it has allocated globally is below this number.
+.TP
+.I pressure
+When the amount of memory allocated by TCP
+exceeds this number of pages, TCP moderates its memory consumption.
+This memory pressure state is exited
+once the number of pages allocated falls below
+the
+.I low
+mark.
+.TP
+.I high
+The maximum number of pages, globally, that TCP will allocate.
+This value overrides any other limits imposed by the kernel.
+.RE
+.TP
+.IR tcp_mtu_probing " (integer; default: 0; since Linux 2.6.17)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+This parameter controls TCP Packetization-Layer Path MTU Discovery.
+The following values may be assigned to the file:
+.RS
+.TP
+.B 0
+Disabled
+.TP
+.B 1
+Disabled by default, enabled when an ICMP black hole is detected
+.TP
+.B 2
+Always enabled, use initial MSS of
+.IR tcp_base_mss .
+.RE
+.TP
+.IR tcp_no_metrics_save " (Boolean; default: disabled; since Linux 2.6.6)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+By default, TCP saves various connection metrics in the route cache
+when the connection closes, so that connections established in the
+near future can use these to set initial conditions.
+Usually, this increases overall performance,
+but it may sometimes cause performance degradation.
+If
+.I tcp_no_metrics_save
+is enabled, TCP will not cache metrics on closing connections.
+.TP
+.IR tcp_orphan_retries " (integer; default: 8; since Linux 2.4)"
+.\" Since Linux 2.3.41
+The maximum number of attempts made to probe the other
+end of a connection which has been closed by our end.
+.TP
+.IR tcp_reordering " (integer; default: 3; since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+The maximum a packet can be reordered in a TCP packet stream
+without TCP assuming packet loss and going into slow start.
+It is not advisable to change this number.
+This is a packet reordering detection metric designed to
+minimize unnecessary back off and retransmits provoked by
+reordering of packets on a connection.
+.TP
+.IR tcp_retrans_collapse " (Boolean; default: enabled; since Linux 2.2)"
+.\" Since Linux 2.1.96
+Try to send full-sized packets during retransmit.
+.TP
+.IR tcp_retries1 " (integer; default: 3; since Linux 2.2)"
+.\" Since Linux 2.1.43
+The number of times TCP will attempt to retransmit a
+packet on an established connection normally,
+without the extra effort of getting the network layers involved.
+Once we exceed this number of
+retransmits, we first have the network layer
+update the route if possible before each new retransmit.
+The default is the RFC specified minimum of 3.
+.TP
+.IR tcp_retries2 " (integer; default: 15; since Linux 2.2)"
+.\" Since Linux 2.1.43
+The maximum number of times a TCP packet is retransmitted
+in established state before giving up.
+The default value is 15, which corresponds to a duration of
+between approximately 13 and 30 minutes, depending
+on the retransmission timeout.
+The RFC\ 1122 specified
+minimum limit of 100 seconds is typically deemed too short.
+.TP
+.IR tcp_rfc1337 " (Boolean; default: disabled; since Linux 2.2)"
+.\" Since Linux 2.1.90
+Enable TCP behavior conformant with RFC\ 1337.
+When disabled,
+if a RST is received in TIME_WAIT state, we close
+the socket immediately without waiting for the end
+of the TIME_WAIT period.
+.TP
+.IR tcp_rmem " (since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+This is a vector of 3 integers: [min, default, max].
+These parameters are used by TCP to regulate receive buffer sizes.
+TCP dynamically adjusts the size of the
+receive buffer from the defaults listed below, in the range
+of these values, depending on memory available in the system.
+.RS
+.TP
+.I min
+Minimum size of the receive buffer used by each TCP socket.
+The default value is the system page size.
+(On Linux 2.4, the default value is 4\ kB, lowered to
+.B PAGE_SIZE
+bytes in low-memory systems.)
+This value
+is used to ensure that in memory pressure mode,
+allocations below this size will still succeed.
+This is not
+used to bound the size of the receive buffer declared
+using
+.B SO_RCVBUF
+on a socket.
+.TP
+.I default
+The default size of the receive buffer for a TCP socket.
+This value overwrites the initial default buffer size from
+the generic global
+.I net.core.rmem_default
+defined for all protocols.
+The default value is 87380 bytes.
+(On Linux 2.4, this will be lowered to 43689 in low-memory systems.)
+If larger receive buffer sizes are desired, this value should
+be increased (to affect all sockets).
+To employ large TCP windows, the
+.I net.ipv4.tcp_window_scaling
+must be enabled (default).
+.TP
+.I max
+The maximum size of the receive buffer used by each TCP socket.
+This value does not override the global
+.IR net.core.rmem_max .
+This is not used to limit the size of the receive buffer declared using
+.B SO_RCVBUF
+on a socket.
+The default value is calculated using the formula
+.IP
+.in +4n
+.EX
+max(87380, min(4\ MB, \fItcp_mem\fP[1]*PAGE_SIZE/128))
+.EE
+.in
+.IP
+(On Linux 2.4, the default is 87380*2 bytes,
+lowered to 87380 in low-memory systems).
+.RE
+.TP
+.IR tcp_sack " (Boolean; default: enabled; since Linux 2.2)"
+.\" Since Linux 2.1.36
+Enable RFC\ 2018 TCP Selective Acknowledgements.
+.TP
+.IR tcp_slow_start_after_idle " (Boolean; default: enabled; since Linux 2.6.18)"
+.\" The following is from Linux 2.6.28-rc4: Documentation/networking/ip-sysctl.txt
+If enabled, provide RFC 2861 behavior and time out the congestion
+window after an idle period.
+An idle period is defined as the current RTO (retransmission timeout).
+If disabled, the congestion window will not
+be timed out after an idle period.
+.TP
+.IR tcp_stdurg " (Boolean; default: disabled; since Linux 2.2)"
+.\" Since Linux 2.1.44
+If this option is enabled, then use the RFC\ 1122 interpretation
+of the TCP urgent-pointer field.
+.\" RFC 793 was ambiguous in its specification of the meaning of the
+.\" urgent pointer. RFC 1122 (and RFC 961) fixed on a particular
+.\" resolution of this ambiguity (unfortunately the "wrong" one).
+According to this interpretation, the urgent pointer points
+to the last byte of urgent data.
+If this option is disabled, then use the BSD-compatible interpretation of
+the urgent pointer:
+the urgent pointer points to the first byte after the urgent data.
+Enabling this option may lead to interoperability problems.
+.TP
+.IR tcp_syn_retries " (integer; default: 6; since Linux 2.2)"
+.\" Since Linux 2.1.38
+The maximum number of times initial SYNs for an active TCP
+connection attempt will be retransmitted.
+This value should not be higher than 255.
+The default value is 6, which corresponds to retrying for up to
+approximately 127 seconds.
+Before Linux 3.7,
+.\" commit 6c9ff979d1921e9fd05d89e1383121c2503759b9
+the default value was 5, which
+(in conjunction with calculation based on other kernel parameters)
+corresponded to approximately 180 seconds.
+.TP
+.IR tcp_synack_retries " (integer; default: 5; since Linux 2.2)"
+.\" Since Linux 2.1.38
+The maximum number of times a SYN/ACK segment
+for a passive TCP connection will be retransmitted.
+This number should not be higher than 255.
+.TP
+.IR tcp_syncookies " (integer; default: 1; since Linux 2.2)"
+.\" Since Linux 2.1.43
+Enable TCP syncookies.
+The kernel must be compiled with
+.BR CONFIG_SYN_COOKIES .
+The syncookies feature attempts to protect a
+socket from a SYN flood attack.
+This should be used as a last resort, if at all.
+This is a violation of the TCP protocol,
+and conflicts with other areas of TCP such as TCP extensions.
+It can cause problems for clients and relays.
+It is not recommended as a tuning mechanism for heavily
+loaded servers to help with overloaded or misconfigured conditions.
+For recommended alternatives see
+.IR tcp_max_syn_backlog ,
+.IR tcp_synack_retries ,
+and
+.IR tcp_abort_on_overflow .
+Set to one of the following values:
+.RS
+.TP
+.B 0
+Disable TCP syncookies.
+.TP
+.B 1
+Send out syncookies when the syn backlog queue of a socket overflows.
+.TP
+.B 2
+(since Linux 3.12)
+.\" commit 5ad37d5deee1ff7150a2d0602370101de158ad86
+Send out syncookies unconditionally.
+This can be useful for network testing.
+.RE
+.TP
+.IR tcp_timestamps " (integer; default: 1; since Linux 2.2)"
+.\" Since Linux 2.1.36
+Set to one of the following values to enable or disable RFC\ 1323
+TCP timestamps:
+.RS
+.TP
+.B 0
+Disable timestamps.
+.TP
+.B 1
+Enable timestamps as defined in RFC\ 1323 and use random offset for
+each connection rather than only using the current time.
+.TP
+.B 2
+As for the value 1, but without random offsets.
+.\" commit 25429d7b7dca01dc4f17205de023a30ca09390d0
+Setting
+.I tcp_timestamps
+to this value is meaningful since Linux 4.10.
+.RE
+.TP
+.IR tcp_tso_win_divisor " (integer; default: 3; since Linux 2.6.9)"
+This parameter controls what percentage of the congestion window
+can be consumed by a single TCP Segmentation Offload (TSO) frame.
+The setting of this parameter is a tradeoff between burstiness and
+building larger TSO frames.
+.TP
+.IR tcp_tw_recycle " (Boolean; default: disabled; Linux 2.4 to Linux 4.11)"
+.\" Since Linux 2.3.15
+.\" removed in Linux 4.12; commit 4396e46187ca5070219b81773c4e65088dac50cc
+Enable fast recycling of TIME_WAIT sockets.
+Enabling this option is
+not recommended as the remote IP may not use monotonically increasing
+timestamps (devices behind NAT, devices with per-connection timestamp
+offsets).
+See RFC 1323 (PAWS) and RFC 6191.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_tw_reuse " (Boolean; default: disabled; since Linux 2.4.19/2.6)"
+.\" Since Linux 2.4.19/2.5.43
+Allow reuse of TIME_WAIT sockets for new connections when it is
+safe from the protocol viewpoint.
+It should not be changed without advice/request of technical experts.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_vegas_cong_avoid " (Boolean; default: disabled; Linux 2.2 to Linux 2.6.13)"
+.\" Since Linux 2.1.8; removed in Linux 2.6.13
+Enable TCP Vegas congestion avoidance algorithm.
+TCP Vegas is a sender-side-only change to TCP that anticipates
+the onset of congestion by estimating the bandwidth.
+TCP Vegas adjusts the sending rate by modifying the congestion window.
+TCP Vegas should provide less packet loss, but it is
+not as aggressive as TCP Reno.
+.\"
+.\" The following is from Linux 2.6.12: Documentation/networking/ip-sysctl.txt
+.TP
+.IR tcp_westwood " (Boolean; default: disabled; Linux 2.4.26/2.6.3 to Linux 2.6.13)"
+Enable TCP Westwood+ congestion control algorithm.
+TCP Westwood+ is a sender-side-only modification of the TCP Reno
+protocol stack that optimizes the performance of TCP congestion control.
+It is based on end-to-end bandwidth estimation to set
+congestion window and slow start threshold after a congestion episode.
+Using this estimation, TCP Westwood+ adaptively sets a
+slow start threshold and a congestion window which takes into
+account the bandwidth used at the time congestion is experienced.
+TCP Westwood+ significantly increases fairness with respect to
+TCP Reno in wired networks and throughput over wireless links.
+.TP
+.IR tcp_window_scaling " (Boolean; default: enabled; since Linux 2.2)"
+.\" Since Linux 2.1.36
+Enable RFC\ 1323 TCP window scaling.
+This feature allows the use of a large window
+(> 64\ kB) on a TCP connection, should the other end support it.
+Normally, the 16-bit window length field in the TCP header
+limits the window size to less than 64\ kB.
+If larger windows are desired, applications can increase the size of
+their socket buffers and the window scaling option will be employed.
+If
+.I tcp_window_scaling
+is disabled, TCP will not negotiate the use of window
+scaling with the other end during connection setup.
+.TP
+.IR tcp_wmem " (since Linux 2.4)"
+.\" Since Linux 2.4.0-test7
+This is a vector of 3 integers: [min, default, max].
+These parameters are used by TCP to regulate send buffer sizes.
+TCP dynamically adjusts the size of the send buffer from the
+default values listed below, in the range of these values,
+depending on memory available.
+.RS
+.TP
+.I min
+Minimum size of the send buffer used by each TCP socket.
+The default value is the system page size.
+(On Linux 2.4, the default value is 4\ kB.)
+This value is used to ensure that in memory pressure mode,
+allocations below this size will still succeed.
+This is not used to bound the size of the send buffer declared using
+.B SO_SNDBUF
+on a socket.
+.TP
+.I default
+The default size of the send buffer for a TCP socket.
+This value overwrites the initial default buffer size from
+the generic global
+.I /proc/sys/net/core/wmem_default
+defined for all protocols.
+The default value is 16\ kB.
+.\" True in Linux 2.4 and 2.6
+If larger send buffer sizes are desired, this value
+should be increased (to affect all sockets).
+To employ large TCP windows, the
+.I /proc/sys/net/ipv4/tcp_window_scaling
+must be set to a nonzero value (default).
+.TP
+.I max
+The maximum size of the send buffer used by each TCP socket.
+This value does not override the value in
+.IR /proc/sys/net/core/wmem_max .
+This is not used to limit the size of the send buffer declared using
+.B SO_SNDBUF
+on a socket.
+The default value is calculated using the formula
+.IP
+.in +4n
+.EX
+max(65536, min(4\ MB, \fItcp_mem\fP[1]*PAGE_SIZE/128))
+.EE
+.in
+.IP
+(On Linux 2.4, the default value is 128\ kB,
+lowered to 64\ kB in low-memory systems.)
+.RE
+.TP
+.IR tcp_workaround_signed_windows " (Boolean; default: disabled; since Linux 2.6.26)"
+If enabled, assume that no receipt of a window-scaling option means that the
+remote TCP is broken and treats the window as a signed quantity.
+If disabled, assume that the remote TCP is not broken even if we do
+not receive a window scaling option from it.
+.SS Socket options
+To set or get a TCP socket option, call
+.BR getsockopt (2)
+to read or
+.BR setsockopt (2)
+to write the option with the option level argument set to
+.BR IPPROTO_TCP .
+Unless otherwise noted,
+.I optval
+is a pointer to an
+.IR int .
+.\" or SOL_TCP on Linux
+In addition,
+most
+.B IPPROTO_IP
+socket options are valid on TCP sockets.
+For more information see
+.BR ip (7).
+.PP
+Following is a list of TCP-specific socket options.
+For details of some other socket options that are also applicable
+for TCP sockets, see
+.BR socket (7).
+.TP
+.BR TCP_CONGESTION " (since Linux 2.6.13)"
+.\" commit 5f8ef48d240963093451bcf83df89f1a1364f51d
+.\" Author: Stephen Hemminger <shemminger@osdl.org>
+The argument for this option is a string.
+This option allows the caller to set the TCP congestion control
+algorithm to be used, on a per-socket basis.
+Unprivileged processes are restricted to choosing one of the algorithms in
+.I tcp_allowed_congestion_control
+(described above).
+Privileged processes
+.RB ( CAP_NET_ADMIN )
+can choose from any of the available congestion-control algorithms
+(see the description of
+.I tcp_available_congestion_control
+above).
+.TP
+.BR TCP_CORK " (since Linux 2.2)"
+.\" precisely: since Linux 2.1.127
+If set, don't send out partial frames.
+All queued partial frames are sent when the option is cleared again.
+This is useful for prepending headers before calling
+.BR sendfile (2),
+or for throughput optimization.
+As currently implemented, there is a 200 millisecond ceiling on the time
+for which output is corked by
+.BR TCP_CORK .
+If this ceiling is reached, then queued data is automatically transmitted.
+This option can be combined with
+.B TCP_NODELAY
+only since Linux 2.5.71.
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_DEFER_ACCEPT " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.38
+.\" Useful references:
+.\" http://www.techrepublic.com/article/take-advantage-of-tcp-ip-options-to-optimize-data-transmission/
+.\" http://unix.stackexchange.com/questions/94104/real-world-use-of-tcp-defer-accept
+Allow a listener to be awakened only when data arrives on the socket.
+Takes an integer value (seconds); this can
+bound the maximum number of attempts TCP will make to
+complete the connection.
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_INFO " (since Linux 2.4)"
+Used to collect information about this socket.
+The kernel returns a \fIstruct tcp_info\fP as defined in the file
+.IR /usr/include/linux/tcp.h .
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_KEEPCNT " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.18
+The maximum number of keepalive probes TCP should send
+before dropping the connection.
+This option should not be
+used in code intended to be portable.
+.TP
+.BR TCP_KEEPIDLE " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.18
+The time (in seconds) the connection needs to remain idle
+before TCP starts sending keepalive probes, if the socket
+option
+.B SO_KEEPALIVE
+has been set on this socket.
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_KEEPINTVL " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.18
+The time (in seconds) between individual keepalive probes.
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_LINGER2 " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.41
+The lifetime of orphaned FIN_WAIT2 state sockets.
+This option can be used to override the system-wide setting in the file
+.I /proc/sys/net/ipv4/tcp_fin_timeout
+for this socket.
+This is not to be confused with the
+.BR socket (7)
+level option
+.BR SO_LINGER .
+This option should not be used in code intended to be portable.
+.TP
+.B TCP_MAXSEG
+.\" Present in Linux 1.0
+The maximum segment size for outgoing TCP packets.
+In Linux 2.2 and earlier, and in Linux 2.6.28 and later,
+if this option is set before connection establishment, it also
+changes the MSS value announced to the other end in the initial packet.
+Values greater than the (eventual) interface MTU have no effect.
+TCP will also impose
+its minimum and maximum bounds over the value provided.
+.TP
+.B TCP_NODELAY
+.\" Present in Linux 1.0
+If set, disable the Nagle algorithm.
+This means that segments
+are always sent as soon as possible, even if there is only a
+small amount of data.
+When not set, data is buffered until there
+is a sufficient amount to send out, thereby avoiding the
+frequent sending of small packets, which results in poor
+utilization of the network.
+This option is overridden by
+.BR TCP_CORK ;
+however, setting this option forces an explicit flush of
+pending output, even if
+.B TCP_CORK
+is currently set.
+.TP
+.BR TCP_QUICKACK " (since Linux 2.4.4)"
+Enable quickack mode if set or disable quickack
+mode if cleared.
+In quickack mode, acks are sent
+immediately, rather than delayed if needed in accordance
+with normal TCP operation.
+This flag is not permanent;
+it only enables a switch to or from quickack mode.
+Subsequent operation of the TCP protocol will
+once again enter/leave quickack mode depending on
+internal protocol processing and factors such as
+delayed ack timeouts occurring and data transfer.
+This option should not be used in code intended to be
+portable.
+.TP
+.BR TCP_SYNCNT " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.18
+Set the number of SYN retransmits that TCP should send before
+aborting the attempt to connect.
+It cannot exceed 255.
+This option should not be used in code intended to be portable.
+.TP
+.BR TCP_USER_TIMEOUT " (since Linux 2.6.37)"
+.\" commit dca43c75e7e545694a9dd6288553f55c53e2a3a3
+.\" Author: Jerry Chu <hkchu@google.com>
+.\" The following text taken nearly verbatim from Jerry Chu's (excellent)
+.\" commit message.
+.\"
+This option takes an
+.I unsigned int
+as an argument.
+When the value is greater than 0,
+it specifies the maximum amount of time in milliseconds that transmitted
+data may remain unacknowledged, or buffered data may remain untransmitted
+(due to zero window size) before TCP will forcibly close the
+corresponding connection and return
+.B ETIMEDOUT
+to the application.
+If the option value is specified as 0,
+TCP will use the system default.
+.IP
+Increasing user timeouts allows a TCP connection to survive extended
+periods without end-to-end connectivity.
+Decreasing user timeouts
+allows applications to "fail fast", if so desired.
+Otherwise, failure may take up to 20 minutes with
+the current system defaults in a normal WAN environment.
+.IP
+This option can be set during any state of a TCP connection,
+but is effective only during the synchronized states of a connection
+(ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, and LAST-ACK).
+Moreover, when used with the TCP keepalive
+.RB ( SO_KEEPALIVE )
+option,
+.B TCP_USER_TIMEOUT
+will override keepalive to determine when to close a
+connection due to keepalive failure.
+.IP
+The option has no effect on when TCP retransmits a packet,
+nor when a keepalive probe is sent.
+.IP
+This option, like many others, will be inherited by the socket returned by
+.BR accept (2),
+if it was set on the listening socket.
+.IP
+Further details on the user timeout feature can be found in
+RFC\ 793 and RFC\ 5482 ("TCP User Timeout Option").
+.TP
+.BR TCP_WINDOW_CLAMP " (since Linux 2.4)"
+.\" Precisely: since Linux 2.3.41
+Bound the size of the advertised window to this value.
+The kernel imposes a minimum size of SOCK_MIN_RCVBUF/2.
+This option should not be used in code intended to be
+portable.
+.TP
+.BR TCP_FASTOPEN " (since Linux 3.6)"
+This option enables Fast Open (RFC\~7413) on the listener socket.
+The value specifies the maximum length of pending SYNs
+(similar to the backlog argument in
+.BR listen (2)).
+Once enabled,
+the listener socket grants the TCP Fast Open cookie
+on incoming SYN with TCP Fast Open option.
+.IP
+More importantly it accepts the data in SYN with a valid Fast Open cookie
+and responds with a SYN-ACK acknowledging both the data and the SYN sequence.
+.BR accept (2)
+returns a socket that is available for read and write
+when the handshake has not completed yet.
+Thus the data exchange can commence before the handshake completes.
+This option requires enabling the server-side support on sysctl
+.I net.ipv4.tcp_fastopen
+(see above).
+For TCP Fast Open client-side support,
+see
+.BR send (2)
+.B MSG_FASTOPEN
+or
+.B TCP_FASTOPEN_CONNECT
+below.
+.TP
+.BR TCP_FASTOPEN_CONNECT " (since Linux 4.11)"
+This option enables an alternative way to perform Fast Open
+on the active side (client).
+When this option is enabled,
+.BR connect (2)
+would behave differently depending on
+whether a Fast Open cookie is available for the destination.
+.IP
+If a cookie is not available (i.e. first contact to the destination),
+.BR connect (2)
+behaves as usual by sending a SYN immediately,
+except the SYN would include an empty Fast Open cookie option
+to solicit a cookie.
+.IP
+If a cookie is available,
+.BR connect (2)
+would return 0 immediately but the SYN transmission is deferred.
+A subsequent
+.BR write (2)
+or
+.BR sendmsg (2)
+would trigger a SYN with data plus cookie in the Fast Open option.
+In other words,
+the actual connect operation is deferred until data is supplied.
+.IP
+.B Note:
+While this option is designed for convenience,
+enabling it does change the behaviors and certain system calls might set
+different
+.I errno
+values.
+With cookie present,
+.BR write (2)
+or
+.BR sendmsg (2)
+must be called right after
+.BR connect (2)
+in order to send out SYN+data to complete 3WHS and establish connection.
+Calling
+.BR read (2)
+right after
+.BR connect (2)
+without
+.BR write (2)
+will cause the blocking socket to be blocked forever.
+.IP
+The application should either set
+.B TCP_FASTOPEN_CONNECT
+socket option before
+.BR write (2)
+or
+.BR sendmsg (2),
+or call
+.BR write (2)
+or
+.BR sendmsg (2)
+with
+.B MSG_FASTOPEN
+flag directly,
+instead of both on the same connection.
+.IP
+Here is the typical call flow with this new option:
+.IP
+.in +4n
+.EX
+s = socket();
+setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, 1, ...);
+connect(s);
+write(s); /* write() should always follow connect()
+ * in order to trigger SYN to go out. */
+read(s)/write(s);
+/* ... */
+close(s);
+.EE
+.in
+.SS Sockets API
+TCP provides limited support for out-of-band data,
+in the form of (a single byte of) urgent data.
+In Linux this means if the other end sends newer out-of-band
+data the older urgent data is inserted as normal data into
+the stream (even when
+.B SO_OOBINLINE
+is not set).
+This differs from BSD-based stacks.
+.PP
+Linux uses the BSD compatible interpretation of the urgent
+pointer field by default.
+This violates RFC\ 1122, but is
+required for interoperability with other stacks.
+It can be changed via
+.IR /proc/sys/net/ipv4/tcp_stdurg .
+.PP
+It is possible to peek at out-of-band data using the
+.BR recv (2)
+.B MSG_PEEK
+flag.
+.PP
+Since Linux 2.4, Linux supports the use of
+.B MSG_TRUNC
+in the
+.I flags
+argument of
+.BR recv (2)
+(and
+.BR recvmsg (2)).
+This flag causes the received bytes of data to be discarded,
+rather than passed back in a caller-supplied buffer.
+Since Linux 2.4.4,
+.B MSG_TRUNC
+also has this effect when used in conjunction with
+.B MSG_OOB
+to receive out-of-band data.
+.SS Ioctls
+The following
+.BR ioctl (2)
+calls return information in
+.IR value .
+The correct syntax is:
+.PP
+.RS
+.nf
+.BI int " value";
+.IB error " = ioctl(" tcp_socket ", " ioctl_type ", &" value ");"
+.fi
+.RE
+.PP
+.I ioctl_type
+is one of the following:
+.TP
+.B SIOCINQ
+Returns the amount of queued unread data in the receive buffer.
+The socket must not be in LISTEN state, otherwise an error
+.RB ( EINVAL )
+is returned.
+.B SIOCINQ
+is defined in
+.IR <linux/sockios.h> .
+.\" FIXME https://www.sourceware.org/bugzilla/show_bug.cgi?id=12002,
+.\" filed 2010-09-10, may cause SIOCINQ to be defined in glibc headers
+Alternatively,
+you can use the synonymous
+.BR FIONREAD ,
+defined in
+.IR <sys/ioctl.h> .
+.TP
+.B SIOCATMARK
+Returns true (i.e.,
+.I value
+is nonzero) if the inbound data stream is at the urgent mark.
+.IP
+If the
+.B SO_OOBINLINE
+socket option is set, and
+.B SIOCATMARK
+returns true, then the
+next read from the socket will return the urgent data.
+If the
+.B SO_OOBINLINE
+socket option is not set, and
+.B SIOCATMARK
+returns true, then the
+next read from the socket will return the bytes following
+the urgent data (to actually read the urgent data requires the
+.B recv(MSG_OOB)
+flag).
+.IP
+Note that a read never reads across the urgent mark.
+If an application is informed of the presence of urgent data via
+.BR select (2)
+(using the
+.I exceptfds
+argument) or through delivery of a
+.B SIGURG
+signal,
+then it can advance up to the mark using a loop which repeatedly tests
+.B SIOCATMARK
+and performs a read (requesting any number of bytes) as long as
+.B SIOCATMARK
+returns false.
+.TP
+.B SIOCOUTQ
+Returns the amount of unsent data in the socket send queue.
+The socket must not be in LISTEN state, otherwise an error
+.RB ( EINVAL )
+is returned.
+.B SIOCOUTQ
+is defined in
+.IR <linux/sockios.h> .
+.\" FIXME . https://www.sourceware.org/bugzilla/show_bug.cgi?id=12002,
+.\" filed 2010-09-10, may cause SIOCOUTQ to be defined in glibc headers
+Alternatively,
+you can use the synonymous
+.BR TIOCOUTQ ,
+defined in
+.IR <sys/ioctl.h> .
+.SS Error handling
+When a network error occurs, TCP tries to resend the packet.
+If it doesn't succeed after some time, either
+.B ETIMEDOUT
+or the last received error on this connection is reported.
+.PP
+Some applications require a quicker error notification.
+This can be enabled with the
+.B IPPROTO_IP
+level
+.B IP_RECVERR
+socket option.
+When this option is enabled, all incoming
+errors are immediately passed to the user program.
+Use this option with care \[em] it makes TCP less tolerant to routing
+changes and other normal network conditions.
+.SH ERRORS
+.TP
+.B EAFNOTSUPPORT
+Passed socket address type in
+.I sin_family
+was not
+.BR AF_INET .
+.TP
+.B EPIPE
+The other end closed the socket unexpectedly or a read is
+executed on a shut down socket.
+.TP
+.B ETIMEDOUT
+The other end didn't acknowledge retransmitted data after some time.
+.PP
+Any errors defined for
+.BR ip (7)
+or the generic socket layer may also be returned for TCP.
+.SH VERSIONS
+Support for Explicit Congestion Notification, zero-copy
+.BR sendfile (2),
+reordering support and some SACK extensions
+(DSACK) were introduced in Linux 2.4.
+Support for forward acknowledgement (FACK), TIME_WAIT recycling,
+and per-connection keepalive socket options were introduced in Linux 2.3.
+.SH BUGS
+Not all errors are documented.
+.PP
+IPv6 is not described.
+.\" Only a single Linux kernel version is described
+.\" Info for 2.2 was lost. Should be added again,
+.\" or put into a separate page.
+.\" .SH AUTHORS
+.\" This man page was originally written by Andi Kleen.
+.\" It was updated for 2.4 by Nivedita Singhvi with input from
+.\" Alexey Kuznetsov's Documentation/networking/ip-sysctl.txt
+.\" document.
+.SH SEE ALSO
+.BR accept (2),
+.BR bind (2),
+.BR connect (2),
+.BR getsockopt (2),
+.BR listen (2),
+.BR recvmsg (2),
+.BR sendfile (2),
+.BR sendmsg (2),
+.BR socket (2),
+.BR ip (7),
+.BR socket (7)
+.PP
+The kernel source file
+.IR Documentation/networking/ip\-sysctl.txt .
+.PP
+RFC\ 793 for the TCP specification.
+.br
+RFC\ 1122 for the TCP requirements and a description of the Nagle algorithm.
+.br
+RFC\ 1323 for TCP timestamp and window scaling options.
+.br
+RFC\ 1337 for a description of TIME_WAIT assassination hazards.
+.br
+RFC\ 3168 for a description of Explicit Congestion Notification.
+.br
+RFC\ 2581 for TCP congestion control algorithms.
+.br
+RFC\ 2018 and RFC\ 2883 for SACK and extensions to SACK.
diff --git a/man7/termio.7 b/man7/termio.7
new file mode 100644
index 0000000..08bba54
--- /dev/null
+++ b/man7/termio.7
@@ -0,0 +1,45 @@
+.\" Copyright (c) 2006 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 28 Dec 2006 - Initial Creation
+.\"
+.TH termio 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+termio \- System V terminal driver interface
+.SH DESCRIPTION
+.B termio
+is the name of the old System V terminal driver interface.
+This interface defined a
+.I termio
+structure used to store terminal settings, and a range of
+.BR ioctl (2)
+operations to get and set terminal attributes.
+.PP
+The
+.B termio
+interface is now obsolete: POSIX.1-1990 standardized a modified
+version of this interface, under the name
+.BR termios .
+The POSIX.1 data structure differs slightly from the
+System V version, and POSIX.1 defined a suite of functions
+to replace the various
+.BR ioctl (2)
+operations that existed in System V.
+(This was done because
+.BR ioctl (2)
+was unstandardized, and its variadic third argument
+does not allow argument type checking.)
+.PP
+If you're looking for a page called "termio", then you can probably
+find most of the information that you seek in either
+.BR termios (3)
+or
+.BR ioctl_tty (2).
+.SH SEE ALSO
+.BR reset (1),
+.BR setterm (1),
+.BR stty (1),
+.BR ioctl_tty (2),
+.BR termios (3),
+.BR tty (4)
diff --git a/man7/thread-keyring.7 b/man7/thread-keyring.7
new file mode 100644
index 0000000..524bf22
--- /dev/null
+++ b/man7/thread-keyring.7
@@ -0,0 +1,50 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH thread-keyring 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+thread-keyring \- per-thread keyring
+.SH DESCRIPTION
+The thread keyring is a keyring used to anchor keys on behalf of a thread.
+It is created only when a thread requests it.
+The thread keyring has the name (description)
+.IR _tid .
+.PP
+A special serial number value,
+.BR KEY_SPEC_THREAD_KEYRING ,
+is defined that can be used in lieu of the actual serial number of
+the calling thread's thread keyring.
+.PP
+From the
+.BR keyctl (1)
+utility, '\fB@t\fP' can be used instead of a numeric key ID in
+much the same way, but as
+.BR keyctl (1)
+is a program run after forking, this is of no utility.
+.PP
+Thread keyrings are not inherited across
+.BR clone (2)
+and
+.BR fork (2)
+and are cleared by
+.BR execve (2).
+A thread keyring is destroyed when the thread that refers to it terminates.
+.PP
+Initially, a thread does not have a thread keyring.
+If a thread doesn't have a thread keyring when it is accessed,
+then it will be created if it is to be modified;
+otherwise the operation fails with the error
+.BR ENOKEY .
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyrings (7),
+.BR persistent\-keyring (7),
+.BR process\-keyring (7),
+.BR session\-keyring (7),
+.BR user\-keyring (7),
+.BR user\-session\-keyring (7)
diff --git a/man7/time.7 b/man7/time.7
new file mode 100644
index 0000000..ee0db5d
--- /dev/null
+++ b/man7/time.7
@@ -0,0 +1,218 @@
+.\" Copyright (c) 2006 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" 2008-06-24, mtk: added some details about where jiffies come into
+.\" play; added section on high-resolution timers.
+.\"
+.TH time 7 2023-01-22 "Linux man-pages 6.05.01"
+.SH NAME
+time \- overview of time and timers
+.SH DESCRIPTION
+.SS Real time and process time
+.I "Real time"
+is defined as time measured from some fixed point,
+either from a standard point in the past
+(see the description of the Epoch and calendar time below),
+or from some point (e.g., the start) in the life of a process
+.RI ( "elapsed time" ).
+.PP
+.I "Process time"
+is defined as the amount of CPU time used by a process.
+This is sometimes divided into
+.I user
+and
+.I system
+components.
+User CPU time is the time spent executing code in user mode.
+System CPU time is the time spent by the kernel executing
+in system mode on behalf of the process (e.g., executing system calls).
+The
+.BR time (1)
+command can be used to determine the amount of CPU time consumed
+during the execution of a program.
+A program can determine the amount of CPU time it has consumed using
+.BR times (2),
+.BR getrusage (2),
+or
+.BR clock (3).
+.SS The hardware clock
+Most computers have a (battery-powered) hardware clock which the kernel
+reads at boot time in order to initialize the software clock.
+For further details, see
+.BR rtc (4)
+and
+.BR hwclock (8).
+.SS The software clock, HZ, and jiffies
+The accuracy of various system calls that set timeouts
+(e.g.,
+.BR select (2),
+.BR sigtimedwait (2))
+.\" semtimedop(), mq_timedwait(), io_getevents(), poll() are the same
+.\" futexes and thus sem_timedwait() seem to use high-res timers.
+and measure CPU time (e.g.,
+.BR getrusage (2))
+is limited by the resolution of the
+.IR "software clock" ,
+a clock maintained by the kernel which measures time in
+.IR jiffies .
+The size of a jiffy is determined by the value of the kernel constant
+.IR HZ .
+.PP
+The value of
+.I HZ
+varies across kernel versions and hardware platforms.
+On i386 the situation is as follows:
+on kernels up to and including Linux 2.4.x,
+HZ was 100,
+giving a jiffy value of 0.01 seconds;
+starting with Linux 2.6.0,
+HZ was raised to 1000,
+giving a jiffy of 0.001 seconds.
+Since Linux 2.6.13, the HZ value is a kernel
+configuration parameter and can be 100, 250 (the default) or 1000,
+yielding a jiffy value of, respectively, 0.01, 0.004, or 0.001 seconds.
+Since Linux 2.6.20, a further frequency is available:
+300, a number that divides evenly for the common video frame rates
+(PAL, 25 Hz; NTSC, 30 Hz).
+.PP
+The
+.BR times (2)
+system call is a special case.
+It reports times with a granularity defined by the kernel constant
+.IR USER_HZ .
+User-space applications can determine the value of this constant using
+.IR sysconf(_SC_CLK_TCK) .
+.\" glibc gets this info with a little help from the ELF loader;
+.\" see glibc elf/dl-support.c and kernel fs/binfmt_elf.c.
+.\"
+.SS System and process clocks; time namespaces
+The kernel supports a range of clocks that measure various kinds of
+elapsed and virtual (i.e., consumed CPU) time.
+These clocks are described in
+.BR clock_gettime (2).
+A few of the clocks are settable using
+.BR clock_settime (2).
+The values of certain clocks are virtualized by time namespaces; see
+.BR time_namespaces (7).
+.\"
+.SS High-resolution timers
+Before Linux 2.6.21, the accuracy of timer and sleep system calls
+(see below) was also limited by the size of the jiffy.
+.PP
+Since Linux 2.6.21, Linux supports high-resolution timers (HRTs),
+optionally configurable via
+.BR CONFIG_HIGH_RES_TIMERS .
+On a system that supports HRTs, the accuracy of sleep and timer
+system calls is no longer constrained by the jiffy,
+but instead can be as accurate as the hardware allows
+(microsecond accuracy is typical of modern hardware).
+You can determine whether high-resolution timers are supported by
+checking the resolution returned by a call to
+.BR clock_getres (2)
+or looking at the "resolution" entries in
+.IR /proc/timer_list .
+.PP
+HRTs are not supported on all hardware architectures.
+(Support is provided on x86, ARM, and PowerPC, among others.)
+.SS The Epoch
+UNIX systems represent time in seconds since the
+.IR Epoch ,
+1970-01-01 00:00:00 +0000 (UTC).
+.PP
+A program can determine the
+.I "calendar time"
+via the
+.BR clock_gettime (2)
+.B CLOCK_REALTIME
+clock,
+which returns the time (in seconds and nanoseconds) that has
+elapsed since the Epoch;
+.BR time (2)
+provides similar information, but only with accuracy to the
+nearest second.
+The system time can be changed using
+.BR clock_settime (2).
+.\"
+.SS Broken-down time
+Certain library functions use a structure of
+type
+.I tm
+to represent
+.IR "broken-down time" ,
+which stores a time value separated out into distinct components
+(year, month, day, hour, minute, second, etc.).
+This structure is described in
+.BR tm (3type),
+which also describes functions that convert between calendar time and
+broken-down time.
+Functions for converting between broken-down time and printable
+string representations of the time are described in
+.BR ctime (3),
+.BR strftime (3),
+and
+.BR strptime (3).
+.SS Sleeping and setting timers
+Various system calls and functions allow a program to sleep
+(suspend execution) for a specified period of time; see
+.BR nanosleep (2),
+.BR clock_nanosleep (2),
+and
+.BR sleep (3).
+.PP
+Various system calls allow a process to set a timer that expires
+at some point in the future, and optionally at repeated intervals;
+see
+.BR alarm (2),
+.BR getitimer (2),
+.BR timerfd_create (2),
+and
+.BR timer_create (2).
+.SS Timer slack
+Since Linux 2.6.28, it is possible to control the "timer slack"
+value for a thread.
+The timer slack is the length of time by
+which the kernel may delay the wake-up of certain
+system calls that block with a timeout.
+Permitting this delay allows the kernel to coalesce wake-up events,
+thus possibly reducing the number of system wake-ups and saving power.
+For more details, see the description of
+.B PR_SET_TIMERSLACK
+in
+.BR prctl (2).
+.SH SEE ALSO
+.ad l
+.nh
+.BR date (1),
+.BR time (1),
+.BR timeout (1),
+.BR adjtimex (2),
+.BR alarm (2),
+.BR clock_gettime (2),
+.BR clock_nanosleep (2),
+.BR getitimer (2),
+.BR getrlimit (2),
+.BR getrusage (2),
+.BR gettimeofday (2),
+.BR nanosleep (2),
+.BR stat (2),
+.BR time (2),
+.BR timer_create (2),
+.BR timerfd_create (2),
+.BR times (2),
+.BR utime (2),
+.BR adjtime (3),
+.BR clock (3),
+.BR clock_getcpuclockid (3),
+.BR ctime (3),
+.BR ntp_adjtime (3),
+.BR ntp_gettime (3),
+.BR pthread_getcpuclockid (3),
+.BR sleep (3),
+.BR strftime (3),
+.BR strptime (3),
+.BR timeradd (3),
+.BR usleep (3),
+.BR rtc (4),
+.BR time_namespaces (7),
+.BR hwclock (8)
diff --git a/man7/time_namespaces.7 b/man7/time_namespaces.7
new file mode 100644
index 0000000..6e29996
--- /dev/null
+++ b/man7/time_namespaces.7
@@ -0,0 +1,345 @@
+.\" Copyright (c) 2020 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH time_namespaces 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+time_namespaces \- overview of Linux time namespaces
+.SH DESCRIPTION
+Time namespaces virtualize the values of two system clocks:
+.IP \[bu] 3
+.B CLOCK_MONOTONIC
+(and likewise
+.B CLOCK_MONOTONIC_COARSE
+and
+.BR CLOCK_MONOTONIC_RAW ),
+a nonsettable clock that represents monotonic time since\[em]as
+described by POSIX\[em]"some unspecified point in the past".
+.IP \[bu]
+.B CLOCK_BOOTTIME
+(and likewise
+.BR CLOCK_BOOTTIME_ALARM ),
+a nonsettable clock that is identical to
+.BR CLOCK_MONOTONIC ,
+except that it also includes any time that the system is suspended.
+.PP
+Thus, the processes in a time namespace share per-namespace values
+for these clocks.
+This affects various APIs that measure against these clocks, including:
+.BR clock_gettime (2),
+.BR clock_nanosleep (2),
+.BR nanosleep (2),
+.BR timer_settime (2),
+.BR timerfd_settime (2),
+and
+.IR /proc/uptime .
+.PP
+Currently, the only way to create a time namespace is by calling
+.BR unshare (2)
+with the
+.B CLONE_NEWTIME
+flag.
+This call creates a new time namespace but does
+.I not
+place the calling process in the new namespace.
+Instead, the calling process's
+subsequently created children are placed in the new namespace.
+This allows clock offsets (see below) for the new namespace
+to be set before the first process is placed in the namespace.
+The
+.IR /proc/ pid /ns/time_for_children
+symbolic link shows the time namespace in which
+the children of a process will be created.
+(A process can use a file descriptor opened on
+this symbolic link in a call to
+.BR setns (2)
+in order to move into the namespace.)
+.\"
+.SS \fI/proc/\fPpid\fI/timens_offsets\fP
+Associated with each time namespace are offsets,
+expressed with respect to the initial time namespace,
+that define the values of the monotonic and
+boot-time clocks in that namespace.
+These offsets are exposed via the file
+.IR /proc/ pid /timens_offsets .
+Within this file,
+the offsets are expressed as lines consisting of
+three space-delimited fields:
+.PP
+.in +4n
+.EX
+<clock-id> <offset-secs> <offset-nanosecs>
+.EE
+.in
+.PP
+The
+.I clock-id
+is a string that identifies the clock whose offsets are being shown.
+This field is either
+.IR monotonic ,
+for
+.BR CLOCK_MONOTONIC ,
+or
+.IR boottime ,
+for
+.BR CLOCK_BOOTTIME .
+The remaining fields express the offset (seconds plus nanoseconds) for the
+clock in this time namespace.
+These offsets are expressed relative to the clock values in
+the initial time namespace.
+The
+.I offset-secs
+value can be negative, subject to restrictions noted below;
+.I offset-nanosecs
+is an unsigned value.
+.PP
+In the initial time namespace, the contents of the
+.I timens_offsets
+file are as follows:
+.PP
+.in +4n
+.EX
+$ \fBcat /proc/self/timens_offsets\fP
+monotonic 0 0
+boottime 0 0
+.EE
+.in
+.PP
+In a new time namespace that has had no member processes,
+the clock offsets can be modified by writing newline-terminated
+records of the same form to the
+.I timens_offsets
+file.
+The file can be written to multiple times,
+but after the first process has been created in or has entered the namespace,
+.BR write (2)s
+on this file fail with the error
+.BR EACCES .
+In order to write to the
+.I timens_offsets
+file, a process must have the
+.B CAP_SYS_TIME
+capability in the user namespace that owns the time namespace.
+.PP
+Writes to the
+.I timens_offsets
+file can fail with the following errors:
+.TP
+.B EINVAL
+An
+.I offset-nanosecs
+value is greater than 999,999,999.
+.TP
+.B EINVAL
+A
+.I clock-id
+value is not valid.
+.TP
+.B EPERM
+The caller does not have the
+.B CAP_SYS_TIME
+capability.
+.TP
+.B ERANGE
+An
+.I offset-secs
+value is out of range.
+In particular:
+.RS
+.IP \[bu] 3
+.I offset-secs
+can't be set to a value which would make the current
+time on the corresponding clock inside the namespace a negative value; and
+.IP \[bu]
+.I offset-secs
+can't be set to a value such that the time on the corresponding clock
+inside the namespace would exceed half of the value of the kernel constant
+.B KTIME_SEC_MAX
+(this limits the clock value to a maximum of approximately 146 years).
+.RE
+.PP
+In a new time namespace created by
+.BR unshare (2),
+the contents of the
+.I timens_offsets
+file are inherited from the time namespace of the creating process.
+.SH NOTES
+Use of time namespaces requires a kernel that is configured with the
+.B CONFIG_TIME_NS
+option.
+.PP
+Note that time namespaces do not virtualize the
+.B CLOCK_REALTIME
+clock.
+Virtualization of this clock was avoided for reasons of complexity
+and overhead within the kernel.
+.PP
+For compatibility with the initial implementation, when writing a
+.I clock-id
+to the
+.IR /proc/ pid /timens_offsets
+file, the numerical values of the IDs can be written
+instead of the symbolic names shown above; i.e., 1 instead of
+.IR monotonic ,
+and 7 instead of
+.IR boottime .
+For readability, the use of the symbolic names over the numbers is preferred.
+.PP
+The motivation for adding time namespaces was to allow
+the monotonic and boot-time clocks to maintain consistent values
+during container migration and checkpoint/restore.
+.SH EXAMPLES
+The following shell session demonstrates the operation of time namespaces.
+We begin by displaying the inode number of the time namespace
+of a shell in the initial time namespace:
+.PP
+.in +4n
+.EX
+$ \fBreadlink /proc/$$/ns/time\fP
+time:[4026531834]
+.EE
+.in
+.PP
+Continuing in the initial time namespace, we display the system uptime using
+.BR uptime (1)
+and use the
+.I clock_times
+example program shown in
+.BR clock_getres (2)
+to display the values of various clocks:
+.PP
+.in +4n
+.EX
+$ \fBuptime \-\-pretty\fP
+up 21 hours, 17 minutes
+$ \fB./clock_times\fP
+CLOCK_REALTIME : 1585989401.971 (18356 days + 8h 36m 41s)
+CLOCK_TAI : 1585989438.972 (18356 days + 8h 37m 18s)
+CLOCK_MONOTONIC: 56338.247 (15h 38m 58s)
+CLOCK_BOOTTIME : 76633.544 (21h 17m 13s)
+.EE
+.in
+.PP
+We then use
+.BR unshare (1)
+to create a time namespace and execute a
+.BR bash (1)
+shell.
+From the new shell, we use the built-in
+.B echo
+command to write records to the
+.I timens_offsets
+file adjusting the offset for the
+.B CLOCK_MONOTONIC
+clock forward 2 days
+and the offset for the
+.B CLOCK_BOOTTIME
+clock forward 7 days:
+.PP
+.in +4n
+.EX
+$ \fBPS1="ns2# " sudo unshare \-T \-\- bash \-\-norc\fP
+ns2# \fBecho "monotonic $((2*24*60*60)) 0" > /proc/$$/timens_offsets\fP
+ns2# \fBecho "boottime $((7*24*60*60)) 0" > /proc/$$/timens_offsets\fP
+.EE
+.in
+.PP
+Above, we started the
+.BR bash (1)
+shell with the
+.B \-\-norc
+option so that no start-up scripts were executed.
+This ensures that no child processes are created from the
+shell before we have a chance to update the
+.I timens_offsets
+file.
+.PP
+We then use
+.BR cat (1)
+to display the contents of the
+.I timens_offsets
+file.
+The execution of
+.BR cat (1)
+creates the first process in the new time namespace,
+after which further attempts to update the
+.I timens_offsets
+file produce an error.
+.PP
+.in +4n
+.EX
+ns2# \fBcat /proc/$$/timens_offsets\fP
+monotonic 172800 0
+boottime 604800 0
+ns2# \fBecho "boottime $((9*24*60*60)) 0" > /proc/$$/timens_offsets\fP
+bash: echo: write error: Permission denied
+.EE
+.in
+.PP
+Continuing in the new namespace, we execute
+.BR uptime (1)
+and the
+.I clock_times
+example program:
+.PP
+.in +4n
+.EX
+ns2# \fBuptime \-\-pretty\fP
+up 1 week, 21 hours, 18 minutes
+ns2# \fB./clock_times\fP
+CLOCK_REALTIME : 1585989457.056 (18356 days + 8h 37m 37s)
+CLOCK_TAI : 1585989494.057 (18356 days + 8h 38m 14s)
+CLOCK_MONOTONIC: 229193.332 (2 days + 15h 39m 53s)
+CLOCK_BOOTTIME : 681488.629 (7 days + 21h 18m 8s)
+.EE
+.in
+.PP
+From the above output, we can see that the monotonic
+and boot-time clocks have different values in the new time namespace.
+.PP
+Examining the
+.IR /proc/ pid /ns/time
+and
+.IR /proc/ pid /ns/time_for_children
+symbolic links, we see that the shell is a member of the initial time
+namespace, but its children are created in the new namespace.
+.PP
+.in +4n
+.EX
+ns2# \fBreadlink /proc/$$/ns/time\fP
+time:[4026531834]
+ns2# \fBreadlink /proc/$$/ns/time_for_children\fP
+time:[4026532900]
+ns2# \fBreadlink /proc/self/ns/time\fP # Creates a child process
+time:[4026532900]
+.EE
+.in
+.PP
+Returning to the shell in the initial time namespace,
+we see that the monotonic and boot-time clocks
+are unaffected by the
+.I timens_offsets
+changes that were made in the other time namespace:
+.PP
+.in +4n
+.EX
+$ \fBuptime \-\-pretty\fP
+up 21 hours, 19 minutes
+$ \fB./clock_times\fP
+CLOCK_REALTIME : 1585989531.971 (18356 days + 8h 38m 51s)
+CLOCK_TAI : 1585989568.972 (18356 days + 8h 39m 28s)
+CLOCK_MONOTONIC: 56468.247 (15h 41m 8s)
+CLOCK_BOOTTIME : 76763.544 (21h 19m 23s)
+.EE
+.in
+.SH SEE ALSO
+.BR nsenter (1),
+.BR unshare (1),
+.BR clock_settime (2),
+.\" clone3() support for time namespaces is a work in progress
+.\" .BR clone3 (2),
+.BR setns (2),
+.BR unshare (2),
+.BR namespaces (7),
+.BR time (7)
diff --git a/man7/tis-620.7 b/man7/tis-620.7
new file mode 100644
index 0000000..cbd4cfe
--- /dev/null
+++ b/man7/tis-620.7
@@ -0,0 +1 @@
+.so man7/iso_8859-11.7
diff --git a/man7/udp.7 b/man7/udp.7
new file mode 100644
index 0000000..45c5cad
--- /dev/null
+++ b/man7/udp.7
@@ -0,0 +1,312 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>.
+.\"
+.\" $Id: udp.7,v 1.7 2000/01/22 01:55:05 freitag Exp $
+.\"
+.TH udp 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+udp \- User Datagram Protocol for IPv4
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <netinet/in.h>
+.B #include <netinet/udp.h>
+.PP
+.IB udp_socket " = socket(AF_INET, SOCK_DGRAM, 0);"
+.fi
+.SH DESCRIPTION
+This is an implementation of the User Datagram Protocol
+described in RFC\ 768.
+It implements a connectionless, unreliable datagram packet service.
+Packets may be reordered or duplicated before they arrive.
+UDP generates and checks checksums to catch transmission errors.
+.PP
+When a UDP socket is created,
+its local and remote addresses are unspecified.
+Datagrams can be sent immediately using
+.BR sendto (2)
+or
+.BR sendmsg (2)
+with a valid destination address as an argument.
+When
+.BR connect (2)
+is called on the socket, the default destination address is set and
+datagrams can now be sent using
+.BR send (2)
+or
+.BR write (2)
+without specifying a destination address.
+It is still possible to send to other destinations by passing an
+address to
+.BR sendto (2)
+or
+.BR sendmsg (2).
+In order to receive packets, the socket can be bound to a local
+address first by using
+.BR bind (2).
+Otherwise, the socket layer will automatically assign
+a free local port out of the range defined by
+.I /proc/sys/net/ipv4/ip_local_port_range
+and bind the socket to
+.BR INADDR_ANY .
+.PP
+All receive operations return only one packet.
+When the packet is smaller than the passed buffer, only that much
+data is returned; when it is bigger, the packet is truncated and the
+.B MSG_TRUNC
+flag is set.
+.B MSG_WAITALL
+is not supported.
+.PP
+IP options may be sent or received using the socket options described in
+.BR ip (7).
+They are processed by the kernel only when the appropriate
+.I /proc
+parameter
+is enabled (but still passed to the user even when it is turned off).
+See
+.BR ip (7).
+.PP
+When the
+.B MSG_DONTROUTE
+flag is set on sending, the destination address must refer to a local
+interface address and the packet is sent only to that interface.
+.PP
+By default, Linux UDP does path MTU (Maximum Transmission Unit) discovery.
+This means the kernel
+will keep track of the MTU to a specific target IP address and return
+.B EMSGSIZE
+when a UDP packet write exceeds it.
+When this happens, the application should decrease the packet size.
+Path MTU discovery can also be turned off using the
+.B IP_MTU_DISCOVER
+socket option or the
+.I /proc/sys/net/ipv4/ip_no_pmtu_disc
+file; see
+.BR ip (7)
+for details.
+When turned off, UDP will fragment outgoing UDP packets
+that exceed the interface MTU.
+However, disabling it is not recommended
+for performance and reliability reasons.
+.SS Address format
+UDP uses the IPv4
+.I sockaddr_in
+address format described in
+.BR ip (7).
+.SS Error handling
+All fatal errors will be passed to the user as an error return even
+when the socket is not connected.
+This includes asynchronous errors
+received from the network.
+You may get an error for an earlier packet
+that was sent on the same socket.
+This behavior differs from many other BSD socket implementations
+which don't pass any errors unless the socket is connected.
+Linux's behavior is mandated by
+.BR RFC\ 1122 .
+.PP
+For compatibility with legacy code, in Linux 2.0 and 2.2
+it was possible to set the
+.B SO_BSDCOMPAT
+.B SOL_SOCKET
+option to receive remote errors only when the socket has been
+connected (except for
+.B EPROTO
+and
+.BR EMSGSIZE ).
+Locally generated errors are always passed.
+Support for this socket option was removed in later kernels; see
+.BR socket (7)
+for further information.
+.PP
+When the
+.B IP_RECVERR
+option is enabled, all errors are stored in the socket error queue,
+and can be received by
+.BR recvmsg (2)
+with the
+.B MSG_ERRQUEUE
+flag set.
+.SS /proc interfaces
+System-wide UDP parameter settings can be accessed by files in the directory
+.IR /proc/sys/net/ipv4/ .
+.TP
+.IR udp_mem " (since Linux 2.6.25)"
+This is a vector of three integers governing the number
+of pages allowed for queueing by all UDP sockets.
+.RS
+.TP
+.I min
+Below this number of pages, UDP is not bothered about its
+memory appetite.
+When the amount of memory allocated by UDP exceeds
+this number, UDP starts to moderate memory usage.
+.TP
+.I pressure
+This value was introduced to follow the format of
+.I tcp_mem
+(see
+.BR tcp (7)).
+.TP
+.I max
+Number of pages allowed for queueing by all UDP sockets.
+.RE
+.IP
+Default values for these three items are
+calculated at boot time from the amount of available memory.
+.TP
+.IR udp_rmem_min " (integer; default value: PAGE_SIZE; since Linux 2.6.25)"
+Minimal size, in bytes, of receive buffers used by UDP sockets in moderation.
+Each UDP socket is able to use the size for receiving data,
+even if total pages of UDP sockets exceed
+.I udp_mem
+pressure.
+.TP
+.IR udp_wmem_min " (integer; default value: PAGE_SIZE; since Linux 2.6.25)"
+Minimal size, in bytes, of send buffer used by UDP sockets in moderation.
+Each UDP socket is able to use the size for sending data,
+even if total pages of UDP sockets exceed
+.I udp_mem
+pressure.
+.SS Socket options
+To set or get a UDP socket option, call
+.BR getsockopt (2)
+to read or
+.BR setsockopt (2)
+to write the option with the option level argument set to
+.BR IPPROTO_UDP .
+Unless otherwise noted,
+.I optval
+is a pointer to an
+.IR int .
+.PP
+Following is a list of UDP-specific socket options.
+For details of some other socket options that are also applicable
+for UDP sockets, see
+.BR socket (7).
+.TP
+.BR UDP_CORK " (since Linux 2.5.44)"
+If this option is enabled, then all data output on this socket
+is accumulated into a single datagram that is transmitted when
+the option is disabled.
+This option should not be used in code intended to be
+portable.
+.\" FIXME document UDP_ENCAP (new in Linux 2.5.67)
+.\" From include/linux/udp.h:
+.\" UDP_ENCAP_ESPINUDP_NON_IKE draft-ietf-ipsec-nat-t-ike-00/01
+.\" UDP_ENCAP_ESPINUDP draft-ietf-ipsec-udp-encaps-06
+.\" UDP_ENCAP_L2TPINUDP rfc2661
+.\" FIXME Document UDP_NO_CHECK6_TX and UDP_NO_CHECK6_RX, added in Linux 3.16
+.TP
+.BR UDP_SEGMENT " (since Linux 4.18)"
+Enables UDP segmentation offload.
+Segmentation offload reduces
+.BR send (2)
+cost by transferring multiple datagrams worth of data
+as a single large packet through the kernel transmit path,
+even when that exceeds MTU.
+As late as possible,
+the large packet is split by segment size into a series of datagrams.
+This segmentation offload step is deferred to hardware if supported,
+else performed in software.
+This option takes a value in the range
+.RB [ 0 ,\~ USHRT_MAX ]
+that sets the segment size:
+the size of datagram payload,
+excluding the UDP header.
+The segment size must be chosen such that
+at most 64 datagrams are sent in a single call
+and that the datagrams after segmentation meet
+the same MTU rules that apply to datagrams sent without this option.
+Segmentation offload depends on checksum offload,
+as datagram checksums are computed after segmentation.
+The option may also be set for individual
+.BR sendmsg (2)
+calls by passing it as a
+.BR cmsg (3).
+A value of zero disables the feature.
+This option should not be used in code intended to be portable.
+.TP
+.BR UDP_GRO " (since Linux 5.0)"
+Enables UDP receive offload.
+If enabled,
+the socket may receive multiple datagrams worth of data
+as a single large buffer,
+together with a
+.BR cmsg (3)
+that holds the segment size.
+This option is the inverse of segmentation offload.
+It reduces receive cost by handling multiple datagrams worth of data
+as a single large packet in the kernel receive path,
+even when that exceeds MTU.
+This option should not be used in code intended to be portable.
+.SS Ioctls
+These ioctls can be accessed using
+.BR ioctl (2).
+The correct syntax is:
+.PP
+.RS
+.nf
+.BI int " value";
+.IB error " = ioctl(" udp_socket ", " ioctl_type ", &" value ");"
+.fi
+.RE
+.TP
+.BR FIONREAD " (" SIOCINQ )
+Takes a pointer to an integer as argument.
+Returns the size of the next pending datagram in the integer in bytes,
+or 0 when no datagram is pending.
+.B Warning:
+Using
+.BR FIONREAD ,
+it is impossible to distinguish the case where no datagram is pending
+from the case where the next pending datagram contains zero bytes of data.
+It is safer to use
+.BR select (2),
+.BR poll (2),
+or
+.BR epoll (7)
+to distinguish these cases.
+.\" See http://www.securiteam.com/unixfocus/5KP0I15IKO.html
+.\" "GNUnet DoS (UDP Socket Unreachable)", 14 May 2006
+.TP
+.BR TIOCOUTQ " (" SIOCOUTQ )
+Returns the number of data bytes in the local send queue.
+Supported only with Linux 2.4 and above.
+.PP
+In addition, all ioctls documented in
+.BR ip (7)
+and
+.BR socket (7)
+are supported.
+.SH ERRORS
+All errors documented for
+.BR socket (7)
+or
+.BR ip (7)
+may be returned by a send or receive on a UDP socket.
+.TP
+.B ECONNREFUSED
+No receiver was associated with the destination address.
+This might be caused by a previous packet sent over the socket.
+.SH VERSIONS
+.B IP_RECVERR
+is a new feature in Linux 2.2.
+.\" .SH CREDITS
+.\" This man page was written by Andi Kleen.
+.SH SEE ALSO
+.BR ip (7),
+.BR raw (7),
+.BR socket (7),
+.BR udplite (7)
+.PP
+The kernel source file
+.IR Documentation/networking/ip\-sysctl.txt .
+.PP
+RFC\ 768 for the User Datagram Protocol.
+.br
+RFC\ 1122 for the host requirements.
+.br
+RFC\ 1191 for a description of path MTU discovery.
diff --git a/man7/udplite.7 b/man7/udplite.7
new file mode 100644
index 0000000..36a2db8
--- /dev/null
+++ b/man7/udplite.7
@@ -0,0 +1,137 @@
+.\" Copyright (c) 2008 by Gerrit Renker <gerrit@erg.abdn.ac.uk>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" $Id: udplite.7,v 1.12 2008/07/23 15:22:22 gerrit Exp gerrit $
+.\"
+.TH udplite 7 2023-02-10 "Linux man-pages 6.05.01"
+.SH NAME
+udplite \- Lightweight User Datagram Protocol
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.\" FIXME . see #defines under `BUGS',
+.\" when glibc supports this, add
+.\" #include <netinet/udplite.h>
+.PP
+.B sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
+.fi
+.SH DESCRIPTION
+This is an implementation of the Lightweight User Datagram Protocol
+(UDP-Lite), as described in RFC\ 3828.
+.PP
+UDP-Lite is an extension of UDP (RFC\ 768) to support variable-length
+checksums.
+This has advantages for some types of multimedia transport that
+may be able to make use of slightly damaged datagrams,
+rather than having them discarded by lower-layer protocols.
+.PP
+The variable-length checksum coverage is set via a
+.BR setsockopt (2)
+option.
+If this option is not set, the only difference from UDP is
+in using a different IP protocol identifier (IANA number 136).
+.PP
+The UDP-Lite implementation is a full extension of
+.BR udp (7)\[em]that
+is, it shares the same API and API behavior, and in addition
+offers two socket options to control the checksum coverage.
+.SS Address format
+UDP-Litev4 uses the
+.I sockaddr_in
+address format described in
+.BR ip (7).
+UDP-Litev6 uses the
+.I sockaddr_in6
+address format described in
+.BR ipv6 (7).
+.SS Socket options
+To set or get a UDP-Lite socket option, call
+.BR getsockopt (2)
+to read or
+.BR setsockopt (2)
+to write the option with the option level argument set to
+.BR IPPROTO_UDPLITE .
+In addition, all
+.B IPPROTO_UDP
+socket options are valid on a UDP-Lite socket.
+See
+.BR udp (7)
+for more information.
+.PP
+The following two options are specific to UDP-Lite.
+.TP
+.B UDPLITE_SEND_CSCOV
+This option sets the sender checksum coverage and takes an
+.I int
+as argument, with a checksum coverage value in the range 0..2\[ha]16-1.
+.IP
+A value of 0 means that the entire datagram is always covered.
+Values from 1\-7 are illegal (RFC\ 3828, 3.1) and are rounded up to
+the minimum coverage of 8.
+.IP
+With regard to IPv6 jumbograms (RFC\ 2675), the UDP-Litev6 checksum
+coverage is limited to the first 2\[ha]16-1 octets, as per RFC\ 3828, 3.5.
+Higher values are therefore silently truncated to 2\[ha]16-1.
+If in doubt, the current coverage value can always be queried using
+.BR getsockopt (2).
+.TP
+.B UDPLITE_RECV_CSCOV
+This is the receiver-side analogue and uses the same argument format
+and value range as
+.BR UDPLITE_SEND_CSCOV .
+This option is not required to enable traffic with partial checksum
+coverage.
+Its function is that of a traffic filter: when enabled, it
+instructs the kernel to drop all packets which have a coverage
+.I less
+than the specified coverage value.
+.IP
+When the value of
+.B UDPLITE_RECV_CSCOV
+exceeds the actual packet coverage, incoming packets are silently dropped,
+but may generate a warning message in the system log.
+.\" SO_NO_CHECK exists and is supported by UDPv4, but is
+.\" commented out in socket(7), hence also commented out here
+.\".PP
+.\"Since UDP-Lite mandates checksums, checksumming can not be disabled
+.\"via the
+.\".B SO_NO_CHECK
+.\"option from
+.\".BR socket (7).
+.SH ERRORS
+All errors documented for
+.BR udp (7)
+may be returned.
+UDP-Lite does not add further errors.
+.SH FILES
+.TP
+.I /proc/net/snmp
+Basic UDP-Litev4 statistics counters.
+.TP
+.I /proc/net/snmp6
+Basic UDP-Litev6 statistics counters.
+.SH VERSIONS
+UDP-Litev4/v6 first appeared in Linux 2.6.20.
+.SH BUGS
+.\" FIXME . remove this section once glibc supports UDP-Lite
+Where glibc support is missing, the following definitions are needed:
+.PP
+.in +4n
+.EX
+#define IPPROTO_UDPLITE 136
+.\" The following two are defined in the kernel in linux/net/udplite.h
+#define UDPLITE_SEND_CSCOV 10
+#define UDPLITE_RECV_CSCOV 11
+.EE
+.in
+.SH SEE ALSO
+.BR ip (7),
+.BR ipv6 (7),
+.BR socket (7),
+.BR udp (7)
+.PP
+RFC\ 3828 for the Lightweight User Datagram Protocol (UDP-Lite).
+.PP
+.I Documentation/networking/udplite.txt
+in the Linux kernel source tree
diff --git a/man7/unicode.7 b/man7/unicode.7
new file mode 100644
index 0000000..f65a9b2
--- /dev/null
+++ b/man7/unicode.7
@@ -0,0 +1,246 @@
+.\" Copyright (C) Markus Kuhn, 1995, 2001
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 1995-11-26 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
+.\" First version written
+.\" 2001-05-11 Markus Kuhn <mgk25@cl.cam.ac.uk>
+.\" Update
+.\"
+.TH unicode 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+unicode \- universal character set
+.SH DESCRIPTION
+The international standard ISO/IEC 10646 defines the
+Universal Character Set (UCS).
+UCS contains all characters of all other character set standards.
+It also guarantees "round-trip compatibility";
+in other words,
+conversion tables can be built such that no information is lost
+when a string is converted from any other encoding to UCS and back.
+.PP
+UCS contains the characters required to represent practically all
+known languages.
+This includes not only the Latin, Greek, Cyrillic,
+Hebrew, Arabic, Armenian, and Georgian scripts, but also Chinese,
+Japanese and Korean Han ideographs as well as scripts such as
+Hiragana, Katakana, Hangul, Devanagari, Bengali, Gurmukhi, Gujarati,
+Oriya, Tamil, Telugu, Kannada, Malayalam, Thai, Lao, Khmer, Bopomofo,
+Tibetan, Runic, Ethiopic, Canadian Syllabics, Cherokee, Mongolian,
+Ogham, Myanmar, Sinhala, Thaana, Yi, and others.
+For scripts not yet
+covered, research on how to best encode them for computer usage is
+still going on and they will be added eventually.
+This might
+eventually include not only Hieroglyphs and various historic
+Indo-European languages, but even some selected artistic scripts such
+as Tengwar, Cirth, and Klingon.
+UCS also covers a large number of
+graphical, typographical, mathematical, and scientific symbols,
+including those provided by TeX, Postscript, APL, MS-DOS, MS-Windows,
+Macintosh, OCR fonts, as well as many word processing and publishing
+systems, and more are being added.
+.PP
+The UCS standard (ISO/IEC 10646) describes a
+31-bit character set architecture
+consisting of 128 24-bit
+.IR groups ,
+each divided into 256 16-bit
+.I planes
+made up of 256 8-bit
+.I rows
+with 256
+.I column
+positions, one for each character.
+Part 1 of the standard (ISO/IEC 10646-1)
+defines the first 65534 code positions (0x0000 to 0xfffd), which form
+the
+.I Basic Multilingual Plane
+(BMP), that is plane 0 in group 0.
+Part 2 of the standard (ISO/IEC 10646-2)
+adds characters to group 0 outside the BMP in several
+.I "supplementary planes"
+in the range 0x10000 to 0x10ffff.
+There are no plans to add characters
+beyond 0x10ffff to the standard, therefore of the entire code space,
+only a small fraction of group 0 will ever be actually used in the
+foreseeable future.
+The BMP contains all characters found in the
+commonly used other character sets.
+The supplemental planes added by
+ISO/IEC 10646-2 cover only more exotic characters for special scientific,
+dictionary printing, publishing industry, higher-level protocol and
+enthusiast needs.
+.PP
+The representation of each UCS character as a 2-byte word is referred
+to as the UCS-2 form (only for BMP characters),
+whereas UCS-4 is the representation of each character by a 4-byte word.
+In addition, there exist two encoding forms: UTF-8
+for backward compatibility with ASCII processing software and UTF-16
+for the backward-compatible handling of non-BMP characters up to
+0x10ffff by UCS-2 software.
+.PP
+The UCS characters 0x0000 to 0x007f are identical to those of the
+classic US-ASCII
+character set and the characters in the range 0x0000 to 0x00ff
+are identical to those in
+ISO 8859-1 (Latin-1).
+.SS Combining characters
+Some code points in UCS
+have been assigned to
+.IR "combining characters" .
+These are similar to the nonspacing accent keys on a typewriter.
+A combining character just adds an accent to the previous character.
+The most important accented characters have codes of their own in UCS;
+however, the combining character mechanism allows us to add accents
+and other diacritical marks to any character.
+The combining characters
+always follow the character which they modify.
+For example, the German
+character Umlaut-A ("Latin capital letter A with diaeresis") can
+either be represented by the precomposed UCS code 0x00c4, or
+alternatively as the combination of a normal "Latin capital letter A"
+followed by a "combining diaeresis": 0x0041 0x0308.
+.PP
+Combining characters are essential, for instance, for encoding the Thai
+script, for mathematical typesetting, and for users of the International
+Phonetic Alphabet.
+.SS Implementation levels
+As not all systems are expected to support advanced mechanisms like
+combining characters, ISO/IEC 10646-1 specifies the following three
+.I implementation levels
+of UCS:
+.TP 0.9i
+Level 1
+Combining characters and Hangul Jamo
+(a variant encoding of the Korean script, where a Hangul syllable
+glyph is coded as a triplet or pair of vowel/consonant codes) are not
+supported.
+.TP
+Level 2
+In addition to level 1, combining characters are now allowed for some
+languages where they are essential (e.g., Thai, Lao, Hebrew,
+Arabic, Devanagari, Malayalam).
+.TP
+Level 3
+All UCS characters are supported.
+.PP
+The Unicode 3.0 Standard
+published by the Unicode Consortium
+contains exactly the UCS Basic Multilingual Plane
+at implementation level 3, as described in ISO/IEC 10646-1:2000.
+Unicode 3.1 added the supplemental planes of ISO/IEC 10646-2.
+The Unicode standard and
+technical reports published by the Unicode Consortium provide much
+additional information on the semantics and recommended usages of
+various characters.
+They provide guidelines and algorithms for
+editing, sorting, comparing, normalizing, converting, and displaying
+Unicode strings.
+.SS Unicode under Linux
+Under GNU/Linux, the C type
+.I wchar_t
+is a signed 32-bit integer type.
+Its values are always interpreted
+by the C library as UCS
+code values (in all locales), a convention that is signaled by the GNU
+C library to applications by defining the constant
+.B __STDC_ISO_10646__
+as specified in the ISO C99 standard.
+.PP
+UCS/Unicode can be used just like ASCII in input/output streams,
+terminal communication, plaintext files, filenames, and environment
+variables in the ASCII compatible UTF-8 multibyte encoding.
+To signal the use of UTF-8 as the character
+encoding to all applications, a suitable
+.I locale
+has to be selected via environment variables (e.g.,
+"LANG=en_GB.UTF-8").
+.PP
+The
+.B nl_langinfo(CODESET)
+function returns the name of the selected encoding.
+Library functions such as
+.BR wctomb (3)
+and
+.BR mbsrtowcs (3)
+can be used to transform the internal
+.I wchar_t
+characters and strings into the system character encoding and back
+and
+.BR wcwidth (3)
+tells how many positions (0\[en]2) the cursor is advanced by the
+output of a character.
+.SS Private Use Areas (PUA)
+In the Basic Multilingual Plane,
+the range 0xe000 to 0xf8ff will never be assigned to any characters by
+the standard and is reserved for private usage.
+For the Linux
+community, this private area has been subdivided further into the
+range 0xe000 to 0xefff which can be used individually by any end-user
+and the Linux zone in the range 0xf000 to 0xf8ff where extensions are
+coordinated among all Linux users.
+The registry of the characters
+assigned to the Linux zone is maintained by LANANA and the registry
+itself is
+.I Documentation/admin\-guide/unicode.rst
+in the Linux kernel sources
+.\" commit 9d85025b0418163fae079c9ba8f8445212de8568
+(or
+.I Documentation/unicode.txt
+before Linux 4.10).
+.PP
+Two other planes are reserved for private usage, plane 15
+(Supplementary Private Use Area-A, range 0xf0000 to 0xffffd)
+and plane 16 (Supplementary Private Use Area-B, range
+0x100000 to 0x10fffd).
+.SS Literature
+.IP \[bu] 3
+Information technology \[em] Universal Multiple-Octet Coded Character
+Set (UCS) \[em] Part 1: Architecture and Basic Multilingual Plane.
+International Standard ISO/IEC 10646-1, International Organization
+for Standardization, Geneva, 2000.
+.IP
+This is the official specification of UCS.
+Available from
+.UR http://www.iso.ch/
+.UE .
+.IP \[bu]
+The Unicode Standard, Version 3.0.
+The Unicode Consortium, Addison-Wesley,
+Reading, MA, 2000, ISBN 0-201-61633-5.
+.IP \[bu]
+S.\& Harbison, G.\& Steele. C: A Reference Manual. Fourth edition,
+Prentice Hall, Englewood Cliffs, 1995, ISBN 0-13-326224-3.
+.IP
+A good reference book about the C programming language.
+The fourth
+edition covers the 1994 Amendment 1 to the ISO C90 standard, which
+adds a large number of new C library functions for handling wide and
+multibyte character encodings, but it does not yet cover ISO C99,
+which improved wide and multibyte character support even further.
+.IP \[bu]
+Unicode Technical Reports.
+.RS
+.UR http://www.unicode.org\:/reports/
+.UE
+.RE
+.IP \[bu]
+Markus Kuhn: UTF-8 and Unicode FAQ for UNIX/Linux.
+.RS
+.UR http://www.cl.cam.ac.uk\:/\[ti]mgk25\:/unicode.html
+.UE
+.RE
+.IP \[bu]
+Bruno Haible: Unicode HOWTO.
+.RS
+.UR http://www.tldp.org\:/HOWTO\:/Unicode\-HOWTO.html
+.UE
+.RE
+.\" .SH AUTHOR
+.\" Markus Kuhn <mgk25@cl.cam.ac.uk>
+.SH SEE ALSO
+.BR locale (1),
+.BR setlocale (3),
+.BR charsets (7),
+.BR utf\-8 (7)
diff --git a/man7/units.7 b/man7/units.7
new file mode 100644
index 0000000..ca2bd2d
--- /dev/null
+++ b/man7/units.7
@@ -0,0 +1,108 @@
+'\" t
+.\" Copyright (C) 2001 Andries Brouwer <aeb@cwi.nl>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH units 7 2023-02-10 "Linux man-pages 6.05.01"
+.SH NAME
+units \- decimal and binary prefixes
+.SH DESCRIPTION
+.SS Decimal prefixes
+The SI system of units uses prefixes that indicate powers of ten.
+A kilometer is 1000 meters, and a megawatt is 1000000 watts.
+Below are the standard prefixes.
+.RS
+.TS
+l l l.
+Prefix Name Value
+q quecto 10\[ha]\-30 = 0.000000000000000000000000000001
+r ronto 10\[ha]\-27 = 0.000000000000000000000000001
+y yocto 10\[ha]\-24 = 0.000000000000000000000001
+z zepto 10\[ha]\-21 = 0.000000000000000000001
+a atto 10\[ha]\-18 = 0.000000000000000001
+f femto 10\[ha]\-15 = 0.000000000000001
+p pico 10\[ha]\-12 = 0.000000000001
+n nano 10\[ha]\-9 = 0.000000001
+\[mc] micro 10\[ha]\-6 = 0.000001
+m milli 10\[ha]\-3 = 0.001
+c centi 10\[ha]\-2 = 0.01
+d deci 10\[ha]\-1 = 0.1
+da deka 10\[ha] 1 = 10
+h hecto 10\[ha] 2 = 100
+k kilo 10\[ha] 3 = 1000
+M mega 10\[ha] 6 = 1000000
+G giga 10\[ha] 9 = 1000000000
+T tera 10\[ha]12 = 1000000000000
+P peta 10\[ha]15 = 1000000000000000
+E exa 10\[ha]18 = 1000000000000000000
+Z zetta 10\[ha]21 = 1000000000000000000000
+Y yotta 10\[ha]24 = 1000000000000000000000000
+R ronna 10\[ha]27 = 1000000000000000000000000000
+Q quetta 10\[ha]30 = 1000000000000000000000000000000
+.TE
+.RE
+.PP
+The symbol for micro is the Greek letter mu, often written u
+in an ASCII context where this Greek letter is not available.
+.SS Binary prefixes
+The binary prefixes resemble the decimal ones,
+but have an additional \[aq]i\[aq]
+(and "Ki" starts with a capital \[aq]K\[aq]).
+The names are formed by taking the
+first syllable of the names of the decimal prefix with roughly the same
+size, followed by "bi" for "binary".
+.RS
+.TS
+l l l.
+Prefix Name Value
+Ki kibi 2\[ha]10 = 1024
+Mi mebi 2\[ha]20 = 1048576
+Gi gibi 2\[ha]30 = 1073741824
+Ti tebi 2\[ha]40 = 1099511627776
+Pi pebi 2\[ha]50 = 1125899906842624
+Ei exbi 2\[ha]60 = 1152921504606846976
+Zi zebi 2\[ha]70 = 1180591620717411303424
+Yi yobi 2\[ha]80 = 1208925819614629174706176
+.TE
+.RE
+.SS Discussion
+Before these binary prefixes were introduced, it was fairly
+common to use k=1000 and K=1024, just like b=bit, B=byte.
+Unfortunately, the M is capital already, and cannot be
+capitalized to indicate binary-ness.
+.PP
+At first that didn't matter too much, since memory modules
+and disks came in sizes that were powers of two, so everyone
+knew that in such contexts "kilobyte" and "megabyte" meant
+1024 and 1048576 bytes, respectively.
+What originally was a
+sloppy use of the prefixes "kilo" and "mega" started to become
+regarded as the "real true meaning" when computers were involved.
+But then disk technology changed, and disk sizes became arbitrary numbers.
+After a period of uncertainty all disk manufacturers settled on the
+standard, namely k=1000, M=1000\ k, G=1000\ M.
+.PP
+The situation was messy: in the 14k4 modems, k=1000; in the 1.44\ MB
+.\" also common: 14.4k modem
+diskettes, M=1024000; and so on.
+In 1998 the IEC approved the standard
+that defines the binary prefixes given above, enabling people
+to be precise and unambiguous.
+.PP
+Thus, today, MB = 1000000\ B and MiB = 1048576\ B.
+.PP
+In the free software world programs are slowly
+being changed to conform.
+When the Linux kernel boots and says
+.PP
+.in +4n
+.EX
+hda: 120064896 sectors (61473 MB) w/2048KiB Cache
+.EE
+.in
+.PP
+the MB are megabytes and the KiB are kibibytes.
+.SH SEE ALSO
+.UR https://www.bipm.org/\:documents/\:20126/\:41483022/\:SI\-Brochure\-9.pdf
+The International System of Units
+.UE .
diff --git a/man7/unix.7 b/man7/unix.7
new file mode 100644
index 0000000..cfa4188
--- /dev/null
+++ b/man7/unix.7
@@ -0,0 +1,1205 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1999 Andi Kleen <ak@muc.de>,
+.\" Copyright (C) 2008-2014, Michael Kerrisk <mtk.manpages@gmail.com>,
+.\" and Copyright (C) 2016, Heinrich Schuchardt <xypron.glpk@gmx.de>
+.\"
+.\" Modified, 2003-12-02, Michael Kerrisk, <mtk.manpages@gmail.com>
+.\" Modified, 2003-09-23, Adam Langley
+.\" Modified, 2004-05-27, Michael Kerrisk, <mtk.manpages@gmail.com>
+.\" Added SOCK_SEQPACKET
+.\" 2008-05-27, mtk, Provide a clear description of the three types of
+.\" address that can appear in the sockaddr_un structure: pathname,
+.\" unnamed, and abstract.
+.\"
+.TH UNIX 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+unix \- sockets for local interprocess communication
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <sys/un.h>
+.PP
+.IB unix_socket " = socket(AF_UNIX, type, 0);"
+.IB error " = socketpair(AF_UNIX, type, 0, int *" sv ");"
+.fi
+.SH DESCRIPTION
+The
+.B AF_UNIX
+(also known as
+.BR AF_LOCAL )
+socket family is used to communicate between processes on the same machine
+efficiently.
+Traditionally, UNIX domain sockets can be either unnamed,
+or bound to a filesystem pathname (marked as being of type socket).
+Linux also supports an abstract namespace which is independent of the
+filesystem.
+.PP
+Valid socket types in the UNIX domain are:
+.BR SOCK_STREAM ,
+for a stream-oriented socket;
+.BR SOCK_DGRAM ,
+for a datagram-oriented socket that preserves message boundaries
+(as on most UNIX implementations, UNIX domain datagram
+sockets are always reliable and don't reorder datagrams);
+and (since Linux 2.6.4)
+.BR SOCK_SEQPACKET ,
+for a sequenced-packet socket that is connection-oriented,
+preserves message boundaries,
+and delivers messages in the order that they were sent.
+.PP
+UNIX domain sockets support passing file descriptors or process credentials
+to other processes using ancillary data.
+.SS Address format
+A UNIX domain socket address is represented in the following structure:
+.PP
+.in +4n
+.EX
+.\" #define UNIX_PATH_MAX 108
+.\"
+struct sockaddr_un {
+ sa_family_t sun_family; /* AF_UNIX */
+ char sun_path[108]; /* Pathname */
+};
+.EE
+.in
+.PP
+The
+.I sun_family
+field always contains
+.BR AF_UNIX .
+On Linux,
+.I sun_path
+is 108 bytes in size; see also BUGS, below.
+.PP
+Various system calls (for example,
+.BR bind (2),
+.BR connect (2),
+and
+.BR sendto (2))
+take a
+.I sockaddr_un
+argument as input.
+Some other system calls (for example,
+.BR getsockname (2),
+.BR getpeername (2),
+.BR recvfrom (2),
+and
+.BR accept (2))
+return an argument of this type.
+.PP
+Three types of address are distinguished in the
+.I sockaddr_un
+structure:
+.TP
+pathname
+a UNIX domain socket can be bound to a null-terminated
+filesystem pathname using
+.BR bind (2).
+When the address of a pathname socket is returned
+(by one of the system calls noted above),
+its length is
+.IP
+.in +4n
+.EX
+offsetof(struct sockaddr_un, sun_path) + strlen(sun_path) + 1
+.EE
+.in
+.IP
+and
+.I sun_path
+contains the null-terminated pathname.
+(On Linux, the above
+.BR offsetof ()
+expression equates to the same value as
+.IR sizeof(sa_family_t) ,
+but some other implementations include other fields before
+.IR sun_path ,
+so the
+.BR offsetof ()
+expression more portably describes the size of the address structure.)
+.IP
+For further details of pathname sockets, see below.
+.TP
+unnamed
+A stream socket that has not been bound to a pathname using
+.BR bind (2)
+has no name.
+Likewise, the two sockets created by
+.BR socketpair (2)
+are unnamed.
+When the address of an unnamed socket is returned,
+its length is
+.IR "sizeof(sa_family_t)" ,
+and
+.I sun_path
+should not be inspected.
+.\" There is quite some variation across implementations: FreeBSD
+.\" says the length is 16 bytes, HP-UX 11 says it's zero bytes.
+.TP
+abstract
+an abstract socket address is distinguished (from a pathname socket)
+by the fact that
+.I sun_path[0]
+is a null byte (\[aq]\e0\[aq]).
+The socket's address in this namespace is given by the additional
+bytes in
+.I sun_path
+that are covered by the specified length of the address structure.
+(Null bytes in the name have no special significance.)
+The name has no connection with filesystem pathnames.
+When the address of an abstract socket is returned,
+the returned
+.I addrlen
+is greater than
+.I sizeof(sa_family_t)
+(i.e., greater than 2), and the name of the socket is contained in
+the first
+.I (addrlen \- sizeof(sa_family_t))
+bytes of
+.IR sun_path .
+.SS Pathname sockets
+When binding a socket to a pathname, a few rules should be observed
+for maximum portability and ease of coding:
+.IP \[bu] 3
+The pathname in
+.I sun_path
+should be null-terminated.
+.IP \[bu]
+The length of the pathname, including the terminating null byte,
+should not exceed the size of
+.IR sun_path .
+.IP \[bu]
+The
+.I addrlen
+argument that describes the enclosing
+.I sockaddr_un
+structure should have a value of at least:
+.IP
+.in +4n
+.EX
+offsetof(struct sockaddr_un, sun_path)+strlen(addr.sun_path)+1
+.EE
+.in
+.IP
+or, more simply,
+.I addrlen
+can be specified as
+.IR "sizeof(struct sockaddr_un)" .
+.PP
+There is some variation in how implementations handle UNIX domain
+socket addresses that do not follow the above rules.
+For example, some (but not all) implementations
+.\" Linux does this, including for the case where the supplied path
+.\" is 108 bytes
+append a null terminator if none is present in the supplied
+.IR sun_path .
+.PP
+When coding portable applications,
+keep in mind that some implementations
+.\" HP-UX
+have
+.I sun_path
+as short as 92 bytes.
+.\" Modern BSDs generally have 104, Tru64 and AIX have 104,
+.\" Solaris and Irix have 108
+.PP
+Various system calls
+.RB ( accept (2),
+.BR recvfrom (2),
+.BR getsockname (2),
+.BR getpeername (2))
+return socket address structures.
+When applied to UNIX domain sockets, the value-result
+.I addrlen
+argument supplied to the call should be initialized as above.
+Upon return, the argument is set to indicate the
+.I actual
+size of the address structure.
+The caller should check the value returned in this argument:
+if the output value exceeds the input value,
+then there is no guarantee that a null terminator is present in
+.IR sun_path .
+(See BUGS.)
+.\"
+.SS Pathname socket ownership and permissions
+In the Linux implementation,
+pathname sockets honor the permissions of the directory they are in.
+Creation of a new socket fails if the process does not have write and
+search (execute) permission on the directory in which the socket is created.
+.PP
+On Linux,
+connecting to a stream socket object requires write permission on that socket;
+sending a datagram to a datagram socket likewise
+requires write permission on that socket.
+POSIX does not make any statement about the effect of the permissions
+on a socket file, and on some systems (e.g., older BSDs),
+the socket permissions are ignored.
+Portable programs should not rely on
+this feature for security.
+.PP
+When creating a new socket, the owner and group of the socket file
+are set according to the usual rules.
+The socket file has all permissions enabled,
+other than those that are turned off by the process
+.BR umask (2).
+.PP
+The owner, group, and permissions of a pathname socket can be changed (using
+.BR chown (2)
+and
+.BR chmod (2)).
+.\" However, fchown() and fchmod() do not seem to have an effect
+.\"
+.SS Abstract sockets
+Socket permissions have no meaning for abstract sockets:
+the process
+.BR umask (2)
+has no effect when binding an abstract socket,
+and changing the ownership and permissions of the object (via
+.BR fchown (2)
+and
+.BR fchmod (2))
+has no effect on the accessibility of the socket.
+.PP
+Abstract sockets automatically disappear when all open references
+to the socket are closed.
+.PP
+The abstract socket namespace is a nonportable Linux extension.
+.\"
+.SS Socket options
+For historical reasons, these socket options are specified with a
+.B SOL_SOCKET
+type even though they are
+.B AF_UNIX
+specific.
+They can be set with
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2)
+by specifying
+.B SOL_SOCKET
+as the option level.
+.TP
+.B SO_PASSCRED
+Enabling this socket option causes receipt of the credentials of
+the sending process in an
+.B SCM_CREDENTIALS
+ancillary message in each subsequently received message.
+The returned credentials are those specified by the sender using
+.BR SCM_CREDENTIALS ,
+or a default that includes the sender's PID, real user ID, and real group ID,
+if the sender did not specify
+.B SCM_CREDENTIALS
+ancillary data.
+.IP
+When this option is set and the socket is not yet connected,
+a unique name in the abstract namespace will be generated automatically.
+.IP
+The value given as an argument to
+.BR setsockopt (2)
+and returned as the result of
+.BR getsockopt (2)
+is an integer boolean flag.
+.TP
+.B SO_PASSSEC
+Enables receiving of the SELinux security label of the peer socket
+in an ancillary message of type
+.B SCM_SECURITY
+(see below).
+.IP
+The value given as an argument to
+.BR setsockopt (2)
+and returned as the result of
+.BR getsockopt (2)
+is an integer boolean flag.
+.IP
+The
+.B SO_PASSSEC
+option is supported for UNIX domain datagram sockets
+.\" commit 877ce7c1b3afd69a9b1caeb1b9964c992641f52a
+since Linux 2.6.18;
+support for UNIX domain stream sockets was added
+.\" commit 37a9a8df8ce9de6ea73349c9ac8bdf6ba4ec4f70
+in Linux 4.2.
+.TP
+.B SO_PEEK_OFF
+See
+.BR socket (7).
+.TP
+.B SO_PEERCRED
+This read-only socket option returns the
+credentials of the peer process connected to this socket.
+The returned credentials are those that were in effect at the time
+of the call to
+.BR connect (2)
+or
+.BR socketpair (2).
+.IP
+The argument to
+.BR getsockopt (2)
+is a pointer to a
+.I ucred
+structure; define the
+.B _GNU_SOURCE
+feature test macro to obtain the definition of that structure from
+.IR <sys/socket.h> .
+.IP
+The use of this option is possible only for connected
+.B AF_UNIX
+stream sockets and for
+.B AF_UNIX
+stream and datagram socket pairs created using
+.BR socketpair (2).
+.TP
+.B SO_PEERSEC
+This read-only socket option returns the
+security context of the peer socket connected to this socket.
+By default, this will be the same as the security context of
+the process that created the peer socket unless overridden
+by the policy or by a process with the required permissions.
+.IP
+The argument to
+.BR getsockopt (2)
+is a pointer to a buffer of the specified length in bytes
+into which the security context string will be copied.
+If the buffer length is less than the length of the security
+context string, then
+.BR getsockopt (2)
+returns \-1, sets
+.I errno
+to
+.BR ERANGE ,
+and returns the required length via
+.IR optlen .
+The caller should allocate at least
+.B NAME_MAX
+bytes for the buffer initially, although this is not guaranteed
+to be sufficient.
+Resizing the buffer to the returned length
+and retrying may be necessary.
+.IP
+The security context string may include a terminating null character
+in the returned length, but is not guaranteed to do so: a security
+context "foo" might be represented as either {'f','o','o'} of length 3
+or {'f','o','o','\\0'} of length 4, which are considered to be
+interchangeable.
+The string is printable, does not contain non-terminating null characters,
+and is in an unspecified encoding (in particular, it
+is not guaranteed to be ASCII or UTF-8).
+.IP
+The use of this option for sockets in the
+.B AF_UNIX
+address family is supported since Linux 2.6.2 for connected stream sockets,
+and since Linux 4.18
+.\" commit 0b811db2cb2aabc910e53d34ebb95a15997c33e7
+also for stream and datagram socket pairs created using
+.BR socketpair (2).
+.\"
+.SS Autobind feature
+If a
+.BR bind (2)
+call specifies
+.I addrlen
+as
+.IR sizeof(sa_family_t) ,
+.\" i.e., sizeof(short)
+or the
+.B SO_PASSCRED
+socket option was specified for a socket that was
+not explicitly bound to an address,
+then the socket is autobound to an abstract address.
+The address consists of a null byte
+followed by 5 bytes in the character set
+.IR [0\-9a\-f] .
+Thus, there is a limit of 2\[ha]20 autobind addresses.
+(From Linux 2.1.15, when the autobind feature was added,
+8 bytes were used, and the limit was thus 2\[ha]32 autobind addresses.
+The change to 5 bytes came in Linux 2.3.15.)
+.SS Sockets API
+The following paragraphs describe domain-specific details and
+unsupported features of the sockets API for UNIX domain sockets on Linux.
+.PP
+UNIX domain sockets do not support the transmission of
+out-of-band data (the
+.B MSG_OOB
+flag for
+.BR send (2)
+and
+.BR recv (2)).
+.PP
+The
+.BR send (2)
+.B MSG_MORE
+flag is not supported by UNIX domain sockets.
+.PP
+Before Linux 3.4,
+.\" commit 9f6f9af7694ede6314bed281eec74d588ba9474f
+the use of
+.B MSG_TRUNC
+in the
+.I flags
+argument of
+.BR recv (2)
+was not supported by UNIX domain sockets.
+.PP
+The
+.B SO_SNDBUF
+socket option does have an effect for UNIX domain sockets, but the
+.B SO_RCVBUF
+option does not.
+For datagram sockets, the
+.B SO_SNDBUF
+value imposes an upper limit on the size of outgoing datagrams.
+This limit is calculated as the doubled (see
+.BR socket (7))
+option value less 32 bytes used for overhead.
+.SS Ancillary messages
+Ancillary data is sent and received using
+.BR sendmsg (2)
+and
+.BR recvmsg (2).
+For historical reasons, the ancillary message types listed below
+are specified with a
+.B SOL_SOCKET
+type even though they are
+.B AF_UNIX
+specific.
+To send them, set the
+.I cmsg_level
+field of the struct
+.I cmsghdr
+to
+.B SOL_SOCKET
+and the
+.I cmsg_type
+field to the type.
+For more information, see
+.BR cmsg (3).
+.TP
+.B SCM_RIGHTS
+Send or receive a set of open file descriptors from another process.
+The data portion contains an integer array of the file descriptors.
+.IP
+Commonly, this operation is referred to as "passing a file descriptor"
+to another process.
+However, more accurately,
+what is being passed is a reference to an open file description (see
+.BR open (2)),
+and in the receiving process it is likely that a different
+file descriptor number will be used.
+Semantically, this operation is equivalent to duplicating
+.RB ( dup (2))
+a file descriptor into the file descriptor table of another process.
+.IP
+If the buffer used to receive the ancillary data containing
+file descriptors is too small (or is absent),
+then the ancillary data is truncated (or discarded)
+and the excess file descriptors are automatically closed
+in the receiving process.
+.IP
+If the number of file descriptors received in the ancillary data would
+cause the process to exceed its
+.B RLIMIT_NOFILE
+resource limit (see
+.BR getrlimit (2)),
+the excess file descriptors are automatically closed
+in the receiving process.
+.IP
+The kernel constant
+.B SCM_MAX_FD
+defines a limit on the number of file descriptors in the array.
+Attempting to send an array larger than this limit causes
+.BR sendmsg (2)
+to fail with the error
+.BR EINVAL .
+.B SCM_MAX_FD
+has the value 253
+.\" commit bba14de98753cb6599a2dae0e520714b2153522d
+(or 255 before Linux 2.6.38).
+.TP
+.B SCM_CREDENTIALS
+Send or receive UNIX credentials.
+This can be used for authentication.
+The credentials are passed as a
+.I struct ucred
+ancillary message.
+This structure is defined in
+.I <sys/socket.h>
+as follows:
+.IP
+.in +4n
+.EX
+struct ucred {
+ pid_t pid; /* Process ID of the sending process */
+ uid_t uid; /* User ID of the sending process */
+ gid_t gid; /* Group ID of the sending process */
+};
+.EE
+.in
+.IP
+Since glibc 2.8, the
+.B _GNU_SOURCE
+feature test macro must be defined (before including
+.I any
+header files) in order to obtain the definition
+of this structure.
+.IP
+The credentials which the sender specifies are checked by the kernel.
+A privileged process is allowed to specify values that do not match its own.
+The sender must specify its own process ID (unless it has the capability
+.BR CAP_SYS_ADMIN ,
+in which case the PID of any existing process may be specified),
+its real user ID, effective user ID, or saved set-user-ID (unless it has
+.BR CAP_SETUID ),
+and its real group ID, effective group ID, or saved set-group-ID
+(unless it has
+.BR CAP_SETGID ).
+.IP
+To receive a
+.I struct ucred
+message, the
+.B SO_PASSCRED
+option must be enabled on the socket.
+.TP
+.B SCM_SECURITY
+Receive the SELinux security context (the security label)
+of the peer socket.
+The received ancillary data is a null-terminated string containing
+the security context.
+The receiver should allocate at least
+.B NAME_MAX
+bytes in the data portion of the ancillary message for this data.
+.IP
+To receive the security context, the
+.B SO_PASSSEC
+option must be enabled on the socket (see above).
+.PP
+When sending ancillary data with
+.BR sendmsg (2),
+only one item of each of the above types may be included in the sent message.
+.PP
+At least one byte of real data should be sent when sending ancillary data.
+On Linux, this is required to successfully send ancillary data over
+a UNIX domain stream socket.
+When sending ancillary data over a UNIX domain datagram socket,
+it is not necessary on Linux to send any accompanying real data.
+However, portable applications should also include at least one byte
+of real data when sending ancillary data over a datagram socket.
+.PP
+When receiving from a stream socket,
+ancillary data forms a kind of barrier for the received data.
+For example, suppose that the sender transmits as follows:
+.PP
+.RS
+.PD 0
+.IP (1) 5
+.BR sendmsg (2)
+of four bytes, with no ancillary data.
+.IP (2)
+.BR sendmsg (2)
+of one byte, with ancillary data.
+.IP (3)
+.BR sendmsg (2)
+of four bytes, with no ancillary data.
+.PD
+.RE
+.PP
+Suppose that the receiver now performs
+.BR recvmsg (2)
+calls each with a buffer size of 20 bytes.
+The first call will receive five bytes of data,
+along with the ancillary data sent by the second
+.BR sendmsg (2)
+call.
+The next call will receive the remaining four bytes of data.
+.PP
+If the space allocated for receiving incoming ancillary data is too small
+then the ancillary data is truncated to the number of headers
+that will fit in the supplied buffer (or, in the case of an
+.B SCM_RIGHTS
+file descriptor list, the list of file descriptors may be truncated).
+If no buffer is provided for incoming ancillary data (i.e., the
+.I msg_control
+field of the
+.I msghdr
+structure supplied to
+.BR recvmsg (2)
+is NULL),
+then the incoming ancillary data is discarded.
+In both of these cases, the
+.B MSG_CTRUNC
+flag will be set in the
+.I msg.msg_flags
+value returned by
+.BR recvmsg (2).
+.\"
+.SS Ioctls
+The following
+.BR ioctl (2)
+calls return information in
+.IR value .
+The correct syntax is:
+.PP
+.RS
+.nf
+.BI int " value";
+.IB error " = ioctl(" unix_socket ", " ioctl_type ", &" value ");"
+.fi
+.RE
+.PP
+.I ioctl_type
+can be:
+.TP
+.B SIOCINQ
+For
+.B SOCK_STREAM
+sockets, this call returns the number of unread bytes in the receive buffer.
+The socket must not be in LISTEN state, otherwise an error
+.RB ( EINVAL )
+is returned.
+.B SIOCINQ
+is defined in
+.IR <linux/sockios.h> .
+.\" FIXME . https://www.sourceware.org/bugzilla/show_bug.cgi?id=12002,
+.\" filed 2010-09-10, may cause SIOCINQ to be defined in glibc headers
+Alternatively,
+you can use the synonymous
+.BR FIONREAD ,
+defined in
+.IR <sys/ioctl.h> .
+.\" SIOCOUTQ also has an effect for UNIX domain sockets, but not
+.\" quite what userland might expect. It seems to return the number
+.\" of bytes allocated for buffers containing pending output.
+.\" That number is normally larger than the number of bytes of pending
+.\" output. Since this info is, from userland's point of view, imprecise,
+.\" and it may well change, probably best not to document this now.
+For
+.B SOCK_DGRAM
+sockets,
+the returned value is the same as
+for Internet domain datagram sockets;
+see
+.BR udp (7).
+.SH ERRORS
+.TP
+.B EADDRINUSE
+The specified local address is already in use or the filesystem socket
+object already exists.
+.TP
+.B EBADF
+This error can occur for
+.BR sendmsg (2)
+when sending a file descriptor as ancillary data over
+a UNIX domain socket (see the description of
+.BR SCM_RIGHTS ,
+above), and indicates that the file descriptor number that
+is being sent is not valid (e.g., it is not an open file descriptor).
+.TP
+.B ECONNREFUSED
+The remote address specified by
+.BR connect (2)
+was not a listening socket.
+This error can also occur if the target pathname is not a socket.
+.TP
+.B ECONNRESET
+Remote socket was unexpectedly closed.
+.TP
+.B EFAULT
+User memory address was not valid.
+.TP
+.B EINVAL
+Invalid argument passed.
+A common cause is that the value
+.B AF_UNIX
+was not specified in the
+.I sun_family
+field of passed addresses, or the socket was in an
+invalid state for the applied operation.
+.TP
+.B EISCONN
+.BR connect (2)
+called on an already connected socket or a target address was
+specified on a connected socket.
+.TP
+.B ENFILE
+The system-wide limit on the total number of open files has been reached.
+.TP
+.B ENOENT
+The pathname in the remote address specified to
+.BR connect (2)
+did not exist.
+.TP
+.B ENOMEM
+Out of memory.
+.TP
+.B ENOTCONN
+Socket operation needs a target address, but the socket is not connected.
+.TP
+.B EOPNOTSUPP
+Stream operation called on non-stream oriented socket or tried to
+use the out-of-band data option.
+.TP
+.B EPERM
+The sender passed invalid credentials in the
+.IR "struct ucred" .
+.TP
+.B EPIPE
+Remote socket was closed on a stream socket.
+If enabled, a
+.B SIGPIPE
+is sent as well.
+This can be avoided by passing the
+.B MSG_NOSIGNAL
+flag to
+.BR send (2)
+or
+.BR sendmsg (2).
+.TP
+.B EPROTONOSUPPORT
+Passed protocol is not
+.BR AF_UNIX .
+.TP
+.B EPROTOTYPE
+Remote socket does not match the local socket type
+.RB ( SOCK_DGRAM
+versus
+.BR SOCK_STREAM ).
+.TP
+.B ESOCKTNOSUPPORT
+Unknown socket type.
+.TP
+.B ESRCH
+While sending an ancillary message containing credentials
+.RB ( SCM_CREDENTIALS ),
+the caller specified a PID that does not match any existing process.
+.TP
+.B ETOOMANYREFS
+This error can occur for
+.BR sendmsg (2)
+when sending a file descriptor as ancillary data over
+a UNIX domain socket (see the description of
+.BR SCM_RIGHTS ,
+above).
+It occurs if the number of "in-flight" file descriptors exceeds the
+.B RLIMIT_NOFILE
+resource limit and the caller does not have the
+.B CAP_SYS_RESOURCE
+capability.
+An in-flight file descriptor is one that has been sent using
+.BR sendmsg (2)
+but has not yet been accepted in the recipient process using
+.BR recvmsg (2).
+.IP
+This error is diagnosed since mainline Linux 4.5
+(and in some earlier kernel versions where the fix has been backported).
+.\" commit 712f4aad406bb1ed67f3f98d04c044191f0ff593
+In earlier kernel versions,
+it was possible to place an unlimited number of file descriptors in flight,
+by sending each file descriptor with
+.BR sendmsg (2)
+and then closing the file descriptor so that it was not accounted against the
+.B RLIMIT_NOFILE
+resource limit.
+.PP
+Other errors can be generated by the generic socket layer or
+by the filesystem while generating a filesystem socket object.
+See the appropriate manual pages for more information.
+.SH VERSIONS
+.B SCM_CREDENTIALS
+and the abstract namespace were introduced with Linux 2.2 and should not
+be used in portable programs.
+(Some BSD-derived systems also support credential passing,
+but the implementation details differ.)
+.SH NOTES
+Binding a socket to a filename creates a socket
+in the filesystem that must be deleted by the caller when it is no
+longer needed (using
+.BR unlink (2)).
+The usual UNIX close-behind semantics apply; the socket can be unlinked
+at any time and will be finally removed from the filesystem when the last
+reference to it is closed.
+.PP
+To pass file descriptors or credentials over a
+.B SOCK_STREAM
+socket, you must
+send or receive at least one byte of nonancillary data in the same
+.BR sendmsg (2)
+or
+.BR recvmsg (2)
+call.
+.PP
+UNIX domain stream sockets do not support the notion of out-of-band data.
+.\"
+.SH BUGS
+When binding a socket to an address,
+Linux is one of the implementations that appends a null terminator
+if none is supplied in
+.IR sun_path .
+In most cases this is unproblematic:
+when the socket address is retrieved,
+it will be one byte longer than that supplied when the socket was bound.
+However, there is one case where confusing behavior can result:
+if 108 non-null bytes are supplied when a socket is bound,
+then the addition of the null terminator takes the length of
+the pathname beyond
+.IR sizeof(sun_path) .
+Consequently, when retrieving the socket address
+(for example, via
+.BR accept (2)),
+.\" The behavior on Solaris is quite similar.
+if the input
+.I addrlen
+argument for the retrieving call is specified as
+.IR "sizeof(struct sockaddr_un)" ,
+then the returned address structure
+.I won't
+have a null terminator in
+.IR sun_path .
+.PP
+In addition, some implementations
+.\" i.e., traditional BSD
+don't require a null terminator when binding a socket (the
+.I addrlen
+argument is used to determine the length of
+.IR sun_path )
+and when the socket address is retrieved on these implementations,
+there is no null terminator in
+.IR sun_path .
+.PP
+Applications that retrieve socket addresses can (portably) code
+to handle the possibility that there is no null terminator in
+.I sun_path
+by respecting the fact that the number of valid bytes in the pathname is:
+.PP
+.in +4n
+.EX
+strnlen(addr.sun_path, addrlen \- offsetof(struct sockaddr_un, sun_path))
+.EE
+.in
+.\" The following patch to amend kernel behavior was rejected:
+.\" http://thread.gmane.org/gmane.linux.kernel.api/2437
+.\" Subject: [patch] Fix handling of overlength pathname in AF_UNIX sun_path
+.\" 2012-04-17
+.\" And there was a related discussion in the Austin list:
+.\" http://thread.gmane.org/gmane.comp.standards.posix.austin.general/5735
+.\" Subject: Having a sun_path with no null terminator
+.\" 2012-04-18
+.\"
+.\" FIXME . Track http://austingroupbugs.net/view.php?id=561
+.PP
+Alternatively, an application can retrieve
+the socket address by allocating a buffer of size
+.I "sizeof(struct sockaddr_un)+1"
+that is zeroed out before the retrieval.
+The retrieving call can specify
+.I addrlen
+as
+.IR "sizeof(struct sockaddr_un)" ,
+and the extra zero byte ensures that there will be
+a null terminator for the string returned in
+.IR sun_path :
+.PP
+.in +4n
+.EX
+void *addrp;
+\&
+addrlen = sizeof(struct sockaddr_un);
+addrp = malloc(addrlen + 1);
+if (addrp == NULL)
+ /* Handle error */ ;
+memset(addrp, 0, addrlen + 1);
+\&
+if (getsockname(sfd, (struct sockaddr *) addrp, &addrlen) == \-1)
+ /* handle error */ ;
+\&
+printf("sun_path = %s\en", ((struct sockaddr_un *) addrp)\->sun_path);
+.EE
+.in
+.PP
+This sort of messiness can be avoided if it is guaranteed
+that the applications that
+.I create
+pathname sockets follow the rules outlined above under
+.IR "Pathname sockets" .
+.SH EXAMPLES
+The following code demonstrates the use of sequenced-packet
+sockets for local interprocess communication.
+It consists of two programs.
+The server program waits for a connection from the client program.
+The client sends each of its command-line arguments in separate messages.
+The server treats the incoming messages as integers and adds them up.
+The client sends the command string "END".
+The server sends back a message containing the sum of the client's integers.
+The client prints the sum and exits.
+The server waits for the next client to connect.
+To stop the server, the client is called with the command-line argument "DOWN".
+.PP
+The following output was recorded while running the server in the background
+and repeatedly executing the client.
+Execution of the server program ends when it receives the "DOWN" command.
+.SS Example output
+.in +4n
+.EX
+$ \fB./server &\fP
+[1] 25887
+$ \fB./client 3 4\fP
+Result = 7
+$ \fB./client 11 \-5\fP
+Result = 6
+$ \fB./client DOWN\fP
+Result = 0
+[1]+ Done ./server
+$
+.EE
+.in
+.SS Program source
+\&
+.EX
+/*
+ * File connection.h
+ */
+\&
+#define SOCKET_NAME "/tmp/9Lq7BNBnBycd6nxy.socket"
+#define BUFFER_SIZE 12
+\&
+/*
+ * File server.c
+ */
+\&
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include "connection.h"
+\&
+int
+main(int argc, char *argv[])
+{
+ struct sockaddr_un name;
+ int down_flag = 0;
+ int ret;
+ int connection_socket;
+ int data_socket;
+ int result;
+ char buffer[BUFFER_SIZE];
+\&
+ /* Create local socket. */
+\&
+ connection_socket = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+ if (connection_socket == \-1) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /*
+ * For portability clear the whole structure, since some
+ * implementations have additional (nonstandard) fields in
+ * the structure.
+ */
+\&
+ memset(&name, 0, sizeof(name));
+\&
+ /* Bind socket to socket name. */
+\&
+ name.sun_family = AF_UNIX;
+ strncpy(name.sun_path, SOCKET_NAME, sizeof(name.sun_path) \- 1);
+\&
+ ret = bind(connection_socket, (const struct sockaddr *) &name,
+ sizeof(name));
+ if (ret == \-1) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /*
+ * Prepare for accepting connections. The backlog size is set
+ * to 20. So while one request is being processed other requests
+ * can be waiting.
+ */
+\&
+ ret = listen(connection_socket, 20);
+ if (ret == \-1) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* This is the main loop for handling connections. */
+\&
+ for (;;) {
+\&
+ /* Wait for incoming connection. */
+\&
+ data_socket = accept(connection_socket, NULL, NULL);
+ if (data_socket == \-1) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+\&
+ result = 0;
+ for (;;) {
+\&
+ /* Wait for next data packet. */
+\&
+ ret = read(data_socket, buffer, sizeof(buffer));
+ if (ret == \-1) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Ensure buffer is 0\-terminated. */
+\&
+ buffer[sizeof(buffer) \- 1] = 0;
+\&
+ /* Handle commands. */
+\&
+ if (!strncmp(buffer, "DOWN", sizeof(buffer))) {
+ down_flag = 1;
+ break;
+ }
+\&
+ if (!strncmp(buffer, "END", sizeof(buffer))) {
+ break;
+ }
+\&
+ /* Add received summand. */
+\&
+ result += atoi(buffer);
+ }
+\&
+ /* Send result. */
+\&
+ sprintf(buffer, "%d", result);
+ ret = write(data_socket, buffer, sizeof(buffer));
+ if (ret == \-1) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Close socket. */
+\&
+ close(data_socket);
+\&
+ /* Quit on DOWN command. */
+\&
+ if (down_flag) {
+ break;
+ }
+ }
+\&
+ close(connection_socket);
+\&
+ /* Unlink the socket. */
+\&
+ unlink(SOCKET_NAME);
+\&
+ exit(EXIT_SUCCESS);
+}
+\&
+/*
+ * File client.c
+ */
+\&
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include "connection.h"
+\&
+int
+main(int argc, char *argv[])
+{
+ struct sockaddr_un addr;
+ int ret;
+ int data_socket;
+ char buffer[BUFFER_SIZE];
+\&
+ /* Create local socket. */
+\&
+ data_socket = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+ if (data_socket == \-1) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /*
+ * For portability clear the whole structure, since some
+ * implementations have additional (nonstandard) fields in
+ * the structure.
+ */
+\&
+ memset(&addr, 0, sizeof(addr));
+\&
+ /* Connect socket to socket address. */
+\&
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, SOCKET_NAME, sizeof(addr.sun_path) \- 1);
+\&
+ ret = connect(data_socket, (const struct sockaddr *) &addr,
+ sizeof(addr));
+ if (ret == \-1) {
+ fprintf(stderr, "The server is down.\en");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Send arguments. */
+\&
+ for (int i = 1; i < argc; ++i) {
+ ret = write(data_socket, argv[i], strlen(argv[i]) + 1);
+ if (ret == \-1) {
+ perror("write");
+ break;
+ }
+ }
+\&
+ /* Request result. */
+\&
+ strcpy(buffer, "END");
+ ret = write(data_socket, buffer, strlen(buffer) + 1);
+ if (ret == \-1) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Receive result. */
+\&
+ ret = read(data_socket, buffer, sizeof(buffer));
+ if (ret == \-1) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Ensure buffer is 0\-terminated. */
+\&
+ buffer[sizeof(buffer) \- 1] = 0;
+\&
+ printf("Result = %s\en", buffer);
+\&
+ /* Close socket. */
+\&
+ close(data_socket);
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.PP
+For examples of the use of
+.BR SCM_RIGHTS ,
+see
+.BR cmsg (3)
+and
+.BR seccomp_unotify (2).
+.SH SEE ALSO
+.BR recvmsg (2),
+.BR sendmsg (2),
+.BR socket (2),
+.BR socketpair (2),
+.BR cmsg (3),
+.BR capabilities (7),
+.BR credentials (7),
+.BR socket (7),
+.BR udp (7)
diff --git a/man7/uri.7 b/man7/uri.7
new file mode 100644
index 0000000..a571aec
--- /dev/null
+++ b/man7/uri.7
@@ -0,0 +1,761 @@
+.\" (C) Copyright 1999-2000 David A. Wheeler (dwheeler@dwheeler.com)
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\" Fragments of this document are directly derived from IETF standards.
+.\" For those fragments which are directly derived from such standards,
+.\" the following notice applies, which is the standard copyright and
+.\" rights announcement of The Internet Society:
+.\"
+.\" Copyright (C) The Internet Society (1998). All Rights Reserved.
+.\" This document and translations of it may be copied and furnished to
+.\" others, and derivative works that comment on or otherwise explain it
+.\" or assist in its implementation may be prepared, copied, published
+.\" and distributed, in whole or in part, without restriction of any
+.\" kind, provided that the above copyright notice and this paragraph are
+.\" included on all such copies and derivative works. However, this
+.\" document itself may not be modified in any way, such as by removing
+.\" the copyright notice or references to the Internet Society or other
+.\" Internet organizations, except as needed for the purpose of
+.\" developing Internet standards in which case the procedures for
+.\" copyrights defined in the Internet Standards process must be
+.\" followed, or as required to translate it into languages other than English.
+.\"
+.\" Modified Fri Jul 25 23:00:00 1999 by David A. Wheeler (dwheeler@dwheeler.com)
+.\" Modified Fri Aug 21 23:00:00 1999 by David A. Wheeler (dwheeler@dwheeler.com)
+.\" Modified Tue Mar 14 2000 by David A. Wheeler (dwheeler@dwheeler.com)
+.\"
+.TH uri 7 2023-04-30 "Linux man-pages 6.05.01"
+.SH NAME
+uri, url, urn \- uniform resource identifier (URI), including a URL or URN
+.SH SYNOPSIS
+.SY "\fIURI\fP \fR=\fP"
+.RI [\~ absoluteURI
+|
+.IR relativeURI \~]
+.RB [\~\[dq] # \[dq]\~\c
+.IR fragment \~]
+.YS
+.PP
+.SY "\fIabsoluteURI\fP \fR=\fP"
+.I scheme\~\c
+.RB \[dq] : \[dq]
+.RI (\~ hierarchical_part
+|
+.IR opaque_part \~)
+.YS
+.PP
+.SY "\fIrelativeURI\fP \fR=\fP"
+.RI (\~ net_path
+|
+.I absolute_path
+|
+.IR relative_path \~)
+.RB [\~\[dq] ? \[dq]\~\c
+.IR query \~]
+.YS
+.PP
+.SY "\fIscheme\fP \fR=\fP"
+.RB \[dq] http \[dq]
+|
+.RB \[dq] ftp \[dq]
+|
+.RB \[dq] gopher \[dq]
+|
+.RB \[dq] mailto \[dq]
+|
+.RB \[dq] news \[dq]
+|
+.RB \[dq] telnet \[dq]
+|
+.RB \[dq] file \[dq]
+|
+.RB \[dq] man \[dq]
+|
+.RB \[dq] info \[dq]
+|
+.RB \[dq] whatis \[dq]
+|
+.RB \[dq] ghelp \[dq]
+|
+.RB \[dq] ldap \[dq]
+|
+.RB \[dq] wais \[dq]
+| \&...
+.YS
+.PP
+.SY "\fIhierarchical_part\fP \fR=\fP"
+.RI (\~ net_path
+|
+.IR absolute_path \~)
+.RB [\~\[dq] ? \[dq]\~\c
+.IR query \~]
+.YS
+.PP
+.SY "\fInet_path\fP \fR=\fP"
+.RB \[dq] // \[dq]\~\c
+.I authority
+.RI [\~ absolute_path \~]
+.YS
+.PP
+.SY "\fIabsolute_path\fP \fR=\fP"
+.RB \[dq] / \[dq]\~\c
+.I path_segments
+.YS
+.PP
+.SY "\fIrelative_path\fP \fR=\fP"
+.I relative_segment
+.RI [\~ absolute_path \~]
+.YS
+.SH DESCRIPTION
+A Uniform Resource Identifier (URI) is a short string of characters
+identifying an abstract or physical resource (for example, a web page).
+A Uniform Resource Locator (URL) is a URI
+that identifies a resource through its primary access
+mechanism (e.g., its network "location"), rather than
+by name or some other attribute of that resource.
+A Uniform Resource Name (URN) is a URI
+that must remain globally unique and persistent even when
+the resource ceases to exist or becomes unavailable.
+.PP
+URIs are the standard way to name hypertext link destinations
+for tools such as web browsers.
+The string "http://www.kernel.org" is a URL (and thus it
+is also a URI).
+Many people use the term URL loosely as a synonym for URI
+(though technically URLs are a subset of URIs).
+.PP
+URIs can be absolute or relative.
+An absolute identifier refers to a resource independent of
+context, while a relative
+identifier refers to a resource by describing the difference
+from the current context.
+Within a relative path reference, the complete path segments "." and
+".." have special meanings: "the current hierarchy level" and "the
+level above this hierarchy level", respectively, just like they do in
+UNIX-like systems.
+A path segment which contains a colon
+character can't be used as the first segment of a relative URI path
+(e.g., "this:that"), because it would be mistaken for a scheme name;
+precede such segments with ./ (e.g., "./this:that").
+Note that descendants of MS-DOS (e.g., Microsoft Windows) replace
+devicename colons with the vertical bar ("|") in URIs, so "C:" becomes "C|".
+.PP
+A fragment identifier,
+if included,
+refers to a particular named portion (fragment) of a resource;
+text after a \[aq]#\[aq] identifies the fragment.
+A URI beginning with \[aq]#\[aq]
+refers to that fragment in the current resource.
+.SS Usage
+There are many different URI schemes, each with specific
+additional rules and meanings, but they are intentionally made to be
+as similar as possible.
+For example, many URL schemes
+permit the authority to be the following format, called here an
+.I ip_server
+(square brackets show what's optional):
+.PP
+.IR "ip_server = " [ user " [ : " password " ] @ ] " host " [ : " port ]
+.PP
+This format allows you to optionally insert a username,
+a user plus password, and/or a port number.
+The
+.I host
+is the name of the host computer, either its name as determined by DNS
+or an IP address (numbers separated by periods).
+Thus the URI
+<http://fred:fredpassword@example.com:8080/>
+logs into a web server on host example.com
+as fred (using fredpassword) using port 8080.
+Avoid including a password in a URI if possible because of the many
+security risks of having a password written down.
+If the URL supplies a username but no password, and the remote
+server requests a password, the program interpreting the URL
+should request one from the user.
+.PP
+Here are some of the most common schemes in use on UNIX-like systems
+that are understood by many tools.
+Note that many tools using URIs also have internal schemes or specialized
+schemes; see those tools' documentation for information on those schemes.
+.PP
+.B "http \- Web (HTTP) server"
+.PP
+.RI http:// ip_server / path
+.br
+.RI http:// ip_server / path ? query
+.PP
+This is a URL accessing a web (HTTP) server.
+The default port is 80.
+If the path refers to a directory, the web server will choose what
+to return; usually if there is a file named "index.html" or "index.htm"
+its content is returned, otherwise, a list of the files in the current
+directory (with appropriate links) is generated and returned.
+An example is <http://lwn.net>.
+.PP
+A query can be given in the archaic "isindex" format, consisting of a
+word or phrase and not including an equal sign (=).
+A query can also be in the longer "GET" format, which has one or more
+query entries of the form
+.IR key = value
+separated by the ampersand character (&).
+Note that
+.I key
+can be repeated more than once, though it's up to the web server
+and its application programs to determine if there's any meaning to that.
+There is an unfortunate interaction with HTML/XML/SGML and
+the GET query format; when such URIs with more than one key
+are embedded in SGML/XML documents (including HTML), the ampersand
+(&) has to be rewritten as &amp;.
+Note that not all queries use this format; larger forms
+may be too long to store as a URI, so they use a different
+interaction mechanism (called POST) which does
+not include the data in the URI.
+See the Common Gateway Interface specification at
+.UR http://www.w3.org\:/CGI
+.UE
+for more information.
+.PP
+.B "ftp \- File Transfer Protocol (FTP)"
+.PP
+.RI ftp:// ip_server / path
+.PP
+This is a URL accessing a file through the file transfer protocol (FTP).
+The default port (for control) is 21.
+If no username is included, the username "anonymous" is supplied, and
+in that case many clients provide as the password the requestor's
+Internet email address.
+An example is
+<ftp://ftp.is.co.za/rfc/rfc1808.txt>.
+.PP
+.B "gopher \- Gopher server"
+.PP
+.RI gopher:// ip_server / "gophertype selector"
+.br
+.RI gopher:// ip_server / "gophertype selector" %09 search
+.br
+.RI gopher:// ip_server / "gophertype selector" %09 search %09 gopher+_string
+.br
+.PP
+The default gopher port is 70.
+.I gophertype
+is a single-character field to denote the
+Gopher type of the resource to
+which the URL refers.
+The entire path may also be empty, in
+which case the delimiting "/" is also optional and the gophertype
+defaults to "1".
+.PP
+.I selector
+is the Gopher selector string.
+In the Gopher protocol,
+Gopher selector strings are a sequence of octets which may contain
+any octets except 09 hexadecimal (US-ASCII HT or tab), 0A hexadecimal
+(US-ASCII character LF), and 0D (US-ASCII character CR).
+.PP
+.B "mailto \- Email address"
+.PP
+.RI mailto: email-address
+.PP
+This is an email address, usually of the form
+.IR name @ hostname .
+See
+.BR mailaddr (7)
+for more information on the correct format of an email address.
+Note that any % character must be rewritten as %25.
+An example is <mailto:dwheeler@dwheeler.com>.
+.PP
+.B "news \- Newsgroup or News message"
+.PP
+.RI news: newsgroup-name
+.br
+.RI news: message-id
+.PP
+A
+.I newsgroup-name
+is a period-delimited hierarchical name, such as
+"comp.infosystems.www.misc".
+If <newsgroup-name> is "*" (as in <news:*>), it is used to refer
+to "all available news groups".
+An example is <news:comp.lang.ada>.
+.PP
+A
+.I message-id
+corresponds to the Message-ID of
+.UR http://www.ietf.org\:/rfc\:/rfc1036.txt
+IETF RFC\ 1036,
+.UE
+without the enclosing "<"
+and ">"; it takes the form
+.IR unique @ full_domain_name .
+A message identifier may be distinguished from a news group name by the
+presence of the "@" character.
+.PP
+.B "telnet \- Telnet login"
+.PP
+.RI telnet:// ip_server /
+.PP
+The Telnet URL scheme is used to designate interactive text services that
+may be accessed by the Telnet protocol.
+The final "/" character may be omitted.
+The default port is 23.
+An example is <telnet://melvyl.ucop.edu/>.
+.PP
+.B "file \- Normal file"
+.PP
+.RI file:// ip_server / path_segments
+.br
+.RI file: path_segments
+.PP
+This represents a file or directory accessible locally.
+As a special case,
+.I ip_server
+can be the string "localhost" or the empty
+string; this is interpreted as "the machine from which the URL is
+being interpreted".
+If the path is to a directory, the viewer should display the
+directory's contents with links to each containee;
+not all viewers currently do this.
+KDE supports generated files through the URL <file:/cgi-bin>.
+If the given file isn't found, browser writers may want to try to expand
+the filename via filename globbing
+(see
+.BR glob (7)
+and
+.BR glob (3)).
+.PP
+The second format (e.g., <file:/etc/passwd>)
+is a correct format for referring to
+a local file.
+However, older standards did not permit this format,
+and some programs don't recognize this as a URI.
+A more portable syntax is to use an empty string as the server name,
+for example,
+<file:///etc/passwd>; this form does the same thing
+and is easily recognized by pattern matchers and older programs as a URI.
+Note that if you really mean to say "start from the current location", don't
+specify the scheme at all; use a relative address like <../test.txt>,
+which has the side-effect of being scheme-independent.
+An example of this scheme is <file:///etc/passwd>.
+.PP
+.B "man \- Man page documentation"
+.PP
+.RI man: command-name
+.br
+.RI man: command-name ( section )
+.PP
+This refers to local online manual (man) reference pages.
+The command name can optionally be followed by a
+parenthesis and section number; see
+.BR man (7)
+for more information on the meaning of the section numbers.
+This URI scheme is unique to UNIX-like systems (such as Linux)
+and is not currently registered by the IETF.
+An example is <man:ls(1)>.
+.PP
+.B "info \- Info page documentation"
+.PP
+.RI info: virtual-filename
+.br
+.RI info: virtual-filename # nodename
+.br
+.RI info:( virtual-filename )
+.br
+.RI info:( virtual-filename ) nodename
+.PP
+This scheme refers to online info reference pages (generated from
+texinfo files),
+a documentation format used by programs such as the GNU tools.
+This URI scheme is unique to UNIX-like systems (such as Linux)
+and is not currently registered by the IETF.
+As of this writing, GNOME and KDE differ in their URI syntax
+and do not accept the other's syntax.
+The first two formats are the GNOME format; in nodenames all spaces
+are written as underscores.
+The second two formats are the KDE format;
+spaces in nodenames must be written as spaces, even though this
+is forbidden by the URI standards.
+It's hoped that in the future most tools will understand all of these
+formats and will always accept underscores for spaces in nodenames.
+In both GNOME and KDE, if the form without the nodename is used the
+nodename is assumed to be "Top".
+Examples of the GNOME format are <info:gcc> and <info:gcc#G++_and_GCC>.
+Examples of the KDE format are <info:(gcc)> and <info:(gcc)G++ and GCC>.
+.PP
+.B "whatis \- Documentation search"
+.PP
+.RI whatis: string
+.PP
+This scheme searches the database of short (one-line) descriptions of
+commands and returns a list of descriptions containing that string.
+Only complete word matches are returned.
+See
+.BR whatis (1).
+This URI scheme is unique to UNIX-like systems (such as Linux)
+and is not currently registered by the IETF.
+.PP
+.B "ghelp \- GNOME help documentation"
+.PP
+.RI ghelp: name-of-application
+.PP
+This loads GNOME help for the given application.
+Note that not much documentation currently exists in this format.
+.PP
+.B "ldap \- Lightweight Directory Access Protocol"
+.PP
+.RI ldap:// hostport
+.br
+.RI ldap:// hostport /
+.br
+.RI ldap:// hostport / dn
+.br
+.RI ldap:// hostport / dn ? attributes
+.br
+.RI ldap:// hostport / dn ? attributes ? scope
+.br
+.RI ldap:// hostport / dn ? attributes ? scope ? filter
+.br
+.RI ldap:// hostport / dn ? attributes ? scope ? filter ? extensions
+.PP
+This scheme supports queries to the
+Lightweight Directory Access Protocol (LDAP), a protocol for querying
+a set of servers for hierarchically organized information
+(such as people and computing resources).
+See
+.UR http://www.ietf.org\:/rfc\:/rfc2255.txt
+RFC\ 2255
+.UE
+for more information on the LDAP URL scheme.
+The components of this URL are:
+.TP
+hostport
+the LDAP server to query, written as a hostname optionally followed by
+a colon and the port number.
+The default LDAP port is TCP port 389.
+If empty, the client determines which LDAP server to use.
+.TP
+dn
+the LDAP Distinguished Name, which identifies
+the base object of the LDAP search (see
+.UR http://www.ietf.org\:/rfc\:/rfc2253.txt
+RFC\ 2253
+.UE
+section 3).
+.TP
+attributes
+a comma-separated list of attributes to be returned;
+see RFC\ 2251 section 4.1.5.
+If omitted, all attributes should be returned.
+.TP
+scope
+specifies the scope of the search, which can be one of
+"base" (for a base object search), "one" (for a one-level search),
+or "sub" (for a subtree search).
+If scope is omitted, "base" is assumed.
+.TP
+filter
+specifies the search filter (subset of entries
+to return).
+If omitted, all entries should be returned.
+See
+.UR http://www.ietf.org\:/rfc\:/rfc2254.txt
+RFC\ 2254
+.UE
+section 4.
+.TP
+extensions
+a comma-separated list of type=value
+pairs, where the =value portion may be omitted for options not
+requiring it.
+An extension prefixed with a \[aq]!\[aq] is critical
+(must be supported to be valid), otherwise it is noncritical (optional).
+.PP
+LDAP queries are easiest to explain by example.
+Here's a query that asks ldap.itd.umich.edu for information about
+the University of Michigan in the U.S.:
+.PP
+.nf
+ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US
+.fi
+.PP
+To just get its postal address attribute, request:
+.PP
+.nf
+ldap://ldap.itd.umich.edu/o=University%20of%20Michigan,c=US?postalAddress
+.fi
+.PP
+To ask host.com at port 6666 for information about the person
+with common name (cn) "Babs Jensen" at University of Michigan, request:
+.PP
+.nf
+ldap://host.com:6666/o=University%20of%20Michigan,c=US??sub?(cn=Babs%20Jensen)
+.fi
+.PP
+.B "wais \- Wide Area Information Servers"
+.PP
+.RI wais:// hostport / database
+.br
+.RI wais:// hostport / database ? search
+.br
+.RI wais:// hostport / database / wtype / wpath
+.PP
+This scheme designates a WAIS database, search, or document
+(see
+.UR http://www.ietf.org\:/rfc\:/rfc1625.txt
+IETF RFC\ 1625
+.UE
+for more information on WAIS).
+Hostport is the hostname, optionally followed by a colon and port number
+(the default port number is 210).
+.PP
+The first form designates a WAIS database for searching.
+The second form designates a particular search of the WAIS database
+.IR database .
+The third form designates a particular document within a WAIS
+database to be retrieved.
+.I wtype
+is the WAIS designation of the type of the object and
+.I wpath
+is the WAIS document-id.
+.PP
+.B "other schemes"
+.PP
+There are many other URI schemes.
+Most tools that accept URIs support a set of internal URIs
+(e.g., Mozilla has the about: scheme for internal information,
+and the GNOME help browser has the toc: scheme for various starting
+locations).
+There are many schemes that have been defined but are not as widely
+used at the current time
+(e.g., prospero).
+The nntp: scheme is deprecated in favor of the news: scheme.
+URNs are to be supported by the urn: scheme, with a hierarchical name space
+(e.g., urn:ietf:... would identify IETF documents); at this time
+URNs are not widely implemented.
+Not all tools support all schemes.
+.SS Character encoding
+URIs use a limited number of characters so that they can be
+typed in and used in a variety of situations.
+.PP
+The following characters are reserved, that is, they may appear in a
+URI but their use is limited to their reserved purpose
+(conflicting data must be escaped before forming the URI):
+.IP
+.in +4n
+.EX
+; / ? : @ & = + $ ,
+.EE
+.in
+.PP
+Unreserved characters may be included in a URI.
+Unreserved characters
+include uppercase and lowercase Latin letters,
+decimal digits, and the following
+limited set of punctuation marks and symbols:
+.IP
+.in +4n
+.EX
+\- _ . ! \[ti] * ' ( )
+.EE
+.in
+.PP
+All other characters must be escaped.
+An escaped octet is encoded as a character triplet, consisting of the
+percent character "%" followed by the two hexadecimal digits
+representing the octet code (you can use uppercase or lowercase letters
+for the hexadecimal digits).
+For example, a blank space must be escaped
+as "%20", a tab character as "%09", and the "&" as "%26".
+Because the percent "%" character always has the reserved purpose of
+being the escape indicator, it must be escaped as "%25".
+It is common practice to escape space characters as the plus symbol (+)
+in query text; this practice isn't uniformly defined
+in the relevant RFCs (which recommend %20 instead) but any tool accepting
+URIs with query text should be prepared for them.
+A URI is always shown in its "escaped" form.
+.PP
+Unreserved characters can be escaped without changing the semantics
+of the URI, but this should not be done unless the URI is being used
+in a context that does not allow the unescaped character to appear.
+For example, "%7e" is sometimes used instead of "\[ti]" in an HTTP URL
+path, but the two are equivalent for an HTTP URL.
+.PP
+For URIs which must handle characters outside the US ASCII character set,
+the HTML 4.01 specification (section B.2) and
+IETF RFC\~3986 (last paragraph of section 2.5)
+recommend the following approach:
+.IP (1) 5
+translate the character sequences into UTF-8 (IETF RFC\~3629)\[em]see
+.BR utf\-8 (7)\[em]and
+then
+.IP (2)
+use the URI escaping mechanism, that is,
+use the %HH encoding for unsafe octets.
+.SS Writing a URI
+When written, URIs should be placed inside double quotes
+(e.g., "http://www.kernel.org"),
+enclosed in angle brackets (e.g., <http://lwn.net>),
+or placed on a line by themselves.
+A warning for those who use double-quotes:
+.B never
+move extraneous punctuation (such as the period ending a sentence or the
+comma in a list)
+inside a URI, since this will change the value of the URI.
+Instead, use angle brackets, or
+switch to a quoting system that never includes extraneous characters
+inside quotation marks.
+This latter system, called the 'new' or 'logical' quoting system by
+"Hart's Rules" and the "Oxford Dictionary for Writers and Editors",
+is preferred practice in Great Britain and in various European languages.
+Older documents suggested inserting the prefix "URL:"
+just before the URI, but this form has never caught on.
+.PP
+The URI syntax was designed to be unambiguous.
+However, as URIs have become commonplace, traditional media
+(television, radio, newspapers, billboards, etc.) have increasingly
+used abbreviated URI references consisting of
+only the authority and path portions of the identified resource
+(e.g., <www.w3.org/Addressing>).
+Such references are primarily
+intended for human interpretation rather than machine, with the
+assumption that context-based heuristics are sufficient to complete
+the URI (e.g., hostnames beginning with "www" are likely to have
+a URI prefix of "http://" and hostnames beginning with "ftp" likely
+to have a prefix of "ftp://").
+Many client implementations heuristically resolve these references.
+Such heuristics may
+change over time, particularly when new schemes are introduced.
+Since an abbreviated URI has the same syntax as a relative URL path,
+abbreviated URI references cannot be used where relative URIs are
+permitted, and can be used only when there is no defined base
+(such as in dialog boxes).
+Don't use abbreviated URIs as hypertext links inside a document;
+use the standard format as described here.
+.SH STANDARDS
+.UR http://www.ietf.org\:/rfc\:/rfc2396.txt
+(IETF RFC\ 2396)
+.UE ,
+.UR http://www.w3.org\:/TR\:/REC\-html40
+(HTML 4.0)
+.UE .
+.SH NOTES
+Any tool accepting URIs (e.g., a web browser) on a Linux system should
+be able to handle (directly or indirectly) all of the
+schemes described here, including the man: and info: schemes.
+Handling them by invoking some other program is
+fine and in fact encouraged.
+.PP
+Technically the fragment isn't part of the URI.
+.PP
+For information on how to embed URIs (including URLs) in a data format,
+see documentation on that format.
+HTML uses the format <A HREF="\fIuri\fP">
+.I text
+</A>.
+Texinfo files use the format @uref{\fIuri\fP}.
+Man and mdoc have the recently added UR macro, or just include the
+URI in the text (viewers should be able to detect :// as part of a URI).
+.PP
+The GNOME and KDE desktop environments currently vary in the URIs
+they accept, in particular in their respective help browsers.
+To list man pages, GNOME uses <toc:man> while KDE uses <man:(index)>, and
+to list info pages, GNOME uses <toc:info> while KDE uses <info:(dir)>
+(the author of this man page prefers the KDE approach here, though a more
+regular format would be even better).
+In general, KDE uses <file:/cgi-bin/> as a prefix to a set of generated
+files.
+KDE prefers documentation in HTML, accessed via the URL
+<file:/cgi-bin/helpindex>.
+GNOME prefers the ghelp scheme to store and find documentation.
+Neither browser handles file: references to directories at the time
+of this writing, making it difficult to refer to an entire directory with
+a browsable URI.
+As noted above, these environments differ in how they handle the
+info: scheme, probably the most important variation.
+It is expected that GNOME and KDE
+will converge to common URI formats, and a future
+version of this man page will describe the converged result.
+Efforts to aid this convergence are encouraged.
+.SS Security
+A URI does not in itself pose a security threat.
+There is no general guarantee that a URL, which at one time
+located a given resource, will continue to do so.
+Nor is there any
+guarantee that a URL will not locate a different resource at some
+later point in time; such a guarantee can be
+obtained only from the person(s) controlling that namespace and the
+resource in question.
+.PP
+It is sometimes possible to construct a URL such that an attempt to
+perform a seemingly harmless operation, such as the
+retrieval of an entity associated with the resource, will in fact
+cause a possibly damaging remote operation to occur.
+The unsafe URL
+is typically constructed by specifying a port number other than that
+reserved for the network protocol in question.
+The client unwittingly contacts a site that is in fact
+running a different protocol.
+The content of the URL contains instructions that, when
+interpreted according to this other protocol, cause an unexpected
+operation.
+An example has been the use of a gopher URL to cause an
+unintended or impersonating message to be sent via an SMTP server.
+.PP
+Caution should be used when using any URL that specifies a port
+number other than the default for the protocol, especially when it is
+a number within the reserved space.
+.PP
+Care should be taken when a URI contains escaped delimiters for a
+given protocol (for example, CR and LF characters for telnet
+protocols) that these are not unescaped before transmission.
+This might violate the protocol, but avoids the potential for such
+characters to be used to simulate an extra operation or parameter in
+that protocol, which might lead to an unexpected and possibly harmful
+remote operation to be performed.
+.PP
+It is clearly unwise to use a URI that contains a password which is
+intended to be secret.
+In particular, the use of a password within
+the "userinfo" component of a URI is strongly recommended against except
+in those rare cases where the "password" parameter is intended to be public.
+.SH BUGS
+Documentation may be placed in a variety of locations, so there
+currently isn't a good URI scheme for general online documentation
+in arbitrary formats.
+References of the form
+<file:///usr/doc/ZZZ> don't work because different distributions and
+local installation requirements may place the files in different
+directories
+(it may be in /usr/doc, or /usr/local/doc, or /usr/share,
+or somewhere else).
+Also, the directory ZZZ usually changes when a version changes
+(though filename globbing could partially overcome this).
+Finally, using the file: scheme doesn't easily support people
+who dynamically load documentation from the Internet (instead of
+loading the files onto a local filesystem).
+A future URI scheme may be added (e.g., "userdoc:") to permit
+programs to include cross-references to more detailed documentation
+without having to know the exact location of that documentation.
+Alternatively, a future version of the filesystem specification may
+specify file locations sufficiently so that the file: scheme will
+be able to locate documentation.
+.PP
+Many programs and file formats don't include a way to incorporate
+or implement links using URIs.
+.PP
+Many programs can't handle all of these different URI formats; there
+should be a standard mechanism to load an arbitrary URI that automatically
+detects the user's environment (e.g., text or graphics,
+desktop environment, local user preferences, and currently executing
+tools) and invokes the right tool for any URI.
+.\" .SH AUTHOR
+.\" David A. Wheeler (dwheeler@dwheeler.com) wrote this man page.
+.SH SEE ALSO
+.BR lynx (1),
+.BR man2html (1),
+.BR mailaddr (7),
+.BR utf\-8 (7)
+.PP
+.UR http://www.ietf.org\:/rfc\:/rfc2255.txt
+IETF RFC\ 2255
+.UE
diff --git a/man7/url.7 b/man7/url.7
new file mode 100644
index 0000000..079fb5e
--- /dev/null
+++ b/man7/url.7
@@ -0,0 +1 @@
+.so man7/uri.7
diff --git a/man7/urn.7 b/man7/urn.7
new file mode 100644
index 0000000..079fb5e
--- /dev/null
+++ b/man7/urn.7
@@ -0,0 +1 @@
+.so man7/uri.7
diff --git a/man7/user-keyring.7 b/man7/user-keyring.7
new file mode 100644
index 0000000..7468cc5
--- /dev/null
+++ b/man7/user-keyring.7
@@ -0,0 +1,81 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH user-keyring 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+user-keyring \- per-user keyring
+.SH DESCRIPTION
+The user keyring is a keyring used to anchor keys on behalf of a user.
+Each UID the kernel deals with has its own user keyring that
+is shared by all processes with that UID.
+The user keyring has a name (description) of the form
+.I _uid.<UID>
+where
+.I <UID>
+is the user ID of the corresponding user.
+.PP
+The user keyring is associated with the record that the kernel maintains
+for the UID.
+It comes into existence upon the first attempt to access either the
+user keyring, the
+.BR user\-session\-keyring (7),
+or the
+.BR session\-keyring (7).
+The keyring remains pinned in existence so long as there are processes
+running with that real UID or files opened by those processes remain open.
+(The keyring can also be pinned indefinitely by linking it
+into another keyring.)
+.PP
+Typically, the user keyring is created by
+.BR pam_keyinit (8)
+when a user logs in.
+.PP
+The user keyring is not searched by default by
+.BR request_key (2).
+When
+.BR pam_keyinit (8)
+creates a session keyring, it adds to it a link to the user
+keyring so that the user keyring will be searched when the session keyring is.
+.PP
+A special serial number value,
+.BR KEY_SPEC_USER_KEYRING ,
+is defined that can be used in lieu of the actual serial number of
+the calling process's user keyring.
+.PP
+From the
+.BR keyctl (1)
+utility, '\fB@u\fP' can be used instead of a numeric key ID in
+much the same way.
+.PP
+User keyrings are independent of
+.BR clone (2),
+.BR fork (2),
+.BR vfork (2),
+.BR execve (2),
+and
+.BR _exit (2)
+excepting that the keyring is destroyed when the UID record is destroyed when
+the last process pinning it exits.
+.PP
+If it is necessary for a key associated with a user to exist beyond the UID
+record being garbage collected\[em]for example, for use by a
+.BR cron (8)
+script\[em]then the
+.BR persistent\-keyring (7)
+should be used instead.
+.PP
+If a user keyring does not exist when it is accessed, it will be created.
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyrings (7),
+.BR persistent\-keyring (7),
+.BR process\-keyring (7),
+.BR session\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-session\-keyring (7),
+.BR pam_keyinit (8)
diff --git a/man7/user-session-keyring.7 b/man7/user-session-keyring.7
new file mode 100644
index 0000000..3fc8795
--- /dev/null
+++ b/man7/user-session-keyring.7
@@ -0,0 +1,92 @@
+.\" Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+.\" Written by David Howells (dhowells@redhat.com)
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH user-session-keyring 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+user-session-keyring \- per-user default session keyring
+.SH DESCRIPTION
+The user session keyring is a keyring used to anchor keys on behalf of a user.
+Each UID the kernel deals with has its own user session keyring that
+is shared by all processes with that UID.
+The user session keyring has a name (description) of the form
+.I _uid_ses.<UID>
+where
+.I <UID>
+is the user ID of the corresponding user.
+.PP
+The user session keyring is associated with the record that
+the kernel maintains for the UID.
+It comes into existence upon the first attempt to access either the
+user session keyring, the
+.BR user\-keyring (7),
+or the
+.BR session\-keyring (7).
+.\" David Howells: the user and user-session keyrings are managed as a pair.
+The keyring remains pinned in existence so long as there are processes
+running with that real UID or files opened by those processes remain open.
+(The keyring can also be pinned indefinitely by linking it
+into another keyring.)
+.PP
+The user session keyring is created on demand when a thread requests it
+or when a thread asks for its
+.BR session\-keyring (7)
+and that keyring doesn't exist.
+In the latter case, a user session keyring will be created and,
+if the session keyring wasn't to be created,
+the user session keyring will be set as the process's actual session keyring.
+.PP
+The user session keyring is searched by
+.BR request_key (2)
+if the actual session keyring does not exist and is ignored otherwise.
+.PP
+A special serial number value,
+.BR KEY_SPEC_USER_SESSION_KEYRING ,
+is defined
+that can be used in lieu of the actual serial number of
+the calling process's user session keyring.
+.PP
+From the
+.BR keyctl (1)
+utility, '\fB@us\fP' can be used instead of a numeric key ID in
+much the same way.
+.PP
+User session keyrings are independent of
+.BR clone (2),
+.BR fork (2),
+.BR vfork (2),
+.BR execve (2),
+and
+.BR _exit (2)
+excepting that the keyring is destroyed along with the UID record
+when the last process pinning it exits.
+.PP
+If a user session keyring does not exist when it is accessed,
+it will be created.
+.PP
+Rather than relying on the user session keyring,
+it is strongly recommended\[em]especially if the process
+is running as root\[em]that a
+.BR session\-keyring (7)
+be set explicitly, for example by
+.BR pam_keyinit (8).
+.SH NOTES
+The user session keyring was added to support situations where
+a process doesn't have a session keyring,
+perhaps because it was created via a pathway that didn't involve PAM
+(e.g., perhaps it was a daemon started by
+.BR inetd (8)).
+In such a scenario, the user session keyring acts as a substitute for the
+.BR session\-keyring (7).
+.SH SEE ALSO
+.ad l
+.nh
+.BR keyctl (1),
+.BR keyctl (3),
+.BR keyrings (7),
+.BR persistent\-keyring (7),
+.BR process\-keyring (7),
+.BR session\-keyring (7),
+.BR thread\-keyring (7),
+.BR user\-keyring (7)
diff --git a/man7/user_namespaces.7 b/man7/user_namespaces.7
new file mode 100644
index 0000000..0c29f93
--- /dev/null
+++ b/man7/user_namespaces.7
@@ -0,0 +1,1469 @@
+.\" Copyright (c) 2013, 2014 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" and Copyright (c) 2012, 2014 by Eric W. Biederman <ebiederm@xmission.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH user_namespaces 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+user_namespaces \- overview of Linux user namespaces
+.SH DESCRIPTION
+For an overview of namespaces, see
+.BR namespaces (7).
+.PP
+User namespaces isolate security-related identifiers and attributes,
+in particular,
+user IDs and group IDs (see
+.BR credentials (7)),
+the root directory,
+keys (see
+.BR keyrings (7)),
+.\" FIXME: This page says very little about the interaction
+.\" of user namespaces and keys. Add something on this topic.
+and capabilities (see
+.BR capabilities (7)).
+A process's user and group IDs can be different
+inside and outside a user namespace.
+In particular,
+a process can have a normal unprivileged user ID outside a user namespace
+while at the same time having a user ID of 0 inside the namespace;
+in other words,
+the process has full privileges for operations inside the user namespace,
+but is unprivileged for operations outside the namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Nested namespaces, namespace membership
+User namespaces can be nested;
+that is, each user namespace\[em]except the initial ("root")
+namespace\[em]has a parent user namespace,
+and can have zero or more child user namespaces.
+The parent user namespace is the user namespace
+of the process that creates the user namespace via a call to
+.BR unshare (2)
+or
+.BR clone (2)
+with the
+.B CLONE_NEWUSER
+flag.
+.PP
+The kernel imposes (since Linux 3.11) a limit of 32 nested levels of
+.\" commit 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8
+user namespaces.
+.\" FIXME Explain the rationale for this limit. (What is the rationale?)
+Calls to
+.BR unshare (2)
+or
+.BR clone (2)
+that would cause this limit to be exceeded fail with the error
+.BR EUSERS .
+.PP
+Each process is a member of exactly one user namespace.
+A process created via
+.BR fork (2)
+or
+.BR clone (2)
+without the
+.B CLONE_NEWUSER
+flag is a member of the same user namespace as its parent.
+A single-threaded process can join another user namespace with
+.BR setns (2)
+if it has the
+.B CAP_SYS_ADMIN
+capability in that namespace;
+upon doing so, it gains a full set of capabilities in that namespace.
+.PP
+A call to
+.BR clone (2)
+or
+.BR unshare (2)
+with the
+.B CLONE_NEWUSER
+flag makes the new child process (for
+.BR clone (2))
+or the caller (for
+.BR unshare (2))
+a member of the new user namespace created by the call.
+.PP
+The
+.B NS_GET_PARENT
+.BR ioctl (2)
+operation can be used to discover the parental relationship
+between user namespaces; see
+.BR ioctl_ns (2).
+.PP
+A task that changes one of its effective IDs
+will have its dumpability reset to the value in
+.IR /proc/sys/fs/suid_dumpable .
+This may affect the ownership of proc files of child processes
+and may thus cause the parent to lack the permissions
+to write to mapping files of child processes running in a new user namespace.
+In such cases making the parent process dumpable, using
+.B PR_SET_DUMPABLE
+in a call to
+.BR prctl (2),
+before creating a child process in a new user namespace
+may rectify this problem.
+See
+.BR prctl (2)
+and
+.BR proc (5)
+for details on how ownership is affected.
+.\"
+.\" ============================================================
+.\"
+.SS Capabilities
+The child process created by
+.BR clone (2)
+with the
+.B CLONE_NEWUSER
+flag starts out with a complete set
+of capabilities in the new user namespace.
+Likewise, a process that creates a new user namespace using
+.BR unshare (2)
+or joins an existing user namespace using
+.BR setns (2)
+gains a full set of capabilities in that namespace.
+On the other hand,
+that process has no capabilities in the parent (in the case of
+.BR clone (2))
+or previous (in the case of
+.BR unshare (2)
+and
+.BR setns (2))
+user namespace,
+even if the new namespace is created or joined by the root user
+(i.e., a process with user ID 0 in the root namespace).
+.PP
+Note that a call to
+.BR execve (2)
+will cause a process's capabilities to be recalculated in the usual way (see
+.BR capabilities (7)).
+Consequently,
+unless the process has a user ID of 0 within the namespace,
+or the executable file has a nonempty inheritable capabilities mask,
+the process will lose all capabilities.
+See the discussion of user and group ID mappings, below.
+.PP
+A call to
+.BR clone (2)
+or
+.BR unshare (2)
+using the
+.B CLONE_NEWUSER
+flag
+or a call to
+.BR setns (2)
+that moves the caller into another user namespace
+sets the "securebits" flags
+(see
+.BR capabilities (7))
+to their default values (all flags disabled) in the child (for
+.BR clone (2))
+or caller (for
+.BR unshare (2)
+or
+.BR setns (2)).
+Note that because the caller no longer has capabilities
+in its original user namespace after a call to
+.BR setns (2),
+it is not possible for a process to reset its "securebits" flags while
+retaining its user namespace membership by using a pair of
+.BR setns (2)
+calls to move to another user namespace and then return to
+its original user namespace.
+.PP
+The rules for determining whether or not a process has a capability
+in a particular user namespace are as follows:
+.IP \[bu] 3
+A process has a capability inside a user namespace
+if it is a member of that namespace and
+it has the capability in its effective capability set.
+A process can gain capabilities in its effective capability
+set in various ways.
+For example, it may execute a set-user-ID program or an
+executable with associated file capabilities.
+In addition,
+a process may gain capabilities via the effect of
+.BR clone (2),
+.BR unshare (2),
+or
+.BR setns (2),
+as already described.
+.\" In the 3.8 sources, see security/commoncap.c::cap_capable():
+.IP \[bu]
+If a process has a capability in a user namespace,
+then it has that capability in all child (and further removed descendant)
+namespaces as well.
+.IP \[bu]
+.\" * The owner of the user namespace in the parent of the
+.\" * user namespace has all caps.
+When a user namespace is created, the kernel records the effective
+user ID of the creating process as being the "owner" of the namespace.
+.\" (and likewise associates the effective group ID of the creating process
+.\" with the namespace).
+A process that resides
+in the parent of the user namespace
+.\" See kernel commit 520d9eabce18edfef76a60b7b839d54facafe1f9 for a fix
+.\" on this point
+and whose effective user ID matches the owner of the namespace
+has all capabilities in the namespace.
+.\" This includes the case where the process executes a set-user-ID
+.\" program that confers the effective UID of the creator of the namespace.
+By virtue of the previous rule,
+this means that the process has all capabilities in all
+further removed descendant user namespaces as well.
+The
+.B NS_GET_OWNER_UID
+.BR ioctl (2)
+operation can be used to discover the user ID of the owner of the namespace;
+see
+.BR ioctl_ns (2).
+.\"
+.\" ============================================================
+.\"
+.SS Effect of capabilities within a user namespace
+Having a capability inside a user namespace
+permits a process to perform operations (that require privilege)
+only on resources governed by that namespace.
+In other words, having a capability in a user namespace permits a process
+to perform privileged operations on resources that are governed by (nonuser)
+namespaces owned by (associated with) the user namespace
+(see the next subsection).
+.PP
+On the other hand, there are many privileged operations that affect
+resources that are not associated with any namespace type,
+for example, changing the system (i.e., calendar) time (governed by
+.BR CAP_SYS_TIME ),
+loading a kernel module (governed by
+.BR CAP_SYS_MODULE ),
+and creating a device (governed by
+.BR CAP_MKNOD ).
+Only a process with privileges in the
+.I initial
+user namespace can perform such operations.
+.PP
+Holding
+.B CAP_SYS_ADMIN
+within the user namespace that owns a process's mount namespace
+allows that process to create bind mounts
+and mount the following types of filesystems:
+.\" fs_flags = FS_USERNS_MOUNT in kernel sources
+.PP
+.RS 4
+.PD 0
+.IP \[bu] 3
+.I /proc
+(since Linux 3.8)
+.IP \[bu]
+.I /sys
+(since Linux 3.8)
+.IP \[bu]
+.I devpts
+(since Linux 3.9)
+.IP \[bu]
+.BR tmpfs (5)
+(since Linux 3.9)
+.IP \[bu]
+.I ramfs
+(since Linux 3.9)
+.IP \[bu]
+.I mqueue
+(since Linux 3.9)
+.IP \[bu]
+.I bpf
+.\" commit b2197755b2633e164a439682fb05a9b5ea48f706
+(since Linux 4.4)
+.IP \[bu]
+.I overlayfs
+.\" commit 92dbc9dedccb9759c7f9f2f0ae6242396376988f
+.\" commit 4cb2c00c43b3fe88b32f29df4f76da1b92c33224
+(since Linux 5.11)
+.PD
+.RE
+.PP
+Holding
+.B CAP_SYS_ADMIN
+within the user namespace that owns a process's cgroup namespace
+allows (since Linux 4.6)
+that process to mount the cgroup version 2 filesystem and
+cgroup version 1 named hierarchies
+(i.e., cgroup filesystems mounted with the
+.I """none,name="""
+option).
+.PP
+Holding
+.B CAP_SYS_ADMIN
+within the user namespace that owns a process's PID namespace
+allows (since Linux 3.8)
+that process to mount
+.I /proc
+filesystems.
+.PP
+Note, however, that mounting block-based filesystems can be done
+only by a process that holds
+.B CAP_SYS_ADMIN
+in the initial user namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Interaction of user namespaces and other types of namespaces
+Starting in Linux 3.8, unprivileged processes can create user namespaces,
+and the other types of namespaces can be created with just the
+.B CAP_SYS_ADMIN
+capability in the caller's user namespace.
+.PP
+When a nonuser namespace is created,
+it is owned by the user namespace in which the creating process
+was a member at the time of the creation of the namespace.
+Privileged operations on resources governed by the nonuser namespace
+require that the process has the necessary capabilities
+in the user namespace that owns the nonuser namespace.
+.PP
+If
+.B CLONE_NEWUSER
+is specified along with other
+.B CLONE_NEW*
+flags in a single
+.BR clone (2)
+or
+.BR unshare (2)
+call, the user namespace is guaranteed to be created first,
+giving the child
+.RB ( clone (2))
+or caller
+.RB ( unshare (2))
+privileges over the remaining namespaces created by the call.
+Thus, it is possible for an unprivileged caller to specify this combination
+of flags.
+.PP
+When a new namespace (other than a user namespace) is created via
+.BR clone (2)
+or
+.BR unshare (2),
+the kernel records the user namespace of the creating process as the owner of
+the new namespace.
+(This association can't be changed.)
+When a process in the new namespace subsequently performs
+privileged operations that operate on global
+resources isolated by the namespace,
+the permission checks are performed according to the process's capabilities
+in the user namespace that the kernel associated with the new namespace.
+For example, suppose that a process attempts to change the hostname
+.RB ( sethostname (2)),
+a resource governed by the UTS namespace.
+In this case,
+the kernel will determine which user namespace owns
+the process's UTS namespace, and check whether the process has the
+required capability
+.RB ( CAP_SYS_ADMIN )
+in that user namespace.
+.PP
+The
+.B NS_GET_USERNS
+.BR ioctl (2)
+operation can be used to discover the user namespace
+that owns a nonuser namespace; see
+.BR ioctl_ns (2).
+.\"
+.\" ============================================================
+.\"
+.SS User and group ID mappings: uid_map and gid_map
+When a user namespace is created,
+it starts out without a mapping of user IDs (group IDs)
+to the parent user namespace.
+The
+.IR /proc/ pid /uid_map
+and
+.IR /proc/ pid /gid_map
+files (available since Linux 3.5)
+.\" commit 22d917d80e842829d0ca0a561967d728eb1d6303
+expose the mappings for user and group IDs
+inside the user namespace for the process
+.IR pid .
+These files can be read to view the mappings in a user namespace and
+written to (once) to define the mappings.
+.PP
+The description in the following paragraphs explains the details for
+.IR uid_map ;
+.I gid_map
+is exactly the same,
+but each instance of "user ID" is replaced by "group ID".
+.PP
+The
+.I uid_map
+file exposes the mapping of user IDs from the user namespace
+of the process
+.I pid
+to the user namespace of the process that opened
+.I uid_map
+(but see a qualification to this point below).
+In other words, processes that are in different user namespaces
+will potentially see different values when reading from a particular
+.I uid_map
+file, depending on the user ID mappings for the user namespaces
+of the reading processes.
+.PP
+Each line in the
+.I uid_map
+file specifies a 1-to-1 mapping of a range of contiguous
+user IDs between two user namespaces.
+(When a user namespace is first created, this file is empty.)
+The specification in each line takes the form of
+three numbers delimited by white space.
+The first two numbers specify the starting user ID in
+each of the two user namespaces.
+The third number specifies the length of the mapped range.
+In detail, the fields are interpreted as follows:
+.IP (1) 5
+The start of the range of user IDs in
+the user namespace of the process
+.IR pid .
+.IP (2)
+The start of the range of user
+IDs to which the user IDs specified by field one map.
+How field two is interpreted depends on whether the process that opened
+.I uid_map
+and the process
+.I pid
+are in the same user namespace, as follows:
+.RS
+.IP (a) 5
+If the two processes are in different user namespaces:
+field two is the start of a range of
+user IDs in the user namespace of the process that opened
+.IR uid_map .
+.IP (b)
+If the two processes are in the same user namespace:
+field two is the start of the range of
+user IDs in the parent user namespace of the process
+.IR pid .
+This case enables the opener of
+.I uid_map
+(the common case here is opening
+.IR /proc/self/uid_map )
+to see the mapping of user IDs into the user namespace of the process
+that created this user namespace.
+.RE
+.IP (3)
+The length of the range of user IDs that is mapped between the two
+user namespaces.
+.PP
+System calls that return user IDs (group IDs)\[em]for example,
+.BR getuid (2),
+.BR getgid (2),
+and the credential fields in the structure returned by
+.BR stat (2)\[em]return
+the user ID (group ID) mapped into the caller's user namespace.
+.PP
+When a process accesses a file, its user and group IDs
+are mapped into the initial user namespace for the purpose of permission
+checking and assigning IDs when creating a file.
+When a process retrieves file user and group IDs via
+.BR stat (2),
+the IDs are mapped in the opposite direction,
+to produce values relative to the process user and group ID mappings.
+.PP
+The initial user namespace has no parent namespace,
+but, for consistency, the kernel provides dummy user and group
+ID mapping files for this namespace.
+Looking at the
+.I uid_map
+file
+.RI ( gid_map
+is the same) from a shell in the initial namespace shows:
+.PP
+.in +4n
+.EX
+$ \fBcat /proc/$$/uid_map\fP
+ 0 0 4294967295
+.EE
+.in
+.PP
+This mapping tells us
+that the range starting at user ID 0 in this namespace
+maps to a range starting at 0 in the (nonexistent) parent namespace,
+and the length of the range is the largest 32-bit unsigned integer.
+This leaves 4294967295 (the 32-bit signed \-1 value) unmapped.
+This is deliberate:
+.I (uid_t)\~\-1
+is used in several interfaces (e.g.,
+.BR setreuid (2))
+as a way to specify "no user ID".
+Leaving
+.I (uid_t)\~\-1
+unmapped and unusable guarantees that there will be no
+confusion when using these interfaces.
+.\"
+.\" ============================================================
+.\"
+.SS Defining user and group ID mappings: writing to uid_map and gid_map
+After the creation of a new user namespace, the
+.I uid_map
+file of
+.I one
+of the processes in the namespace may be written to
+.I once
+to define the mapping of user IDs in the new user namespace.
+An attempt to write more than once to a
+.I uid_map
+file in a user namespace fails with the error
+.BR EPERM .
+Similar rules apply for
+.I gid_map
+files.
+.PP
+The lines written to
+.I uid_map
+.RI ( gid_map )
+must conform to the following validity rules:
+.IP \[bu] 3
+The three fields must be valid numbers,
+and the last field must be greater than 0.
+.IP \[bu]
+Lines are terminated by newline characters.
+.IP \[bu]
+There is a limit on the number of lines in the file.
+In Linux 4.14 and earlier, this limit was (arbitrarily)
+.\" 5*12-byte records could fit in a 64B cache line
+set at 5 lines.
+Since Linux 4.15,
+.\" commit 6397fac4915ab3002dc15aae751455da1a852f25
+the limit is 340 lines.
+In addition, the number of bytes written to
+the file must be less than the system page size,
+and the write must be performed at the start of the file (i.e.,
+.BR lseek (2)
+and
+.BR pwrite (2)
+can't be used to write to nonzero offsets in the file).
+.IP \[bu]
+The range of user IDs (group IDs)
+specified in each line cannot overlap with the ranges
+in any other lines.
+In the initial implementation (Linux 3.8), this requirement was
+satisfied by a simplistic implementation that imposed the further
+requirement that
+the values in both field 1 and field 2 of successive lines must be
+in ascending numerical order,
+which prevented some otherwise valid maps from being created.
+Linux 3.9 and later
+.\" commit 0bd14b4fd72afd5df41e9fd59f356740f22fceba
+fix this limitation, allowing any valid set of nonoverlapping maps.
+.IP \[bu]
+At least one line must be written to the file.
+.PP
+Writes that violate the above rules fail with the error
+.BR EINVAL .
+.PP
+In order for a process to write to the
+.IR /proc/ pid /uid_map
+.RI ( /proc/ pid /gid_map )
+file, all of the following permission requirements must be met:
+.IP \[bu] 3
+The writing process must have the
+.B CAP_SETUID
+.RB ( CAP_SETGID )
+capability in the user namespace of the process
+.IR pid .
+.IP \[bu]
+The writing process must either be in the user namespace of the process
+.I pid
+or be in the parent user namespace of the process
+.IR pid .
+.IP \[bu]
+The mapped user IDs (group IDs) must in turn have a mapping
+in the parent user namespace.
+.IP \[bu]
+If updating
+.IR /proc/ pid /uid_map
+to create a mapping that maps UID 0 in the parent namespace,
+then one of the following must be true:
+.RS
+.IP (a) 5
+if the writing process is in the parent user namespace,
+then it must have the
+.B CAP_SETFCAP
+capability in that user namespace; or
+.IP (b)
+if the writing process is in the child user namespace,
+then the process that created the user namespace must have had the
+.B CAP_SETFCAP
+capability when the namespace was created.
+.RE
+.IP
+This rule has been in place since
+.\" commit db2e718a47984b9d71ed890eb2ea36ecf150de18
+Linux 5.12.
+It eliminates an earlier security bug whereby
+a UID 0 process that lacks the
+.B CAP_SETFCAP
+capability,
+which is needed to create a binary with namespaced file capabilities
+(as described in
+.BR capabilities (7)),
+could nevertheless create such a binary,
+by the following steps:
+.RS
+.IP (1) 5
+Create a new user namespace with the identity mapping
+(i.e., UID 0 in the new user namespace maps to UID 0 in the parent namespace),
+so that UID 0 in both namespaces is equivalent to the same root user ID.
+.IP (2)
+Since the child process has the
+.B CAP_SETFCAP
+capability, it could create a binary with namespaced file capabilities
+that would then be effective in the parent user namespace
+(because the root user IDs are the same in the two namespaces).
+.RE
+.IP \[bu]
+One of the following two cases applies:
+.RS
+.IP (a) 5
+.I Either
+the writing process has the
+.B CAP_SETUID
+.RB ( CAP_SETGID )
+capability in the
+.I parent
+user namespace.
+.RS
+.IP \[bu] 3
+No further restrictions apply:
+the process can make mappings to arbitrary user IDs (group IDs)
+in the parent user namespace.
+.RE
+.IP (b)
+.I Or
+otherwise all of the following restrictions apply:
+.RS
+.IP \[bu] 3
+The data written to
+.I uid_map
+.RI ( gid_map )
+must consist of a single line that maps
+the writing process's effective user ID
+(group ID) in the parent user namespace to a user ID (group ID)
+in the user namespace.
+.IP \[bu]
+The writing process must have the same effective user ID as the process
+that created the user namespace.
+.IP \[bu]
+In the case of
+.IR gid_map ,
+use of the
+.BR setgroups (2)
+system call must first be denied by writing
+.RI \[dq] deny \[dq]
+to the
+.IR /proc/ pid /setgroups
+file (see below) before writing to
+.IR gid_map .
+.RE
+.RE
+.PP
+Writes that violate the above rules fail with the error
+.BR EPERM .
+.\"
+.\" ============================================================
+.\"
+.SS Project ID mappings: projid_map
+Similarly to user and group ID mappings,
+it is possible to create project ID mappings for a user namespace.
+(Project IDs are used for disk quotas; see
+.BR setquota (8)
+and
+.BR quotactl (2).)
+.PP
+Project ID mappings are defined by writing to the
+.IR /proc/ pid /projid_map
+file (present since
+.\" commit f76d207a66c3a53defea67e7d36c3eb1b7d6d61d
+Linux 3.7).
+.PP
+The validity rules for writing to the
+.IR /proc/ pid /projid_map
+file are as for writing to the
+.I uid_map
+file; violation of these rules causes
+.BR write (2)
+to fail with the error
+.BR EINVAL .
+.PP
+The permission rules for writing to the
+.IR /proc/ pid /projid_map
+file are as follows:
+.IP \[bu] 3
+The writing process must either be in the user namespace of the process
+.I pid
+or be in the parent user namespace of the process
+.IR pid .
+.IP \[bu]
+The mapped project IDs must in turn have a mapping
+in the parent user namespace.
+.PP
+Violation of these rules causes
+.BR write (2)
+to fail with the error
+.BR EPERM .
+.\"
+.\" ============================================================
+.\"
+.SS Interaction with system calls that change process UIDs or GIDs
+In a user namespace where the
+.I uid_map
+file has not been written, the system calls that change user IDs will fail.
+Similarly, if the
+.I gid_map
+file has not been written, the system calls that change group IDs will fail.
+After the
+.I uid_map
+and
+.I gid_map
+files have been written, only the mapped values may be used in
+system calls that change user and group IDs.
+.PP
+For user IDs, the relevant system calls include
+.BR setuid (2),
+.BR setfsuid (2),
+.BR setreuid (2),
+and
+.BR setresuid (2).
+For group IDs, the relevant system calls include
+.BR setgid (2),
+.BR setfsgid (2),
+.BR setregid (2),
+.BR setresgid (2),
+and
+.BR setgroups (2).
+.PP
+Writing
+.RI \[dq] deny \[dq]
+to the
+.IR /proc/ pid /setgroups
+file before writing to
+.IR /proc/ pid /gid_map
+.\" Things changed in Linux 3.19
+.\" commit 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8
+.\" commit 66d2f338ee4c449396b6f99f5e75cd18eb6df272
+.\" http://lwn.net/Articles/626665/
+will permanently disable
+.BR setgroups (2)
+in a user namespace and allow writing to
+.IR /proc/ pid /gid_map
+without having the
+.B CAP_SETGID
+capability in the parent user namespace.
+.\"
+.\" ============================================================
+.\"
+.SS The \fI/proc/\fPpid\fI/setgroups\fP file
+.\"
+.\" commit 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8
+.\" commit 66d2f338ee4c449396b6f99f5e75cd18eb6df272
+.\" http://lwn.net/Articles/626665/
+.\" http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-8989
+.\"
+The
+.IR /proc/ pid /setgroups
+file displays the string
+.RI \[dq] allow \[dq]
+if processes in the user namespace that contains the process
+.I pid
+are permitted to employ the
+.BR setgroups (2)
+system call; it displays
+.RI \[dq] deny \[dq]
+if
+.BR setgroups (2)
+is not permitted in that user namespace.
+Note that regardless of the value in the
+.IR /proc/ pid /setgroups
+file (and regardless of the process's capabilities), calls to
+.BR setgroups (2)
+are also not permitted if
+.IR /proc/ pid /gid_map
+has not yet been set.
+.PP
+A privileged process (one with the
+.B CAP_SYS_ADMIN
+capability in the namespace) may write either of the strings
+.RI \[dq] allow \[dq]
+or
+.RI \[dq] deny \[dq]
+to this file
+.I before
+writing a group ID mapping
+for this user namespace to the file
+.IR /proc/ pid /gid_map .
+Writing the string
+.RI \[dq] deny \[dq]
+prevents any process in the user namespace from employing
+.BR setgroups (2).
+.PP
+The essence of the restrictions described in the preceding
+paragraph is that it is permitted to write to
+.IR /proc/ pid /setgroups
+only so long as calling
+.BR setgroups (2)
+is disallowed because
+.IR /proc/ pid /gid_map
+has not been set.
+This ensures that a process cannot transition from a state where
+.BR setgroups (2)
+is allowed to a state where
+.BR setgroups (2)
+is denied;
+a process can transition only from
+.BR setgroups (2)
+being disallowed to
+.BR setgroups (2)
+being allowed.
+.PP
+The default value of this file in the initial user namespace is
+.RI \[dq] allow \[dq].
+.PP
+Once
+.IR /proc/ pid /gid_map
+has been written to
+(which has the effect of enabling
+.BR setgroups (2)
+in the user namespace),
+it is no longer possible to disallow
+.BR setgroups (2)
+by writing
+.RI \[dq] deny \[dq]
+to
+.IR /proc/ pid /setgroups
+(the write fails with the error
+.BR EPERM ).
+.PP
+A child user namespace inherits the
+.IR /proc/ pid /setgroups
+setting from its parent.
+.PP
+If the
+.I setgroups
+file has the value
+.RI \[dq] deny \[dq],
+then the
+.BR setgroups (2)
+system call can't subsequently be reenabled (by writing
+.RI \[dq] allow \[dq]
+to the file) in this user namespace.
+(Attempts to do so fail with the error
+.BR EPERM .)
+This restriction also propagates down to all child user namespaces of
+this user namespace.
+.PP
+The
+.IR /proc/ pid /setgroups
+file was added in Linux 3.19,
+but was backported to many earlier stable kernel series,
+because it addresses a security issue.
+The issue concerned files with permissions such as "rwx\-\-\-rwx".
+Such files give fewer permissions to "group" than they do to "other".
+This means that dropping groups using
+.BR setgroups (2)
+might allow a process file access that it did not formerly have.
+Before the existence of user namespaces this was not a concern,
+since only a privileged process (one with the
+.B CAP_SETGID
+capability) could call
+.BR setgroups (2).
+However, with the introduction of user namespaces,
+it became possible for an unprivileged process to create
+a new namespace in which the user had all privileges.
+This then allowed formerly unprivileged
+users to drop groups and thus gain file access
+that they did not previously have.
+The
+.IR /proc/ pid /setgroups
+file was added to address this security issue,
+by denying any pathway for an unprivileged process to drop groups with
+.BR setgroups (2).
+.\"
+.\" /proc/PID/setgroups
+.\" [allow == setgroups() is allowed, "deny" == setgroups() is disallowed]
+.\" * Can write if have CAP_SYS_ADMIN in NS
+.\" * Must write BEFORE writing to /proc/PID/gid_map
+.\"
+.\" setgroups()
+.\" * Must already have written to gid_map
+.\" * /proc/PID/setgroups must be "allow"
+.\"
+.\" /proc/PID/gid_map -- writing
+.\" * Must already have written "deny" to /proc/PID/setgroups
+.\"
+.\" ============================================================
+.\"
+.SS Unmapped user and group IDs
+There are various places where an unmapped user ID (group ID)
+may be exposed to user space.
+For example, the first process in a new user namespace may call
+.BR getuid (2)
+before a user ID mapping has been defined for the namespace.
+In most such cases, an unmapped user ID is converted
+.\" from_kuid_munged(), from_kgid_munged()
+to the overflow user ID (group ID);
+the default value for the overflow user ID (group ID) is 65534.
+See the descriptions of
+.I /proc/sys/kernel/overflowuid
+and
+.I /proc/sys/kernel/overflowgid
+in
+.BR proc (5).
+.PP
+The cases where unmapped IDs are mapped in this fashion include
+system calls that return user IDs
+.RB ( getuid (2),
+.BR getgid (2),
+and similar),
+credentials passed over a UNIX domain socket,
+.\" also SO_PEERCRED
+credentials returned by
+.BR stat (2),
+.BR waitid (2),
+and the System V IPC "ctl"
+.B IPC_STAT
+operations,
+credentials exposed by
+.IR /proc/ pid /status
+and the files in
+.IR /proc/sysvipc/* ,
+credentials returned via the
+.I si_uid
+field in the
+.I siginfo_t
+received with a signal (see
+.BR sigaction (2)),
+credentials written to the process accounting file (see
+.BR acct (5)),
+and credentials returned with POSIX message queue notifications (see
+.BR mq_notify (3)).
+.PP
+There is one notable case where unmapped user and group IDs are
+.I not
+.\" from_kuid(), from_kgid()
+.\" Also F_GETOWNER_UIDS is an exception
+converted to the corresponding overflow ID value.
+When viewing a
+.I uid_map
+or
+.I gid_map
+file in which there is no mapping for the second field,
+that field is displayed as 4294967295 (\-1 as an unsigned integer).
+.\"
+.\" ============================================================
+.\"
+.SS Accessing files
+In order to determine permissions when an unprivileged process accesses a file,
+the process credentials (UID, GID) and the file credentials
+are in effect mapped back to what they would be in
+the initial user namespace and then compared to determine
+the permissions that the process has on the file.
+The same is also true of other objects that employ the credentials plus
+permissions mask accessibility model, such as System V IPC objects.
+.\"
+.\" ============================================================
+.\"
+.SS Operation of file-related capabilities
+Certain capabilities allow a process to bypass various
+kernel-enforced restrictions when performing operations on
+files owned by other users or groups.
+These capabilities are:
+.BR CAP_CHOWN ,
+.BR CAP_DAC_OVERRIDE ,
+.BR CAP_DAC_READ_SEARCH ,
+.BR CAP_FOWNER ,
+and
+.BR CAP_FSETID .
+.PP
+Within a user namespace,
+these capabilities allow a process to bypass the rules
+if the process has the relevant capability over the file,
+meaning that:
+.IP \[bu] 3
+the process has the relevant effective capability in its user namespace; and
+.IP \[bu]
+the file's user ID and group ID both have valid mappings
+in the user namespace.
+.PP
+The
+.B CAP_FOWNER
+capability is treated somewhat exceptionally:
+.\" These are the checks performed by the kernel function
+.\" inode_owner_or_capable(). There is one exception to the exception:
+.\" overriding the directory sticky permission bit requires that
+.\" the file has a valid mapping for both its UID and GID.
+it allows a process to bypass the corresponding rules so long as
+at least the file's user ID has a mapping in the user namespace
+(i.e., the file's group ID does not need to have a valid mapping).
+.\"
+.\" ============================================================
+.\"
+.SS Set-user-ID and set-group-ID programs
+When a process inside a user namespace executes
+a set-user-ID (set-group-ID) program,
+the process's effective user (group) ID inside the namespace is changed
+to whatever value is mapped for the user (group) ID of the file.
+However, if either the user
+.I or
+the group ID of the file has no mapping inside the namespace,
+the set-user-ID (set-group-ID) bit is silently ignored:
+the new program is executed,
+but the process's effective user (group) ID is left unchanged.
+(This mirrors the semantics of executing a set-user-ID or set-group-ID
+program that resides on a filesystem that was mounted with the
+.B MS_NOSUID
+flag, as described in
+.BR mount (2).)
+.\"
+.\" ============================================================
+.\"
+.SS Miscellaneous
+When a process's user and group IDs are passed over a UNIX domain socket
+to a process in a different user namespace (see the description of
+.B SCM_CREDENTIALS
+in
+.BR unix (7)),
+they are translated into the corresponding values as per the
+receiving process's user and group ID mappings.
+.\"
+.SH STANDARDS
+Linux.
+.\"
+.SH NOTES
+Over the years, there have been a lot of features that have been added
+to the Linux kernel that have been made available only to privileged users
+because of their potential to confuse set-user-ID-root applications.
+In general, it becomes safe to allow the root user in a user namespace to
+use those features because it is impossible, while in a user namespace,
+to gain more privilege than the root user of a user namespace has.
+.\"
+.\" ============================================================
+.\"
+.SS Global root
+The term "global root" is sometimes used as a shorthand for
+user ID 0 in the initial user namespace.
+.\"
+.\" ============================================================
+.\"
+.SS Availability
+Use of user namespaces requires a kernel that is configured with the
+.B CONFIG_USER_NS
+option.
+User namespaces require support in a range of subsystems across
+the kernel.
+When an unsupported subsystem is configured into the kernel,
+it is not possible to configure user namespaces support.
+.PP
+As at Linux 3.8, most relevant subsystems supported user namespaces,
+but a number of filesystems did not have the infrastructure needed
+to map user and group IDs between user namespaces.
+Linux 3.9 added the required infrastructure support for many of
+the remaining unsupported filesystems
+(Plan 9 (9P), Andrew File System (AFS), Ceph, CIFS, CODA, NFS, and OCFS2).
+Linux 3.12 added support for the last of the unsupported major filesystems,
+.\" commit d6970d4b726cea6d7a9bc4120814f95c09571fc3
+XFS.
+.\"
+.SH EXAMPLES
+The program below is designed to allow experimenting with
+user namespaces, as well as other types of namespaces.
+It creates namespaces as specified by command-line options and then executes
+a command inside those namespaces.
+The comments and
+.IR usage ()
+function inside the program provide a full explanation of the program.
+The following shell session demonstrates its use.
+.PP
+First, we look at the run-time environment:
+.PP
+.in +4n
+.EX
+$ \fBuname \-rs\fP # Need Linux 3.8 or later
+Linux 3.8.0
+$ \fBid \-u\fP # Running as unprivileged user
+1000
+$ \fBid \-g\fP
+1000
+.EE
+.in
+.PP
+Now start a new shell in new user
+.RI ( \-U ),
+mount
+.RI ( \-m ),
+and PID
+.RI ( \-p )
+namespaces, with user ID
+.RI ( \-M )
+and group ID
+.RI ( \-G )
+1000 mapped to 0 inside the user namespace:
+.PP
+.in +4n
+.EX
+$ \fB./userns_child_exec \-p \-m \-U \-M \[aq]0 1000 1\[aq] \-G \[aq]0 1000 1\[aq] bash\fP
+.EE
+.in
+.PP
+The shell has PID 1, because it is the first process in the new
+PID namespace:
+.PP
+.in +4n
+.EX
+bash$ \fBecho $$\fP
+1
+.EE
+.in
+.PP
+Mounting a new
+.I /proc
+filesystem and listing all of the processes visible
+in the new PID namespace shows that the shell can't see
+any processes outside the PID namespace:
+.PP
+.in +4n
+.EX
+bash$ \fBmount \-t proc proc /proc\fP
+bash$ \fBps ax\fP
+ PID TTY STAT TIME COMMAND
+ 1 pts/3 S 0:00 bash
+ 22 pts/3 R+ 0:00 ps ax
+.EE
+.in
+.PP
+Inside the user namespace, the shell has user and group ID 0,
+and a full set of permitted and effective capabilities:
+.PP
+.in +4n
+.EX
+bash$ \fBcat /proc/$$/status | egrep \[aq]\[ha][UG]id\[aq]\fP
+Uid: 0 0 0 0
+Gid: 0 0 0 0
+bash$ \fBcat /proc/$$/status | egrep \[aq]\[ha]Cap(Prm|Inh|Eff)\[aq]\fP
+CapInh: 0000000000000000
+CapPrm: 0000001fffffffff
+CapEff: 0000001fffffffff
+.EE
+.in
+.SS Program source
+\&
+.EX
+/* userns_child_exec.c
+\&
+ Licensed under GNU General Public License v2 or later
+\&
+ Create a child process that executes a shell command in new
+ namespace(s); allow UID and GID mappings to be specified when
+ creating a user namespace.
+*/
+#define _GNU_SOURCE
+#include <err.h>
+#include <sched.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+\&
+struct child_args {
+ char **argv; /* Command to be executed by child, with args */
+ int pipe_fd[2]; /* Pipe used to synchronize parent and child */
+};
+\&
+static int verbose;
+\&
+static void
+usage(char *pname)
+{
+ fprintf(stderr, "Usage: %s [options] cmd [arg...]\en\en", pname);
+ fprintf(stderr, "Create a child process that executes a shell "
+ "command in a new user namespace,\en"
+ "and possibly also other new namespace(s).\en\en");
+ fprintf(stderr, "Options can be:\en\en");
+#define fpe(str) fprintf(stderr, " %s", str);
+ fpe("\-i New IPC namespace\en");
+ fpe("\-m New mount namespace\en");
+ fpe("\-n New network namespace\en");
+ fpe("\-p New PID namespace\en");
+ fpe("\-u New UTS namespace\en");
+ fpe("\-U New user namespace\en");
+ fpe("\-M uid_map Specify UID map for user namespace\en");
+ fpe("\-G gid_map Specify GID map for user namespace\en");
+ fpe("\-z Map user\[aq]s UID and GID to 0 in user namespace\en");
+ fpe(" (equivalent to: \-M \[aq]0 <uid> 1\[aq] \-G \[aq]0 <gid> 1\[aq])\en");
+ fpe("\-v Display verbose messages\en");
+ fpe("\en");
+ fpe("If \-z, \-M, or \-G is specified, \-U is required.\en");
+ fpe("It is not permitted to specify both \-z and either \-M or \-G.\en");
+ fpe("\en");
+ fpe("Map strings for \-M and \-G consist of records of the form:\en");
+ fpe("\en");
+ fpe(" ID\-inside\-ns ID\-outside\-ns len\en");
+ fpe("\en");
+ fpe("A map string can contain multiple records, separated"
+ " by commas;\en");
+ fpe("the commas are replaced by newlines before writing"
+ " to map files.\en");
+\&
+ exit(EXIT_FAILURE);
+}
+\&
+/* Update the mapping file \[aq]map_file\[aq], with the value provided in
+ \[aq]mapping\[aq], a string that defines a UID or GID mapping. A UID or
+ GID mapping consists of one or more newline\-delimited records
+ of the form:
+\&
+ ID\-inside\-ns ID\-outside\-ns length
+\&
+ Requiring the user to supply a string that contains newlines is
+ of course inconvenient for command\-line use. Thus, we permit the
+ use of commas to delimit records in this string, and replace them
+ with newlines before writing the string to the file. */
+\&
+static void
+update_map(char *mapping, char *map_file)
+{
+ int fd;
+ size_t map_len; /* Length of \[aq]mapping\[aq] */
+\&
+ /* Replace commas in mapping string with newlines. */
+\&
+ map_len = strlen(mapping);
+ for (size_t j = 0; j < map_len; j++)
+ if (mapping[j] == \[aq],\[aq])
+ mapping[j] = \[aq]\en\[aq];
+\&
+ fd = open(map_file, O_RDWR);
+ if (fd == \-1) {
+ fprintf(stderr, "ERROR: open %s: %s\en", map_file,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+\&
+ if (write(fd, mapping, map_len) != map_len) {
+ fprintf(stderr, "ERROR: write %s: %s\en", map_file,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+\&
+ close(fd);
+}
+\&
+/* Linux 3.19 made a change in the handling of setgroups(2) and
+ the \[aq]gid_map\[aq] file to address a security issue. The issue
+ allowed *unprivileged* users to employ user namespaces in
+ order to drop groups. The upshot of the 3.19 changes is that
+ in order to update the \[aq]gid_map\[aq] file, use of the setgroups()
+ system call in this user namespace must first be disabled by
+ writing "deny" to one of the /proc/PID/setgroups files for
+ this namespace. That is the purpose of the following function. */
+\&
+static void
+proc_setgroups_write(pid_t child_pid, char *str)
+{
+ char setgroups_path[PATH_MAX];
+ int fd;
+\&
+ snprintf(setgroups_path, PATH_MAX, "/proc/%jd/setgroups",
+ (intmax_t) child_pid);
+\&
+ fd = open(setgroups_path, O_RDWR);
+ if (fd == \-1) {
+\&
+ /* We may be on a system that doesn\[aq]t support
+ /proc/PID/setgroups. In that case, the file won\[aq]t exist,
+ and the system won\[aq]t impose the restrictions that Linux 3.19
+ added. That\[aq]s fine: we don\[aq]t need to do anything in order
+ to permit \[aq]gid_map\[aq] to be updated.
+\&
+ However, if the error from open() was something other than
+ the ENOENT error that is expected for that case, let the
+ user know. */
+\&
+ if (errno != ENOENT)
+ fprintf(stderr, "ERROR: open %s: %s\en", setgroups_path,
+ strerror(errno));
+ return;
+ }
+\&
+ if (write(fd, str, strlen(str)) == \-1)
+ fprintf(stderr, "ERROR: write %s: %s\en", setgroups_path,
+ strerror(errno));
+\&
+ close(fd);
+}
+\&
+static int /* Start function for cloned child */
+childFunc(void *arg)
+{
+ struct child_args *args = arg;
+ char ch;
+\&
+ /* Wait until the parent has updated the UID and GID mappings.
+ See the comment in main(). We wait for end of file on a
+ pipe that will be closed by the parent process once it has
+ updated the mappings. */
+\&
+ close(args\->pipe_fd[1]); /* Close our descriptor for the write
+ end of the pipe so that we see EOF
+ when parent closes its descriptor. */
+ if (read(args\->pipe_fd[0], &ch, 1) != 0) {
+ fprintf(stderr,
+ "Failure in child: read from pipe returned != 0\en");
+ exit(EXIT_FAILURE);
+ }
+\&
+ close(args\->pipe_fd[0]);
+\&
+ /* Execute a shell command. */
+\&
+ printf("About to exec %s\en", args\->argv[0]);
+ execvp(args\->argv[0], args\->argv);
+ err(EXIT_FAILURE, "execvp");
+}
+\&
+#define STACK_SIZE (1024 * 1024)
+\&
+static char child_stack[STACK_SIZE]; /* Space for child\[aq]s stack */
+\&
+int
+main(int argc, char *argv[])
+{
+ int flags, opt, map_zero;
+ pid_t child_pid;
+ struct child_args args;
+ char *uid_map, *gid_map;
+ const int MAP_BUF_SIZE = 100;
+ char map_buf[MAP_BUF_SIZE];
+ char map_path[PATH_MAX];
+\&
+ /* Parse command\-line options. The initial \[aq]+\[aq] character in
+ the final getopt() argument prevents GNU\-style permutation
+ of command\-line options. That\[aq]s useful, since sometimes
+ the \[aq]command\[aq] to be executed by this program itself
+ has command\-line options. We don\[aq]t want getopt() to treat
+ those as options to this program. */
+\&
+ flags = 0;
+ verbose = 0;
+ gid_map = NULL;
+ uid_map = NULL;
+ map_zero = 0;
+ while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != \-1) {
+ switch (opt) {
+ case \[aq]i\[aq]: flags |= CLONE_NEWIPC; break;
+ case \[aq]m\[aq]: flags |= CLONE_NEWNS; break;
+ case \[aq]n\[aq]: flags |= CLONE_NEWNET; break;
+ case \[aq]p\[aq]: flags |= CLONE_NEWPID; break;
+ case \[aq]u\[aq]: flags |= CLONE_NEWUTS; break;
+ case \[aq]v\[aq]: verbose = 1; break;
+ case \[aq]z\[aq]: map_zero = 1; break;
+ case \[aq]M\[aq]: uid_map = optarg; break;
+ case \[aq]G\[aq]: gid_map = optarg; break;
+ case \[aq]U\[aq]: flags |= CLONE_NEWUSER; break;
+ default: usage(argv[0]);
+ }
+ }
+\&
+ /* \-M, \-G, or \-z without \-U is nonsensical, as is \-z with \-M or \-G */
+\&
+ if (((uid_map != NULL || gid_map != NULL || map_zero) &&
+ !(flags & CLONE_NEWUSER)) ||
+ (map_zero && (uid_map != NULL || gid_map != NULL)))
+ usage(argv[0]);
+\&
+ args.argv = &argv[optind];
+\&
+ /* We use a pipe to synchronize the parent and child, in order to
+ ensure that the parent sets the UID and GID maps before the child
+ calls execve(). This ensures that the child maintains its
+ capabilities during the execve() in the common case where we
+ want to map the child\[aq]s effective user ID to 0 in the new user
+ namespace. Without this synchronization, the child would lose
+ its capabilities if it performed an execve() with nonzero
+ user IDs (see the capabilities(7) man page for details of the
+ transformation of a process\[aq]s capabilities during execve()). */
+\&
+ if (pipe(args.pipe_fd) == \-1)
+ err(EXIT_FAILURE, "pipe");
+\&
+ /* Create the child in new namespace(s). */
+\&
+ child_pid = clone(childFunc, child_stack + STACK_SIZE,
+ flags | SIGCHLD, &args);
+ if (child_pid == \-1)
+ err(EXIT_FAILURE, "clone");
+\&
+ /* Parent falls through to here. */
+\&
+ if (verbose)
+ printf("%s: PID of child created by clone() is %jd\en",
+ argv[0], (intmax_t) child_pid);
+\&
+ /* Update the UID and GID maps in the child. */
+\&
+ if (uid_map != NULL || map_zero) {
+ snprintf(map_path, PATH_MAX, "/proc/%jd/uid_map",
+ (intmax_t) child_pid);
+ if (map_zero) {
+ snprintf(map_buf, MAP_BUF_SIZE, "0 %jd 1",
+ (intmax_t) getuid());
+ uid_map = map_buf;
+ }
+ update_map(uid_map, map_path);
+ }
+\&
+ if (gid_map != NULL || map_zero) {
+ proc_setgroups_write(child_pid, "deny");
+\&
+ snprintf(map_path, PATH_MAX, "/proc/%jd/gid_map",
+ (intmax_t) child_pid);
+ if (map_zero) {
+ snprintf(map_buf, MAP_BUF_SIZE, "0 %jd 1",
+ (intmax_t) getgid());
+ gid_map = map_buf;
+ }
+ update_map(gid_map, map_path);
+ }
+\&
+ /* Close the write end of the pipe, to signal to the child that we
+ have updated the UID and GID maps. */
+\&
+ close(args.pipe_fd[1]);
+\&
+ if (waitpid(child_pid, NULL, 0) == \-1) /* Wait for child */
+ err(EXIT_FAILURE, "waitpid");
+\&
+ if (verbose)
+ printf("%s: terminating\en", argv[0]);
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.SH SEE ALSO
+.BR newgidmap (1), \" From the shadow package
+.BR newuidmap (1), \" From the shadow package
+.BR clone (2),
+.BR ptrace (2),
+.BR setns (2),
+.BR unshare (2),
+.BR proc (5),
+.BR subgid (5), \" From the shadow package
+.BR subuid (5), \" From the shadow package
+.BR capabilities (7),
+.BR cgroup_namespaces (7),
+.BR credentials (7),
+.BR namespaces (7),
+.BR pid_namespaces (7)
+.PP
+The kernel source file
+.IR Documentation/admin\-guide/namespaces/resource\-control.rst .
diff --git a/man7/utf-8.7 b/man7/utf-8.7
new file mode 100644
index 0000000..8a5f7ab
--- /dev/null
+++ b/man7/utf-8.7
@@ -0,0 +1,211 @@
+.\" Copyright (C) Markus Kuhn, 1996, 2001
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" 1995-11-26 Markus Kuhn <mskuhn@cip.informatik.uni-erlangen.de>
+.\" First version written
+.\" 2001-05-11 Markus Kuhn <mgk25@cl.cam.ac.uk>
+.\" Update
+.\"
+.TH UTF-8 7 2023-03-12 "Linux man-pages 6.05.01"
+.SH NAME
+UTF-8 \- an ASCII compatible multibyte Unicode encoding
+.SH DESCRIPTION
+The Unicode 3.0 character set occupies a 16-bit code space.
+The most obvious
+Unicode encoding (known as UCS-2)
+consists of a sequence of 16-bit words.
+Such strings can contain\[em]as part of many 16-bit characters\[em]bytes
+such as \[aq]\e0\[aq] or \[aq]/\[aq], which have a
+special meaning in filenames and other C library function arguments.
+In addition, the majority of UNIX tools expect ASCII files and can't
+read 16-bit words as characters without major modifications.
+For these reasons,
+UCS-2 is not a suitable external encoding of Unicode
+in filenames, text files, environment variables, and so on.
+The ISO/IEC 10646 Universal Character Set (UCS),
+a superset of Unicode, occupies an even larger code
+space\[em]31\ bits\[em]and the obvious
+UCS-4 encoding for it (a sequence of 32-bit words) has the same problems.
+.PP
+The UTF-8 encoding of Unicode and UCS
+does not have these problems and is the common way in which
+Unicode is used on UNIX-style operating systems.
+.SS Properties
+The UTF-8 encoding has the following nice properties:
+.TP 0.2i
+*
+UCS
+characters 0x00000000 to 0x0000007f (the classic US-ASCII
+characters) are encoded simply as bytes 0x00 to 0x7f (ASCII
+compatibility).
+This means that files and strings which contain only
+7-bit ASCII characters have the same encoding under both
+ASCII
+and
+UTF-8.
+.TP
+*
+All UCS characters greater than 0x7f are encoded as a multibyte sequence
+consisting only of bytes in the range 0x80 to 0xfd, so no ASCII
+byte can appear as part of another character and there are no
+problems with, for example, \[aq]\e0\[aq] or \[aq]/\[aq].
+.TP
+*
+The lexicographic sorting order of UCS-4 strings is preserved.
+.TP
+*
+All possible 2\[ha]31 UCS codes can be encoded using UTF-8.
+.TP
+*
+The bytes 0xc0, 0xc1, 0xfe, and 0xff are never used in the UTF-8 encoding.
+.TP
+*
+The first byte of a multibyte sequence which represents a single non-ASCII
+UCS character is always in the range 0xc2 to 0xfd and indicates how long
+this multibyte sequence is.
+All further bytes in a multibyte sequence
+are in the range 0x80 to 0xbf.
+This allows easy resynchronization and
+makes the encoding stateless and robust against missing bytes.
+.TP
+*
+UTF-8 encoded UCS characters may be up to six bytes long; however, the
+Unicode standard specifies no characters above 0x10ffff, so Unicode characters
+can be only up to four bytes long in
+UTF-8.
+.SS Encoding
+The following byte sequences are used to represent a character.
+The sequence to be used depends on the UCS code number of the character:
+.TP 0.4i
+0x00000000 \- 0x0000007F:
+.RI 0 xxxxxxx
+.TP
+0x00000080 \- 0x000007FF:
+.RI 110 xxxxx
+.RI 10 xxxxxx
+.TP
+0x00000800 \- 0x0000FFFF:
+.RI 1110 xxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.TP
+0x00010000 \- 0x001FFFFF:
+.RI 11110 xxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.TP
+0x00200000 \- 0x03FFFFFF:
+.RI 111110 xx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.TP
+0x04000000 \- 0x7FFFFFFF:
+.RI 1111110 x
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.RI 10 xxxxxx
+.PP
+The
+.I xxx
+bit positions are filled with the bits of the character code number in
+binary representation, most significant bit first (big-endian).
+Only the shortest possible multibyte sequence
+which can represent the code number of the character can be used.
+.PP
+The UCS code values 0xd800\[en]0xdfff (UTF-16 surrogates) as well as 0xfffe and
+0xffff (UCS noncharacters) should not appear in conforming UTF-8 streams.
+According to RFC 3629 no point above U+10FFFF should be used,
+which limits characters to four bytes.
+.SS Example
+The Unicode character 0xa9 = 1010 1001 (the copyright sign) is encoded
+in UTF-8 as
+.PP
+.RS
+11000010 10101001 = 0xc2 0xa9
+.RE
+.PP
+and character 0x2260 = 0010 0010 0110 0000 (the "not equal" symbol) is
+encoded as:
+.PP
+.RS
+11100010 10001001 10100000 = 0xe2 0x89 0xa0
+.RE
+.SS Application notes
+Users have to select a UTF-8 locale, for example with
+.PP
+.RS
+export LANG=en_GB.UTF-8
+.RE
+.PP
+in order to activate the UTF-8 support in applications.
+.PP
+Application software that has to be aware of the used character
+encoding should always set the locale with, for example,
+.PP
+.RS
+setlocale(LC_CTYPE, "")
+.RE
+.PP
+and programmers can then test the expression
+.PP
+.RS
+strcmp(nl_langinfo(CODESET), "UTF-8") == 0
+.RE
+.PP
+to determine whether a UTF-8 locale has been selected and whether
+therefore all plaintext standard input and output, terminal
+communication, plaintext file content, filenames, and environment
+variables are encoded in UTF-8.
+.PP
+Programmers accustomed to single-byte encodings such as US-ASCII or ISO 8859
+have to be aware that two assumptions made so far are no longer valid
+in UTF-8 locales.
+Firstly, a single byte does not necessarily correspond any
+more to a single character.
+Secondly, since modern terminal emulators in UTF-8
+mode also support Chinese, Japanese, and Korean
+double-width characters as well as nonspacing combining characters,
+outputting a single character does not necessarily advance the cursor
+by one position as it did in ASCII.
+Library functions such as
+.BR mbsrtowcs (3)
+and
+.BR wcswidth (3)
+should be used today to count characters and cursor positions.
+.PP
+The official ESC sequence to switch from an ISO 2022
+encoding scheme (as used for instance by VT100 terminals) to
+UTF-8 is ESC % G
+("\ex1b%G").
+The corresponding return sequence from
+UTF-8 to ISO 2022 is ESC % @ ("\ex1b%@").
+Other ISO 2022 sequences (such as
+for switching the G0 and G1 sets) are not applicable in UTF-8 mode.
+.SS Security
+The Unicode and UCS standards require that producers of UTF-8
+shall use the shortest form possible, for example, producing a two-byte
+sequence with first byte 0xc0 is nonconforming.
+Unicode 3.1 has added the requirement that conforming programs must not accept
+non-shortest forms in their input.
+This is for security reasons: if
+user input is checked for possible security violations, a program
+might check only for the ASCII
+version of "/../" or ";" or NUL and overlook that there are many
+non-ASCII ways to represent these things in a non-shortest UTF-8
+encoding.
+.SS Standards
+ISO/IEC 10646-1:2000, Unicode 3.1, RFC\ 3629, Plan 9.
+.\" .SH AUTHOR
+.\" Markus Kuhn <mgk25@cl.cam.ac.uk>
+.SH SEE ALSO
+.BR locale (1),
+.BR nl_langinfo (3),
+.BR setlocale (3),
+.BR charsets (7),
+.BR unicode (7)
diff --git a/man7/utf8.7 b/man7/utf8.7
new file mode 100644
index 0000000..52a9008
--- /dev/null
+++ b/man7/utf8.7
@@ -0,0 +1 @@
+.so man7/utf-8.7
diff --git a/man7/uts_namespaces.7 b/man7/uts_namespaces.7
new file mode 100644
index 0000000..670d19e
--- /dev/null
+++ b/man7/uts_namespaces.7
@@ -0,0 +1,46 @@
+.\" Copyright (c) 2019 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.\"
+.TH uts_namespaces 7 2022-12-04 "Linux man-pages 6.05.01"
+.SH NAME
+uts_namespaces \- overview of Linux UTS namespaces
+.SH DESCRIPTION
+UTS namespaces provide isolation of two system identifiers:
+the hostname and the NIS domain name.
+These identifiers are set using
+.BR sethostname (2)
+and
+.BR setdomainname (2),
+and can be retrieved using
+.BR uname (2),
+.BR gethostname (2),
+and
+.BR getdomainname (2).
+Changes made to these identifiers are visible to all other
+processes in the same UTS namespace,
+but are not visible to processes in other UTS namespaces.
+.PP
+When a process creates a new UTS namespace using
+.BR clone (2)
+or
+.BR unshare (2)
+with the
+.B CLONE_NEWUTS
+flag, the hostname and domain name of the new UTS namespace are copied
+from the corresponding values in the caller's UTS namespace.
+.PP
+Use of UTS namespaces requires a kernel that is configured with the
+.B CONFIG_UTS_NS
+option.
+.SH SEE ALSO
+.BR nsenter (1),
+.BR unshare (1),
+.BR clone (2),
+.BR getdomainname (2),
+.BR gethostname (2),
+.BR setns (2),
+.BR uname (2),
+.BR unshare (2),
+.BR namespaces (7)
diff --git a/man7/vdso.7 b/man7/vdso.7
new file mode 100644
index 0000000..338ef72
--- /dev/null
+++ b/man7/vdso.7
@@ -0,0 +1,612 @@
+'\" t
+.\" Written by Mike Frysinger <vapier@gentoo.org>
+.\"
+.\" %%%LICENSE_START(PUBLIC_DOMAIN)
+.\" This page is in the public domain.
+.\" %%%LICENSE_END
+.\"
+.\" Useful background:
+.\" http://articles.manugarg.com/systemcallinlinux2_6.html
+.\" https://lwn.net/Articles/446528/
+.\" http://www.linuxjournal.com/content/creating-vdso-colonels-other-chicken
+.\" http://www.trilithium.com/johan/2005/08/linux-gate/
+.\"
+.TH vDSO 7 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+vdso \- overview of the virtual ELF dynamic shared object
+.SH SYNOPSIS
+.nf
+.B #include <sys/auxv.h>
+.PP
+.B void *vdso = (void *) getauxval(AT_SYSINFO_EHDR);
+.fi
+.SH DESCRIPTION
+The "vDSO" (virtual dynamic shared object) is a small shared library that
+the kernel automatically maps into the
+address space of all user-space applications.
+Applications usually do not need to concern themselves with these details
+as the vDSO is most commonly called by the C library.
+This way you can code in the normal way using standard functions
+and the C library will take care
+of using any functionality that is available via the vDSO.
+.PP
+Why does the vDSO exist at all?
+There are some system calls the kernel provides that
+user-space code ends up using frequently,
+to the point that such calls can dominate overall performance.
+This is due both to the frequency of the call and to the
+context-switch overhead that results
+from exiting user space and entering the kernel.
+.PP
+The rest of this documentation is geared toward the curious and/or
+C library writers rather than general developers.
+If you're trying to call the vDSO in your own application rather than using
+the C library, you're most likely doing it wrong.
+.SS Example background
+Making system calls can be slow.
+In x86 32-bit systems, you can trigger a software interrupt
+.RI ( "int $0x80" )
+to tell the kernel you wish to make a system call.
+However, this instruction is expensive: it goes through
+the full interrupt-handling paths
+in the processor's microcode as well as in the kernel.
+Newer processors have faster (but backward incompatible) instructions to
+initiate system calls.
+Rather than require the C library to figure out if this functionality is
+available at run time,
+the C library can use functions provided by the kernel in
+the vDSO.
+.PP
+Note that the terminology can be confusing.
+On x86 systems, the vDSO function
+used to determine the preferred method of making a system call is
+named "__kernel_vsyscall", but on x86-64,
+the term "vsyscall" also refers to an obsolete way to ask the kernel
+what time it is or what CPU the caller is on.
+.PP
+One frequently used system call is
+.BR gettimeofday (2).
+This system call is called both directly by user-space applications
+and indirectly by
+the C library.
+Think timestamps or timing loops or polling\[em]all of these
+frequently need to know what time it is right now.
+This information is also not secret\[em]any application in any
+privilege mode (root or any unprivileged user) will get the same answer.
+Thus the kernel arranges for the information required to answer
+this question to be placed in memory the process can access.
+Now a call to
+.BR gettimeofday (2)
+changes from a system call to a normal function
+call and a few memory accesses.
+.SS Finding the vDSO
+The base address of the vDSO (if one exists) is passed by the kernel to
+each program in the initial auxiliary vector (see
+.BR getauxval (3)),
+via the
+.B AT_SYSINFO_EHDR
+tag.
+.PP
+You must not assume the vDSO is mapped at any particular location in the
+user's memory map.
+The base address will usually be randomized at run time every time a new
+process image is created (at
+.BR execve (2)
+time).
+This is done for security reasons,
+to prevent "return-to-libc" attacks.
+.PP
+For some architectures, there is also an
+.B AT_SYSINFO
+tag.
+This is used only for locating the vsyscall entry point and is frequently
+omitted or set to 0 (meaning it's not available).
+This tag is a throwback to the initial vDSO work (see
+.I History
+below) and its use should be avoided.
+.SS File format
+Since the vDSO is a fully formed ELF image, you can do symbol lookups on it.
+This allows new symbols to be added with newer kernel releases,
+and allows the C library to detect available functionality at
+run time when running under different kernel versions.
+Oftentimes the C library will do detection with the first call and then
+cache the result for subsequent calls.
+.PP
+All symbols are also versioned (using the GNU version format).
+This allows the kernel to update the function signature without breaking
+backward compatibility.
+This means changing the arguments that the function accepts as well as the
+return value.
+Thus, when looking up a symbol in the vDSO,
+you must always include the version
+to match the ABI you expect.
+.PP
+Typically the vDSO follows the naming convention of prefixing
+all symbols with "__vdso_" or "__kernel_"
+so as to distinguish them from other standard symbols.
+For example, the "gettimeofday" function is named "__vdso_gettimeofday".
+.PP
+You use the standard C calling conventions when calling
+any of these functions.
+No need to worry about weird register or stack behavior.
+.SH NOTES
+.SS Source
+When you compile the kernel,
+it will automatically compile and link the vDSO code for you.
+You will frequently find it under the architecture-specific directory:
+.PP
+.in +4n
+.EX
+find arch/$ARCH/ \-name \[aq]*vdso*.so*\[aq] \-o \-name \[aq]*gate*.so*\[aq]
+.EE
+.in
+.\"
+.SS vDSO names
+The name of the vDSO varies across architectures.
+It will often show up in things like glibc's
+.BR ldd (1)
+output.
+The exact name should not matter to any code, so do not hardcode it.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+user ABI vDSO name
+_
+aarch64 linux\-vdso.so.1
+arm linux\-vdso.so.1
+ia64 linux\-gate.so.1
+mips linux\-vdso.so.1
+ppc/32 linux\-vdso32.so.1
+ppc/64 linux\-vdso64.so.1
+riscv linux\-vdso.so.1
+s390 linux\-vdso32.so.1
+s390x linux\-vdso64.so.1
+sh linux\-gate.so.1
+i386 linux\-gate.so.1
+x86-64 linux\-vdso.so.1
+x86/x32 linux\-vdso.so.1
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS strace(1), seccomp(2), and the vDSO
+When tracing system calls with
+.BR strace (1),
+symbols (system calls) that are exported by the vDSO will
+.I not
+appear in the trace output.
+Those system calls will likewise not be visible to
+.BR seccomp (2)
+filters.
+.SH ARCHITECTURE-SPECIFIC NOTES
+The subsections below provide architecture-specific notes
+on the vDSO.
+.PP
+Note that the vDSO that is used is based on the ABI of your user-space code
+and not the ABI of the kernel.
+Thus, for example,
+when you run an i386 32-bit ELF binary,
+you'll get the same vDSO regardless of whether you run it under
+an i386 32-bit kernel or under an x86-64 64-bit kernel.
+Therefore, the name of the user-space ABI should be used to determine
+which of the sections below is relevant.
+.SS ARM functions
+.\" See linux/arch/arm/vdso/vdso.lds.S
+.\" Commit: 8512287a8165592466cb9cb347ba94892e9c56a5
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__vdso_gettimeofday LINUX_2.6 (exported since Linux 4.1)
+__vdso_clock_gettime LINUX_2.6 (exported since Linux 4.1)
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.PP
+.\" See linux/arch/arm/kernel/entry-armv.S
+.\" See linux/Documentation/arm/kernel_user_helpers.rst
+Additionally, the ARM port has a code page full of utility functions.
+Since it's just a raw page of code, there is no ELF information for doing
+symbol lookups or versioning.
+It does provide support for different versions though.
+.PP
+For information on this code page,
+it's best to refer to the kernel documentation
+as it's extremely detailed and covers everything you need to know:
+.IR Documentation/arm/kernel_user_helpers.rst .
+.SS aarch64 functions
+.\" See linux/arch/arm64/kernel/vdso/vdso.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_rt_sigreturn LINUX_2.6.39
+__kernel_gettimeofday LINUX_2.6.39
+__kernel_clock_gettime LINUX_2.6.39
+__kernel_clock_getres LINUX_2.6.39
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS bfin (Blackfin) functions (port removed in Linux 4.17)
+.\" See linux/arch/blackfin/kernel/fixed_code.S
+.\" See http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:fixed-code
+As this CPU lacks a memory management unit (MMU),
+it doesn't set up a vDSO in the normal sense.
+Instead, it maps at boot time a few raw functions into
+a fixed location in memory.
+User-space applications then call directly into that region.
+There is no provision for backward compatibility
+beyond sniffing raw opcodes,
+but as this is an embedded CPU, it can get away with things\[em]some of the
+object formats it runs aren't even ELF based (they're bFLT/FLAT).
+.PP
+For information on this code page,
+it's best to refer to the public documentation:
+.br
+http://docs.blackfin.uclinux.org/doku.php?id=linux\-kernel:fixed\-code
+.SS mips functions
+.\" See linux/arch/mips/vdso/vdso.ld.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_gettimeofday LINUX_2.6 (exported since Linux 4.4)
+__kernel_clock_gettime LINUX_2.6 (exported since Linux 4.4)
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS ia64 (Itanium) functions
+.\" See linux/arch/ia64/kernel/gate.lds.S
+.\" Also linux/arch/ia64/kernel/fsys.S and linux/Documentation/ia64/fsys.rst
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_sigtramp LINUX_2.5
+__kernel_syscall_via_break LINUX_2.5
+__kernel_syscall_via_epc LINUX_2.5
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.PP
+The Itanium port is somewhat tricky.
+In addition to the vDSO above, it also has "light-weight system calls"
+(also known as "fast syscalls" or "fsys").
+You can invoke these via the
+.I __kernel_syscall_via_epc
+vDSO helper.
+The system calls listed here have the same semantics as if you called them
+directly via
+.BR syscall (2),
+so refer to the relevant
+documentation for each.
+The table below lists the functions available via this mechanism.
+.if t \{\
+.ft CW
+\}
+.TS
+l.
+function
+_
+clock_gettime
+getcpu
+getpid
+getppid
+gettimeofday
+set_tid_address
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS parisc (hppa) functions
+.\" See linux/arch/parisc/kernel/syscall.S
+.\" See linux/Documentation/parisc/registers.rst
+The parisc port has a code page with utility functions
+called a gateway page.
+Rather than use the normal ELF auxiliary vector approach,
+it passes the address of
+the page to the process via the SR2 register.
+The permissions on the page are such that merely executing those addresses
+automatically executes with kernel privileges and not in user space.
+This is done to match the way HP-UX works.
+.PP
+Since it's just a raw page of code, there is no ELF information for doing
+symbol lookups or versioning.
+Simply call into the appropriate offset via the branch instruction,
+for example:
+.PP
+.in +4n
+.EX
+ble <offset>(%sr2, %r0)
+.EE
+.in
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+offset function
+_
+00b0 lws_entry (CAS operations)
+00e0 set_thread_pointer (used by glibc)
+0100 linux_gateway_entry (syscall)
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS ppc/32 functions
+.\" See linux/arch/powerpc/kernel/vdso32/vdso32.lds.S
+The table below lists the symbols exported by the vDSO.
+The functions marked with a
+.I *
+are available only when the kernel is
+a PowerPC64 (64-bit) kernel.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_clock_getres LINUX_2.6.15
+__kernel_clock_gettime LINUX_2.6.15
+__kernel_clock_gettime64 LINUX_5.11
+__kernel_datapage_offset LINUX_2.6.15
+__kernel_get_syscall_map LINUX_2.6.15
+__kernel_get_tbfreq LINUX_2.6.15
+__kernel_getcpu \fI*\fR LINUX_2.6.15
+__kernel_gettimeofday LINUX_2.6.15
+__kernel_sigtramp_rt32 LINUX_2.6.15
+__kernel_sigtramp32 LINUX_2.6.15
+__kernel_sync_dicache LINUX_2.6.15
+__kernel_sync_dicache_p5 LINUX_2.6.15
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.PP
+Before Linux 5.6,
+.\" commit 654abc69ef2e69712e6d4e8a6cb9292b97a4aa39
+the
+.B CLOCK_REALTIME_COARSE
+and
+.B CLOCK_MONOTONIC_COARSE
+clocks are
+.I not
+supported by the
+.I __kernel_clock_getres
+and
+.I __kernel_clock_gettime
+interfaces;
+the kernel falls back to the real system call.
+.SS ppc/64 functions
+.\" See linux/arch/powerpc/kernel/vdso64/vdso64.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_clock_getres LINUX_2.6.15
+__kernel_clock_gettime LINUX_2.6.15
+__kernel_datapage_offset LINUX_2.6.15
+__kernel_get_syscall_map LINUX_2.6.15
+__kernel_get_tbfreq LINUX_2.6.15
+__kernel_getcpu LINUX_2.6.15
+__kernel_gettimeofday LINUX_2.6.15
+__kernel_sigtramp_rt64 LINUX_2.6.15
+__kernel_sync_dicache LINUX_2.6.15
+__kernel_sync_dicache_p5 LINUX_2.6.15
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.PP
+Before Linux 4.16,
+.\" commit 5c929885f1bb4b77f85b1769c49405a0e0f154a1
+the
+.B CLOCK_REALTIME_COARSE
+and
+.B CLOCK_MONOTONIC_COARSE
+clocks are
+.I not
+supported by the
+.I __kernel_clock_getres
+and
+.I __kernel_clock_gettime
+interfaces;
+the kernel falls back to the real system call.
+.SS riscv functions
+.\" See linux/arch/riscv/kernel/vdso/vdso.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__vdso_rt_sigreturn LINUX_4.15
+__vdso_gettimeofday LINUX_4.15
+__vdso_clock_gettime LINUX_4.15
+__vdso_clock_getres LINUX_4.15
+__vdso_getcpu LINUX_4.15
+__vdso_flush_icache LINUX_4.15
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS s390 functions
+.\" See linux/arch/s390/kernel/vdso32/vdso32.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_clock_getres LINUX_2.6.29
+__kernel_clock_gettime LINUX_2.6.29
+__kernel_gettimeofday LINUX_2.6.29
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS s390x functions
+.\" See linux/arch/s390/kernel/vdso64/vdso64.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_clock_getres LINUX_2.6.29
+__kernel_clock_gettime LINUX_2.6.29
+__kernel_gettimeofday LINUX_2.6.29
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS sh (SuperH) functions
+.\" See linux/arch/sh/kernel/vsyscall/vsyscall.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_rt_sigreturn LINUX_2.6
+__kernel_sigreturn LINUX_2.6
+__kernel_vsyscall LINUX_2.6
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS i386 functions
+.\" See linux/arch/x86/vdso/vdso32/vdso32.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__kernel_sigreturn LINUX_2.5
+__kernel_rt_sigreturn LINUX_2.5
+__kernel_vsyscall LINUX_2.5
+.\" Added in 7a59ed415f5b57469e22e41fc4188d5399e0b194 and updated
+.\" in 37c975545ec63320789962bf307f000f08fabd48.
+__vdso_clock_gettime LINUX_2.6 (exported since Linux 3.15)
+__vdso_gettimeofday LINUX_2.6 (exported since Linux 3.15)
+__vdso_time LINUX_2.6 (exported since Linux 3.15)
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS x86-64 functions
+.\" See linux/arch/x86/vdso/vdso.lds.S
+The table below lists the symbols exported by the vDSO.
+All of these symbols are also available without the "__vdso_" prefix, but
+you should ignore those and stick to the names below.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__vdso_clock_gettime LINUX_2.6
+__vdso_getcpu LINUX_2.6
+__vdso_gettimeofday LINUX_2.6
+__vdso_time LINUX_2.6
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS x86/x32 functions
+.\" See linux/arch/x86/vdso/vdsox32.lds.S
+The table below lists the symbols exported by the vDSO.
+.if t \{\
+.ft CW
+\}
+.TS
+l l.
+symbol version
+_
+__vdso_clock_gettime LINUX_2.6
+__vdso_getcpu LINUX_2.6
+__vdso_gettimeofday LINUX_2.6
+__vdso_time LINUX_2.6
+.TE
+.if t \{\
+.in
+.ft P
+\}
+.SS History
+The vDSO was originally just a single function\[em]the vsyscall.
+In older kernels, you might see that name
+in a process's memory map rather than "vdso".
+Over time, people realized that this mechanism
+was a great way to pass more functionality
+to user space, so it was reconceived as a vDSO in the current format.
+.SH SEE ALSO
+.BR syscalls (2),
+.BR getauxval (3),
+.BR proc (5)
+.PP
+The documents, examples, and source code in the Linux source code tree:
+.PP
+.in +4n
+.EX
+Documentation/ABI/stable/vdso
+Documentation/ia64/fsys.rst
+Documentation/vDSO/* (includes examples of using the vDSO)
+.PP
+find arch/ \-iname \[aq]*vdso*\[aq] \-o \-iname \[aq]*gate*\[aq]
+.EE
+.in
diff --git a/man7/vsock.7 b/man7/vsock.7
new file mode 100644
index 0000000..f6c3711
--- /dev/null
+++ b/man7/vsock.7
@@ -0,0 +1,232 @@
+.\" Copyright (C) 2018, Stefan Hajnoczi <stefanha@redhat.com>
+.\"
+.\" SPDX-License-Identifier: Linux-man-pages-copyleft
+.\"
+.TH vsock 7 2022-10-30 "Linux man-pages 6.05.01"
+.SH NAME
+vsock \- Linux VSOCK address family
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <linux/vm_sockets.h>
+.PP
+.IB stream_socket " = socket(AF_VSOCK, SOCK_STREAM, 0);"
+.IB datagram_socket " = socket(AF_VSOCK, SOCK_DGRAM, 0);"
+.fi
+.SH DESCRIPTION
+The VSOCK address family facilitates communication between virtual machines and
+the host they are running on.
+This address family is used by guest agents and
+hypervisor services that need a communications channel that is independent of
+virtual machine network configuration.
+.PP
+Valid socket types are
+.B SOCK_STREAM
+and
+.BR SOCK_DGRAM .
+.B SOCK_STREAM
+provides connection-oriented byte streams with guaranteed, in-order delivery.
+.B SOCK_DGRAM
+provides a connectionless datagram packet service with best-effort delivery and
+best-effort ordering.
+Availability of these socket types is dependent on the
+underlying hypervisor.
+.PP
+A new socket is created with
+.PP
+.in +4n
+.EX
+socket(AF_VSOCK, socket_type, 0);
+.EE
+.in
+.PP
+When a process wants to establish a connection, it calls
+.BR connect (2)
+with a given destination socket address.
+The socket is automatically bound to a free port if unbound.
+.PP
+A process can listen for incoming connections by first binding to a socket
+address using
+.BR bind (2)
+and then calling
+.BR listen (2).
+.PP
+Data is transmitted using the
+.BR send (2)
+or
+.BR write (2)
+families of system calls and data is received using the
+.BR recv (2)
+or
+.BR read (2)
+families of system calls.
+.SS Address format
+A socket address is defined as a combination of a 32-bit Context Identifier
+(CID) and a 32-bit port number.
+The CID identifies the source or destination,
+which is either a virtual machine or the host.
+The port number differentiates between multiple services running on
+a single machine.
+.PP
+.in +4n
+.EX
+struct sockaddr_vm {
+ sa_family_t svm_family; /* Address family: AF_VSOCK */
+ unsigned short svm_reserved1;
+ unsigned int svm_port; /* Port # in host byte order */
+ unsigned int svm_cid; /* Address in host byte order */
+ unsigned char svm_zero[sizeof(struct sockaddr) \-
+ sizeof(sa_family_t) \-
+ sizeof(unsigned short) \-
+ sizeof(unsigned int) \-
+ sizeof(unsigned int)];
+};
+.EE
+.in
+.PP
+.I svm_family
+is always set to
+.BR AF_VSOCK .
+.I svm_reserved1
+is always set to 0.
+.I svm_port
+contains the port number in host byte order.
+The port numbers below 1024 are called
+.IR "privileged ports" .
+Only a process with the
+.B CAP_NET_BIND_SERVICE
+capability may
+.BR bind (2)
+to these port numbers.
+.I svm_zero
+must be zero-filled.
+.PP
+There are several special addresses:
+.B VMADDR_CID_ANY
+(\-1U)
+means any address for binding;
+.B VMADDR_CID_HYPERVISOR
+(0) is reserved for services built into the hypervisor;
+.B VMADDR_CID_LOCAL
+(1) is the well-known address for local communication (loopback);
+.B VMADDR_CID_HOST
+(2)
+is the well-known address of the host.
+.PP
+The special constant
+.B VMADDR_PORT_ANY
+(\-1U)
+means any port number for binding.
+.SS Live migration
+Sockets are affected by live migration of virtual machines.
+Connected
+.B SOCK_STREAM
+sockets become disconnected when the virtual machine migrates to a new host.
+Applications must reconnect when this happens.
+.PP
+The local CID may change across live migration if the old CID is
+not available on the new host.
+Bound sockets are automatically updated to the new CID.
+.SS Ioctls
+The following ioctls are available on the
+.I /dev/vsock
+device.
+.TP
+.B IOCTL_VM_SOCKETS_GET_LOCAL_CID
+Get the CID of the local machine.
+The argument is a pointer to an
+.IR "unsigned int" .
+.IP
+.in +4n
+.EX
+ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid);
+.EE
+.in
+.IP
+Consider using
+.B VMADDR_CID_ANY
+when binding instead of getting the local CID with
+.BR IOCTL_VM_SOCKETS_GET_LOCAL_CID .
+.SS Local communication
+.B VMADDR_CID_LOCAL
+(1) directs packets to the same host that generated them.
+This is useful
+for testing applications on a single host and for debugging.
+.PP
+The local CID obtained with
+.B IOCTL_VM_SOCKETS_GET_LOCAL_CID
+can be used for the same purpose, but it is preferable to use
+.BR VMADDR_CID_LOCAL .
+.SH ERRORS
+.TP
+.B EACCES
+Unable to bind to a privileged port without the
+.B CAP_NET_BIND_SERVICE
+capability.
+.TP
+.B EADDRINUSE
+Unable to bind to a port that is already in use.
+.TP
+.B EADDRNOTAVAIL
+Unable to find a free port for binding or unable to bind to a nonlocal CID.
+.TP
+.B EINVAL
+Invalid parameters.
+This includes:
+attempting to bind a socket that is already bound, providing an invalid struct
+.IR sockaddr_vm ,
+and other input validation errors.
+.TP
+.B ENOPROTOOPT
+Invalid socket option in
+.BR setsockopt (2)
+or
+.BR getsockopt (2).
+.TP
+.B ENOTCONN
+Unable to perform operation on an unconnected socket.
+.TP
+.B EOPNOTSUPP
+Operation not supported.
+This includes:
+the
+.B MSG_OOB
+flag that is not implemented for the
+.BR send (2)
+family of syscalls and
+.B MSG_PEEK
+for the
+.BR recv (2)
+family of syscalls.
+.TP
+.B EPROTONOSUPPORT
+Invalid socket protocol number.
+The protocol should always be 0.
+.TP
+.B ESOCKTNOSUPPORT
+Unsupported socket type in
+.BR socket (2).
+Only
+.B SOCK_STREAM
+and
+.B SOCK_DGRAM
+are valid.
+.SH VERSIONS
+Support for VMware (VMCI) has been available since Linux 3.9.
+KVM (virtio) is supported since Linux 4.8.
+Hyper-V is supported since Linux 4.14.
+.PP
+.B VMADDR_CID_LOCAL
+is supported since Linux 5.6.
+.\" commit ef343b35d46667668a099655fca4a5b2e43a5dfe
+Local communication in the guest and on the host is available since Linux 5.6.
+Previous versions supported only local communication within a guest
+(not on the host), and with only some transports (VMCI and virtio).
+.SH SEE ALSO
+.BR bind (2),
+.BR connect (2),
+.BR listen (2),
+.BR recv (2),
+.BR send (2),
+.BR socket (2),
+.BR capabilities (7)
diff --git a/man7/x25.7 b/man7/x25.7
new file mode 100644
index 0000000..1dc498e
--- /dev/null
+++ b/man7/x25.7
@@ -0,0 +1,122 @@
+.\" SPDX-License-Identifier: Linux-man-pages-1-para
+.\"
+.\" This man page is Copyright (C) 1998 Heiner Eisen.
+.\"
+.\" $Id: x25.7,v 1.4 1999/05/18 10:35:12 freitag Exp $
+.\"
+.TH x25 7 2023-07-15 "Linux man-pages 6.05.01"
+.SH NAME
+x25 \- ITU-T X.25 / ISO-8208 protocol interface
+.SH SYNOPSIS
+.nf
+.B #include <sys/socket.h>
+.B #include <linux/x25.h>
+.PP
+.IB x25_socket " = socket(AF_X25, SOCK_SEQPACKET, 0);"
+.fi
+.SH DESCRIPTION
+X25 sockets provide an interface to the X.25 packet layer protocol.
+This allows applications to
+communicate over a public X.25 data network as standardized by
+International Telecommunication Union's recommendation X.25
+(X.25 DTE-DCE mode).
+X25 sockets can also be used for communication
+without an intermediate X.25 network (X.25 DTE-DTE mode) as described
+in ISO-8208.
+.PP
+Message boundaries are preserved \[em] a
+.BR read (2)
+from a socket will
+retrieve the same chunk of data as output with the corresponding
+.BR write (2)
+to the peer socket.
+When necessary, the kernel takes care
+of segmenting and reassembling long messages by means of
+the X.25 M-bit.
+There is no hard-coded upper limit for the
+message size.
+However, reassembling of a long message might fail if
+there is a temporary lack of system resources or when other constraints
+(such as socket memory or buffer size limits) become effective.
+If that
+occurs, the X.25 connection will be reset.
+.SS Socket addresses
+The
+.B AF_X25
+socket address family uses the
+.I struct sockaddr_x25
+for representing network addresses as defined in ITU-T
+recommendation X.121.
+.PP
+.in +4n
+.EX
+struct sockaddr_x25 {
+ sa_family_t sx25_family; /* must be AF_X25 */
+ x25_address sx25_addr; /* X.121 Address */
+};
+.EE
+.in
+.PP
+.I sx25_addr
+contains a char array
+.I x25_addr[]
+to be interpreted as a null-terminated string.
+.I sx25_addr.x25_addr[]
+consists of up to 15 (not counting the terminating null byte) ASCII
+characters forming the X.121 address.
+Only the decimal digit characters from \[aq]0\[aq] to \[aq]9\[aq] are allowed.
+.SS Socket options
+The following X.25-specific socket options can be set by using
+.BR setsockopt (2)
+and read with
+.BR getsockopt (2)
+with the
+.I level
+argument set to
+.BR SOL_X25 .
+.TP
+.B X25_QBITINCL
+Controls whether the X.25 Q-bit (Qualified Data Bit) is accessible by the
+user.
+It expects an integer argument.
+If set to 0 (default),
+the Q-bit is never set for outgoing packets and the Q-bit of incoming
+packets is ignored.
+If set to 1, an additional first byte is prepended
+to each message read from or written to the socket.
+For data read from
+the socket, a 0 first byte indicates that the Q-bit of the corresponding
+incoming data packets was not set.
+A first byte with value 1 indicates
+that the Q-bit of the corresponding incoming data packets was set.
+If the first byte of the data written to the socket is 1, the Q-bit of the
+corresponding outgoing data packets will be set.
+If the first byte is 0,
+the Q-bit will not be set.
+.SH VERSIONS
+The AF_X25 protocol family is a new feature of Linux 2.2.
+.SH BUGS
+Plenty, as the X.25 PLP implementation is
+.BR CONFIG_EXPERIMENTAL .
+.PP
+This man page is incomplete.
+.PP
+There is no dedicated application programmer's header file yet;
+you need to include the kernel header file
+.IR <linux/x25.h> .
+.B CONFIG_EXPERIMENTAL
+might also imply that future versions of the
+interface are not binary compatible.
+.PP
+X.25 N-Reset events are not propagated to the user process yet.
+Thus,
+if a reset occurred, data might be lost without notice.
+.SH SEE ALSO
+.BR socket (2),
+.BR socket (7)
+.PP
+Jonathan Simon Naylor:
+\[lq]The Re-Analysis and Re-Implementation of X.25.\[rq]
+The URL is
+.UR ftp://ftp.pspt.fi\:/pub\:/ham\:/linux\:/ax25\:/x25doc.tgz
+.UE .
diff --git a/man7/xattr.7 b/man7/xattr.7
new file mode 100644
index 0000000..c2f12c9
--- /dev/null
+++ b/man7/xattr.7
@@ -0,0 +1,180 @@
+.\" Extended attributes manual page
+.\"
+.\" Copyright (C) 2000, 2002, 2007 Andreas Gruenbacher <agruen@suse.de>
+.\" Copyright (C) 2001, 2002, 2004, 2007 Silicon Graphics, Inc.
+.\" All rights reserved.
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.TH xattr 7 2023-02-05 "Linux man-pages 6.05.01"
+.SH NAME
+xattr \- Extended attributes
+.SH DESCRIPTION
+Extended attributes are name:value pairs associated permanently with
+files and directories, similar to the environment strings associated
+with a process.
+An attribute may be defined or undefined.
+If it is defined, its value may be empty or non-empty.
+.PP
+Extended attributes are extensions to the normal attributes which are
+associated with all inodes in the system (i.e., the
+.BR stat (2)
+data).
+They are often used to provide additional functionality
+to a filesystem\[em]for example, additional security features such as
+Access Control Lists (ACLs) may be implemented using extended attributes.
+.PP
+Users with search access to a file or directory may use
+.BR listxattr (2)
+to retrieve a list of attribute names defined for that file or directory.
+.PP
+Extended attributes are accessed as atomic objects.
+Reading
+.RB ( getxattr (2))
+retrieves the whole value of an attribute and stores it in a buffer.
+Writing
+.RB ( setxattr (2))
+replaces any previous value with the new value.
+.PP
+Space consumed for extended attributes may be counted towards the disk quotas
+of the file owner and file group.
+.SS Extended attribute namespaces
+Attribute names are null-terminated strings.
+The attribute name is always specified in the fully qualified
+.I namespace.attribute
+form, for example,
+.IR user.mime_type ,
+.IR trusted.md5sum ,
+.IR system.posix_acl_access ,
+or
+.IR security.selinux .
+.PP
+The namespace mechanism is used to define different classes of extended
+attributes.
+These different classes exist for several reasons;
+for example, the permissions
+and capabilities required for manipulating extended attributes of one
+namespace may differ from those of another.
+.PP
+Currently, the
+.IR security ,
+.IR system ,
+.IR trusted ,
+and
+.I user
+extended attribute classes are defined as described below.
+Additional classes may be added in the future.
+.SS Extended security attributes
+The security attribute namespace is used by kernel security modules,
+such as Security Enhanced Linux, and also to implement file capabilities (see
+.BR capabilities (7)).
+Read and write access permissions to security attributes depend on the
+policy implemented for each security attribute by the security module.
+When no security module is loaded, all processes have read access to
+extended security attributes, and write access is limited to processes
+that have the
+.B CAP_SYS_ADMIN
+capability.
+.SS System extended attributes
+System extended attributes are used by the kernel to store system
+objects such as Access Control Lists.
+Read and write
+access permissions to system attributes depend on the policy implemented
+for each system attribute implemented by filesystems in the kernel.
+.SS Trusted extended attributes
+Trusted extended attributes are visible and accessible only to processes that
+have the
+.B CAP_SYS_ADMIN
+capability.
+Attributes in this class are used to implement mechanisms in user
+space (i.e., outside the kernel) which keep information in extended attributes
+to which ordinary processes should not have access.
+.SS User extended attributes
+User extended attributes may be assigned to files and directories for
+storing arbitrary additional information such as the mime type,
+character set or encoding of a file.
+The access permissions for user
+attributes are defined by the file permission bits:
+read permission is required to retrieve the attribute value,
+and write permission is required to change it.
+.PP
+The file permission bits of regular files and directories are
+interpreted differently from the file permission bits of special files
+and symbolic links.
+For regular files and directories the file
+permission bits define access to the file's contents, while for device special
+files they define access to the device described by the special file.
+The file permissions of symbolic links are not used in access checks.
+These differences would allow users to consume filesystem resources in
+a way not controllable by disk quotas for group or world writable
+special files and directories.
+.PP
+For this reason,
+user extended attributes are allowed only for regular files and directories,
+and access to user extended attributes is restricted to the
+owner and to users with appropriate capabilities for directories with the
+sticky bit set (see the
+.BR chmod (1)
+manual page for an explanation of the sticky bit).
+.SS Filesystem differences
+The kernel and the filesystem may place limits on the maximum number
+and size of extended attributes that can be associated with a file.
+The VFS-imposed limits on attribute names and values are 255 bytes
+and 64\ kB, respectively.
+The list of attribute names that
+can be returned is also limited to 64\ kB
+(see BUGS in
+.BR listxattr (2)).
+.PP
+Some filesystems, such as Reiserfs (and, historically, ext2 and ext3),
+require the filesystem to be mounted with the
+.B user_xattr
+mount option in order for user extended attributes to be used.
+.PP
+In the current ext2, ext3, and ext4 filesystem implementations,
+the total bytes used by the names and values of all of a file's
+extended attributes must fit in a single filesystem block (1024, 2048
+or 4096 bytes, depending on the block size specified when the
+filesystem was created).
+.PP
+In the Btrfs, XFS, and Reiserfs filesystem implementations, there is no
+practical limit on the number of extended attributes
+associated with a file, and the algorithms used to store extended
+attribute information on disk are scalable.
+.PP
+In the JFS, XFS, and Reiserfs filesystem implementations,
+the limit on bytes used in an extended attribute value is the ceiling imposed by the VFS.
+.PP
+In the Btrfs filesystem implementation,
+the total bytes used for the name, value, and implementation overhead bytes
+is limited to the filesystem
+.I nodesize
+value (16\ kB by default).
+.SH STANDARDS
+Extended attributes are not specified in POSIX.1, but some other systems
+(e.g., the BSDs and Solaris) provide a similar feature.
+.SH NOTES
+Since the filesystems on which extended attributes are stored might also
+be used on architectures with a different byte order and machine word
+size, care should be taken to store attribute values in an
+architecture-independent format.
+.PP
+This page was formerly named
+.BR attr (5).
+.\" .SH AUTHORS
+.\" Andreas Gruenbacher,
+.\" .RI < a.gruenbacher@bestbits.at >
+.\" and the SGI XFS development team,
+.\" .RI < linux-xfs@oss.sgi.com >.
+.SH SEE ALSO
+.BR attr (1),
+.BR getfattr (1),
+.BR setfattr (1),
+.BR getxattr (2),
+.BR ioctl_iflags (2),
+.BR listxattr (2),
+.BR removexattr (2),
+.BR setxattr (2),
+.BR acl (5),
+.BR capabilities (7),
+.BR selinux (8)