summary refs log tree commit diff stats
path: root/upstream/opensuse-tumbleweed/man2/clone.2
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- upstream/opensuse-tumbleweed/man2/clone.2 1944
1 files changed, 1944 insertions, 0 deletions
diff --git a/upstream/opensuse-tumbleweed/man2/clone.2 b/upstream/opensuse-tumbleweed/man2/clone.2
new file mode 100644
index 00000000..38d2b905
--- /dev/null
+++ b/upstream/opensuse-tumbleweed/man2/clone.2
@@ -0,0 +1,1944 @@
+'\" t
+.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
+.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013, 2019
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.\" Modified by Michael Haardt <michael@moria.de>
+.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
+.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
+.\" New man page (copied from 'fork.2').
+.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
+.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
+.\" Modified 26 Jun 2001 by Michael Kerrisk
+.\" Mostly upgraded to Linux 2.4.x
+.\" Added prototype for sys_clone() plus description
+.\" Added CLONE_THREAD with a brief description of thread groups
+.\" Added CLONE_PARENT and revised entire page to remove ambiguity
+.\" between "calling process" and "parent process"
+.\" Added CLONE_PTRACE and CLONE_VFORK
+.\" Added EPERM and EINVAL error codes
+.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
+.\" various other minor tidy ups and clarifications.
+.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
+.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
+.\" Added description for CLONE_NEWNS, which was added in Linux 2.4.19
+.\" Slightly rephrased, aeb.
+.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
+.\" Modified 1 Jan 2004 - various updates, aeb
+.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
+.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
+.\" wrapper under BUGS.
+.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
+.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
+.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
+.\" 2008-11-18, mtk, document CLONE_NEWPID
+.\" 2008-11-19, mtk, document CLONE_NEWUTS
+.\" 2008-11-19, mtk, document CLONE_NEWIPC
+.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
+.\"
+.TH clone 2 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+clone, __clone2, clone3 \- create a child process
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+/* Prototype for the glibc wrapper function */
+.PP
+.B #define _GNU_SOURCE
+.B #include <sched.h>
+.PP
+.BI "int clone(int (*" "fn" ")(void *_Nullable), void *" stack \
+", int " flags ,
+.BI " void *_Nullable " "arg" ", ..." \
+" \fR/*\fP" " pid_t *_Nullable " parent_tid ,
+.BI " void *_Nullable " tls ,
+.BI " pid_t *_Nullable " child_tid " \fR*/\fP );"
+.PP
+/* For the prototype of the raw clone() system call, see NOTES */
+.PP
+.BR "#include <linux/sched.h>" " /* Definition of " "struct clone_args" " */"
+.BR "#include <sched.h>" " /* Definition of " CLONE_* " constants */"
+.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
+.B #include <unistd.h>
+.PP
+.BI "long syscall(SYS_clone3, struct clone_args *" cl_args ", size_t " size );
+.fi
+.PP
+.IR Note :
+glibc provides no wrapper for
+.BR clone3 (),
+necessitating the use of
+.BR syscall (2).
+.SH DESCRIPTION
+These system calls
+create a new ("child") process, in a manner similar to
+.BR fork (2).
+.PP
+By contrast with
+.BR fork (2),
+these system calls provide more precise control over what pieces of execution
+context are shared between the calling process and the child process.
+For example, using these system calls, the caller can control whether
+or not the two processes share the virtual address space,
+the table of file descriptors, and the table of signal handlers.
+These system calls also allow the new child process to be placed
+in separate
+.BR namespaces (7).
+.PP
+Note that in this manual
+page, "calling process" normally corresponds to "parent process".
+But see the descriptions of
+.B CLONE_PARENT
+and
+.B CLONE_THREAD
+below.
+.PP
+This page describes the following interfaces:
+.IP \[bu] 3
+The glibc
+.BR clone ()
+wrapper function and the underlying system call on which it is based.
+The main text describes the wrapper function;
+the differences for the raw system call
+are described toward the end of this page.
+.IP \[bu]
+The newer
+.BR clone3 ()
+system call.
+.PP
+In the remainder of this page, the terminology "the clone call" is used
+when noting details that apply to all of these interfaces.
+.\"
+.SS The clone() wrapper function
+When the child process is created with the
+.BR clone ()
+wrapper function,
+it commences execution by calling the function pointed to by the argument
+.IR fn .
+(This differs from
+.BR fork (2),
+where execution continues in the child from the point
+of the
+.BR fork (2)
+call.)
+The
+.I arg
+argument is passed as the argument of the function
+.IR fn .
+.PP
+When the
+.IR fn ( arg )
+function returns, the child process terminates.
+The integer returned by
+.I fn
+is the exit status for the child process.
+The child process may also terminate explicitly by calling
+.BR exit (2)
+or after receiving a fatal signal.
+.PP
+The
+.I stack
+argument specifies the location of the stack used by the child process.
+Since the child and calling process may share memory,
+it is not possible for the child process to execute in the
+same stack as the calling process.
+The calling process must therefore
+set up memory space for the child stack and pass a pointer to this
+space to
+.BR clone ().
+Stacks grow downward on all processors that run Linux
+(except the HP PA processors), so
+.I stack
+usually points to the topmost address of the memory space set up for
+the child stack.
+Note that
+.BR clone ()
+does not provide a means whereby the caller can inform the kernel of the
+size of the stack area.
+.PP
+The remaining arguments to
+.BR clone ()
+are discussed below.
+.\"
+.SS clone3()
+The
+.BR clone3 ()
+system call provides a superset of the functionality of the older
+.BR clone ()
+interface.
+It also provides a number of API improvements, including:
+space for additional flags bits;
+cleaner separation in the use of various arguments;
+and the ability to specify the size of the child's stack area.
+.PP
+As with
+.BR fork (2),
+.BR clone3 ()
+returns in both the parent and the child.
+It returns 0 in the child process and returns the PID of the child
+in the parent.
+.PP
+The
+.I cl_args
+argument of
+.BR clone3 ()
+is a structure of the following form:
+.PP
+.in +4n
+.EX
+struct clone_args {
+ u64 flags; /* Flags bit mask */
+ u64 pidfd; /* Where to store PID file descriptor
+ (\fIint *\fP) */
+ u64 child_tid; /* Where to store child TID,
+ in child\[aq]s memory (\fIpid_t *\fP) */
+ u64 parent_tid; /* Where to store child TID,
+ in parent\[aq]s memory (\fIpid_t *\fP) */
+ u64 exit_signal; /* Signal to deliver to parent on
+ child termination */
+ u64 stack; /* Pointer to lowest byte of stack */
+ u64 stack_size; /* Size of stack */
+ u64 tls; /* Location of new TLS */
+ u64 set_tid; /* Pointer to a \fIpid_t\fP array
+ (since Linux 5.5) */
+ u64 set_tid_size; /* Number of elements in \fIset_tid\fP
+ (since Linux 5.5) */
+ u64 cgroup; /* File descriptor for target cgroup
+ of child (since Linux 5.7) */
+};
+.EE
+.in
+.PP
+The
+.I size
+argument that is supplied to
+.BR clone3 ()
+should be initialized to the size of this structure.
+(The existence of the
+.I size
+argument permits future extensions to the
+.I clone_args
+structure.)
+.PP
+The stack for the child process is specified via
+.IR cl_args.stack ,
+which points to the lowest byte of the stack area,
+and
+.IR cl_args.stack_size ,
+which specifies the size of the stack in bytes.
+In the case where the
+.B CLONE_VM
+flag (see below) is specified, a stack must be explicitly allocated
+and specified.
+Otherwise, these two fields can be specified as NULL and 0,
+which causes the child to use the same stack area as the parent
+(in the child's own virtual address space).
+.PP
+The remaining fields in the
+.I cl_args
+argument are discussed below.
+.\"
+.SS Equivalence between clone() and clone3() arguments
+Unlike the older
+.BR clone ()
+interface, where arguments are passed individually, in the newer
+.BR clone3 ()
+interface the arguments are packaged into the
+.I clone_args
+structure shown above.
+This structure allows for a superset of the information passed via the
+.BR clone ()
+arguments.
+.PP
+The following table shows the equivalence between the arguments of
+.BR clone ()
+and the fields in the
+.I clone_args
+argument supplied to
+.BR clone3 ():
+.RS 4
+.TS
+lb lb lb
+l l l
+li li l.
+clone() clone3() Notes
+ \fIcl_args\fP field
+flags & \[ti]0xff flags T{
+For most flags; details below
+T}
+parent_tid pidfd See CLONE_PIDFD
+child_tid child_tid See CLONE_CHILD_SETTID
+parent_tid parent_tid See CLONE_PARENT_SETTID
+flags & 0xff exit_signal
+stack stack
+\fP---\fP stack_size
+tls tls See CLONE_SETTLS
+\fP---\fP set_tid See below for details
+\fP---\fP set_tid_size
+\fP---\fP cgroup See CLONE_INTO_CGROUP
+.TE
+.RE
+.\"
+.SS The child termination signal
+When the child process terminates, a signal may be sent to the parent.
+The termination signal is specified in the low byte of
+.I flags
+.RB ( clone ())
+or in
+.I cl_args.exit_signal
+.RB ( clone3 ()).
+If this signal is specified as anything other than
+.BR SIGCHLD ,
+then the parent process must specify the
+.B __WALL
+or
+.B __WCLONE
+options when waiting for the child with
+.BR wait (2).
+If no signal (i.e., zero) is specified, then the parent process is not signaled
+when the child terminates.
+.\"
+.SS The set_tid array
+By default, the kernel chooses the next sequential PID for the new
+process in each of the PID namespaces where it is present.
+When creating a process with
+.BR clone3 (),
+the
+.I set_tid
+array (available since Linux 5.5)
+can be used to select specific PIDs for the process in some
+or all of the PID namespaces where it is present.
+If the PID of the newly created process should be set only for the current
+PID namespace or in the newly created PID namespace (if
+.I flags
+contains
+.BR CLONE_NEWPID )
+then the first element in the
+.I set_tid
+array has to be the desired PID and
+.I set_tid_size
+needs to be 1.
+.PP
+If the PID of the newly created process should have a certain value in
+multiple PID namespaces, then the
+.I set_tid
+array can have multiple entries.
+The first entry defines the PID in the most
+deeply nested PID namespace and each of the following entries contains
+the PID in the
+corresponding ancestor PID namespace.
+The number of PID namespaces in which a PID
+should be set is defined by
+.I set_tid_size
+which cannot be larger than the number of currently nested PID namespaces.
+.PP
+To create a process with the following PIDs in a PID namespace hierarchy:
+.RS 4
+.TS
+lb lb lb
+l l l.
+PID NS level Requested PID Notes
+0 31496 Outermost PID namespace
+1 42
+2 7 Innermost PID namespace
+.TE
+.RE
+.PP
+Set the array to:
+.PP
+.in +4n
+.EX
+set_tid[0] = 7;
+set_tid[1] = 42;
+set_tid[2] = 31496;
+set_tid_size = 3;
+.EE
+.in
+.PP
+If only the PIDs in the two innermost PID namespaces
+need to be specified, set the array to:
+.PP
+.in +4n
+.EX
+set_tid[0] = 7;
+set_tid[1] = 42;
+set_tid_size = 2;
+.EE
+.in
+.PP
+The PID in the PID namespaces outside the two innermost PID namespaces
+is selected the same way as any other PID is selected.
+.PP
+The
+.I set_tid
+feature requires
+.B CAP_SYS_ADMIN
+or
+(since Linux 5.9)
+.\" commit 124ea650d3072b005457faed69909221c2905a1f
+.\" commit 1caef81da05a84a40dbf02110e967ce6d1135ff6
+.B CAP_CHECKPOINT_RESTORE
+in all owning user namespaces of the target PID namespaces.
+.PP
+Callers may only choose a PID greater than 1 in a given PID namespace
+if an
+.B init
+process (i.e., a process with PID 1) already exists in that namespace.
+Otherwise the PID
+entry for this PID namespace must be 1.
+.\"
+.SS The flags mask
+Both
+.BR clone ()
+and
+.BR clone3 ()
+allow a flags bit mask that modifies their behavior
+and allows the caller to specify what is shared between the calling process
+and the child process.
+This bit mask\[em]the
+.I flags
+argument of
+.BR clone ()
+or the
+.I cl_args.flags
+field passed to
+.BR clone3 ()\[em]is
+referred to as the
+.I flags
+mask in the remainder of this page.
+.PP
+The
+.I flags
+mask is specified as a bitwise OR of zero or more of
+the constants listed below.
+Except as noted below, these flags are available
+(and have the same effect) in both
+.BR clone ()
+and
+.BR clone3 ().
+.TP
+.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
+Clear (zero) the child thread ID at the location pointed to by
+.I child_tid
+.RB ( clone ())
+or
+.I cl_args.child_tid
+.RB ( clone3 ())
+in child memory when the child exits, and do a wakeup on the futex
+at that address.
+The address involved may be changed by the
+.BR set_tid_address (2)
+system call.
+This is used by threading libraries.
+.TP
+.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
+Store the child thread ID at the location pointed to by
+.I child_tid
+.RB ( clone ())
+or
+.I cl_args.child_tid
+.RB ( clone3 ())
+in the child's memory.
+The store operation completes before the clone call
+returns control to user space in the child process.
+(Note that the store operation may not have completed before the clone call
+returns in the parent process, which is relevant if the
+.B CLONE_VM
+flag is also employed.)
+.TP
+.BR CLONE_CLEAR_SIGHAND " (since Linux 5.5)"
+.\" commit b612e5df4587c934bd056bf05f4a1deca4de4f75
+By default, signal dispositions in the child thread are the same as
+in the parent.
+If this flag is specified,
+then all signals that are handled in the parent
+are reset to their default dispositions
+.RB ( SIG_DFL )
+in the child.
+.IP
+Specifying this flag together with
+.B CLONE_SIGHAND
+is nonsensical and disallowed.
+.TP
+.BR CLONE_DETACHED " (historical)"
+For a while (during the Linux 2.5 development series)
+.\" added in Linux 2.5.32; removed in Linux 2.6.0-test4
+there was a
+.B CLONE_DETACHED
+flag,
+which caused the parent not to receive a signal when the child terminated.
+Ultimately, the effect of this flag was subsumed under the
+.B CLONE_THREAD
+flag and by the time Linux 2.6.0 was released, this flag had no effect.
+Starting in Linux 2.6.2, the need to give this flag together with
+.B CLONE_THREAD
+disappeared.
+.IP
+This flag is still defined, but it is usually ignored when calling
+.BR clone ().
+However, see the description of
+.B CLONE_PIDFD
+for some exceptions.
+.TP
+.BR CLONE_FILES " (since Linux 2.0)"
+If
+.B CLONE_FILES
+is set, the calling process and the child process share the same file
+descriptor table.
+Any file descriptor created by the calling process or by the child
+process is also valid in the other process.
+Similarly, if one of the processes closes a file descriptor,
+or changes its associated flags (using the
+.BR fcntl (2)
+.B F_SETFD
+operation), the other process is also affected.
+If a process sharing a file descriptor table calls
+.BR execve (2),
+its file descriptor table is duplicated (unshared).
+.IP
+If
+.B CLONE_FILES
+is not set, the child process inherits a copy of all file descriptors
+opened in the calling process at the time of the clone call.
+Subsequent operations that open or close file descriptors,
+or change file descriptor flags,
+performed by either the calling
+process or the child process do not affect the other process.
+Note, however,
+that the duplicated file descriptors in the child refer to the same
+open file descriptions as the corresponding file descriptors
+in the calling process,
+and thus share file offsets and file status flags (see
+.BR open (2)).
+.TP
+.BR CLONE_FS " (since Linux 2.0)"
+If
+.B CLONE_FS
+is set, the caller and the child process share the same filesystem
+information.
+This includes the root of the filesystem, the current
+working directory, and the umask.
+Any call to
+.BR chroot (2),
+.BR chdir (2),
+or
+.BR umask (2)
+performed by the calling process or the child process also affects the
+other process.
+.IP
+If
+.B CLONE_FS
+is not set, the child process works on a copy of the filesystem
+information of the calling process at the time of the clone call.
+Calls to
+.BR chroot (2),
+.BR chdir (2),
+or
+.BR umask (2)
+performed later by one of the processes do not affect the other process.
+.TP
+.BR CLONE_INTO_CGROUP " (since Linux 5.7)"
+.\" commit ef2c41cf38a7559bbf91af42d5b6a4429db8fc68
+By default, a child process is placed in the same version 2
+cgroup as its parent.
+The
+.B CLONE_INTO_CGROUP
+flag allows the child process to be created in a different version 2 cgroup.
+(Note that
+.B CLONE_INTO_CGROUP
+has effect only for version 2 cgroups.)
+.IP
+In order to place the child process in a different cgroup,
+the caller specifies
+.B CLONE_INTO_CGROUP
+in
+.I cl_args.flags
+and passes a file descriptor that refers to a version 2 cgroup in the
+.I cl_args.cgroup
+field.
+(This file descriptor can be obtained by opening a cgroup v2 directory
+using either the
+.B O_RDONLY
+or the
+.B O_PATH
+flag.)
+Note that all of the usual restrictions (described in
+.BR cgroups (7))
+on placing a process into a version 2 cgroup apply.
+.IP
+Among the possible use cases for
+.B CLONE_INTO_CGROUP
+are the following:
+.RS
+.IP \[bu] 3
+Spawning a process into a cgroup different from the parent's cgroup
+makes it possible for a service manager to directly spawn new
+services into dedicated cgroups.
+This eliminates the accounting
+jitter that would be caused if the child process was first created in the
+same cgroup as the parent and then
+moved into the target cgroup.
+Furthermore, spawning the child process directly into a target cgroup
+is significantly cheaper than moving the child process into
+the target cgroup after it has been created.
+.IP \[bu]
+The
+.B CLONE_INTO_CGROUP
+flag also allows the creation of
+frozen child processes by spawning them into a frozen cgroup.
+(See
+.BR cgroups (7)
+for a description of the freezer controller.)
+.IP \[bu]
+For threaded applications (or even thread implementations which
+make use of cgroups to limit individual threads), it is possible to
+establish a fixed cgroup layout before spawning each thread
+directly into its target cgroup.
+.RE
+.TP
+.BR CLONE_IO " (since Linux 2.6.25)"
+If
+.B CLONE_IO
+is set, then the new process shares an I/O context with
+the calling process.
+If this flag is not set, then (as with
+.BR fork (2))
+the new process has its own I/O context.
+.IP
+.\" The following based on text from Jens Axboe
+The I/O context is the I/O scope of the disk scheduler (i.e.,
+what the I/O scheduler uses to model scheduling of a process's I/O).
+If processes share the same I/O context,
+they are treated as one by the I/O scheduler.
+As a consequence, they get to share disk time.
+For some I/O schedulers,
+.\" the anticipatory and CFQ scheduler
+if two processes share an I/O context,
+they will be allowed to interleave their disk access.
+If several threads are doing I/O on behalf of the same process
+.RB ( aio_read (3),
+for instance), they should employ
+.B CLONE_IO
+to get better I/O performance.
+.\" with CFQ and AS.
+.IP
+If the kernel is not configured with the
+.B CONFIG_BLOCK
+option, this flag is a no-op.
+.TP
+.BR CLONE_NEWCGROUP " (since Linux 4.6)"
+Create the process in a new cgroup namespace.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same cgroup namespace as the calling process.
+.IP
+For further information on cgroup namespaces, see
+.BR cgroup_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWCGROUP .
+.\"
+.TP
+.BR CLONE_NEWIPC " (since Linux 2.6.19)"
+If
+.B CLONE_NEWIPC
+is set, then create the process in a new IPC namespace.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same IPC namespace as
+the calling process.
+.IP
+For further information on IPC namespaces, see
+.BR ipc_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWIPC .
+This flag can't be specified in conjunction with
+.BR CLONE_SYSVSEM .
+.TP
+.BR CLONE_NEWNET " (since Linux 2.6.24)"
+(The implementation of this flag was completed only
+by about Linux 2.6.29.)
+.IP
+If
+.B CLONE_NEWNET
+is set, then create the process in a new network namespace.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same network namespace as
+the calling process.
+.IP
+For further information on network namespaces, see
+.BR network_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWNET .
+.TP
+.BR CLONE_NEWNS " (since Linux 2.4.19)"
+If
+.B CLONE_NEWNS
+is set, the cloned child is started in a new mount namespace,
+initialized with a copy of the namespace of the parent.
+If
+.B CLONE_NEWNS
+is not set, the child lives in the same mount
+namespace as the parent.
+.IP
+For further information on mount namespaces, see
+.BR namespaces (7)
+and
+.BR mount_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWNS .
+It is not permitted to specify both
+.B CLONE_NEWNS
+and
+.B CLONE_FS
+.\" See https://lwn.net/Articles/543273/
+in the same clone call.
+.TP
+.BR CLONE_NEWPID " (since Linux 2.6.24)"
+.\" This explanation draws a lot of details from
+.\" http://lwn.net/Articles/259217/
+.\" Authors: Pavel Emelyanov <xemul@openvz.org>
+.\" and Kir Kolyshkin <kir@openvz.org>
+.\"
+.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
+.\" Author: Pavel Emelyanov <xemul@openvz.org>
+If
+.B CLONE_NEWPID
+is set, then create the process in a new PID namespace.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same PID namespace as
+the calling process.
+.IP
+For further information on PID namespaces, see
+.BR namespaces (7)
+and
+.BR pid_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWPID .
+This flag can't be specified in conjunction with
+.B CLONE_THREAD
+or
+.BR CLONE_PARENT .
+.TP
+.B CLONE_NEWUSER
+(This flag first became meaningful for
+.BR clone ()
+in Linux 2.6.23,
+the current
+.BR clone ()
+semantics were merged in Linux 3.5,
+and the final pieces to make the user namespaces completely usable were
+merged in Linux 3.8.)
+.IP
+If
+.B CLONE_NEWUSER
+is set, then create the process in a new user namespace.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same user namespace as the calling process.
+.IP
+For further information on user namespaces, see
+.BR namespaces (7)
+and
+.BR user_namespaces (7).
+.IP
+Before Linux 3.8, use of
+.B CLONE_NEWUSER
+required that the caller have three capabilities:
+.BR CAP_SYS_ADMIN ,
+.BR CAP_SETUID ,
+and
+.BR CAP_SETGID .
+.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
+Starting with Linux 3.8,
+no privileges are needed to create a user namespace.
+.IP
+This flag can't be specified in conjunction with
+.B CLONE_THREAD
+or
+.BR CLONE_PARENT .
+For security reasons,
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
+.\" https://lwn.net/Articles/543273/
+.\" The fix actually went into Linux 3.9 and into Linux 3.8.3. However, user namespaces
+.\" were, for practical purposes, unusable in earlier Linux 3.8.x because of the
+.\" various filesystems that didn't support userns.
+.B CLONE_NEWUSER
+cannot be specified in conjunction with
+.BR CLONE_FS .
+.TP
+.BR CLONE_NEWUTS " (since Linux 2.6.19)"
+If
+.B CLONE_NEWUTS
+is set, then create the process in a new UTS namespace,
+whose identifiers are initialized by duplicating the identifiers
+from the UTS namespace of the calling process.
+If this flag is not set, then (as with
+.BR fork (2))
+the process is created in the same UTS namespace as
+the calling process.
+.IP
+For further information on UTS namespaces, see
+.BR uts_namespaces (7).
+.IP
+Only a privileged process
+.RB ( CAP_SYS_ADMIN )
+can employ
+.BR CLONE_NEWUTS .
+.TP
+.BR CLONE_PARENT " (since Linux 2.3.12)"
+If
+.B CLONE_PARENT
+is set, then the parent of the new child (as returned by
+.BR getppid (2))
+will be the same as that of the calling process.
+.IP
+If
+.B CLONE_PARENT
+is not set, then (as with
+.BR fork (2))
+the child's parent is the calling process.
+.IP
+Note that it is the parent process, as returned by
+.BR getppid (2),
+which is signaled when the child terminates, so that
+if
+.B CLONE_PARENT
+is set, then the parent of the calling process, rather than the
+calling process itself, is signaled.
+.IP
+The
+.B CLONE_PARENT
+flag can't be used in clone calls by the
+global init process (PID 1 in the initial PID namespace)
+and init processes in other PID namespaces.
+This restriction prevents the creation of multi-rooted process trees
+as well as the creation of unreapable zombies in the initial PID namespace.
+.TP
+.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
+Store the child thread ID at the location pointed to by
+.I parent_tid
+.RB ( clone ())
+or
+.I cl_args.parent_tid
+.RB ( clone3 ())
+in the parent's memory.
+(In Linux 2.5.32-2.5.48 there was a flag
+.B CLONE_SETTID
+that did this.)
+The store operation completes before the clone call
+returns control to user space.
+.TP
+.BR CLONE_PID " (Linux 2.0 to Linux 2.5.15)"
+If
+.B CLONE_PID
+is set, the child process is created with the same process ID as
+the calling process.
+This is good for hacking the system, but otherwise
+of not much use.
+From Linux 2.3.21 onward, this flag could be
+specified only by the system boot process (PID 0).
+The flag disappeared completely from the kernel sources in Linux 2.5.16.
+Subsequently, the kernel silently ignored this bit if it was specified in the
+.I flags
+mask.
+Much later, the same bit was recycled for use as the
+.B CLONE_PIDFD
+flag.
+.TP
+.BR CLONE_PIDFD " (since Linux 5.2)"
+.\" commit b3e5838252665ee4cfa76b82bdf1198dca81e5be
+If this flag is specified,
+a PID file descriptor referring to the child process is allocated
+and placed at a specified location in the parent's memory.
+The close-on-exec flag is set on this new file descriptor.
+PID file descriptors can be used for the purposes described in
+.BR pidfd_open (2).
+.RS
+.IP \[bu] 3
+When using
+.BR clone3 (),
+the PID file descriptor is placed at the location pointed to by
+.IR cl_args.pidfd .
+.IP \[bu]
+When using
+.BR clone (),
+the PID file descriptor is placed at the location pointed to by
+.IR parent_tid .
+Since the
+.I parent_tid
+argument is used to return the PID file descriptor,
+.B CLONE_PIDFD
+cannot be used with
+.B CLONE_PARENT_SETTID
+when calling
+.BR clone ().
+.RE
+.IP
+It is currently not possible to use this flag together with
+.BR CLONE_THREAD .
+This means that the process identified by the PID file descriptor
+will always be a thread group leader.
+.IP
+If the obsolete
+.B CLONE_DETACHED
+flag is specified alongside
+.B CLONE_PIDFD
+when calling
+.BR clone (),
+an error is returned.
+An error also results if
+.B CLONE_DETACHED
+is specified when calling
+.BR clone3 ().
+This error behavior ensures that the bit corresponding to
+.B CLONE_DETACHED
+can be reused for further PID file descriptor features in the future.
+.TP
+.BR CLONE_PTRACE " (since Linux 2.2)"
+If
+.B CLONE_PTRACE
+is specified, and the calling process is being traced,
+then trace the child also (see
+.BR ptrace (2)).
+.TP
+.BR CLONE_SETTLS " (since Linux 2.5.32)"
+The TLS (Thread Local Storage) descriptor is set to
+.IR tls .
+.IP
+The interpretation of
+.I tls
+and the resulting effect is architecture dependent.
+On x86,
+.I tls
+is interpreted as a
+.I struct user_desc\~*
+(see
+.BR set_thread_area (2)).
+On x86-64 it is the new value to be set for the %fs base register
+(see the
+.B ARCH_SET_FS
+argument to
+.BR arch_prctl (2)).
+On architectures with a dedicated TLS register, it is the new value
+of that register.
+.IP
+Use of this flag requires detailed knowledge and generally it
+should not be used except in libraries implementing threading.
+.TP
+.BR CLONE_SIGHAND " (since Linux 2.0)"
+If
+.B CLONE_SIGHAND
+is set, the calling process and the child process share the same table of
+signal handlers.
+If the calling process or child process calls
+.BR sigaction (2)
+to change the behavior associated with a signal, the behavior is
+changed in the other process as well.
+However, the calling process and child
+processes still have distinct signal masks and sets of pending
+signals.
+So, one of them may block or unblock signals using
+.BR sigprocmask (2)
+without affecting the other process.
+.IP
+If
+.B CLONE_SIGHAND
+is not set, the child process inherits a copy of the signal handlers
+of the calling process at the time of the clone call.
+Calls to
+.BR sigaction (2)
+performed later by one of the processes have no effect on the other
+process.
+.IP
+Since Linux 2.6.0,
+.\" Precisely: Linux 2.6.0-test6
+the
+.I flags
+mask must also include
+.B CLONE_VM
+if
+.B CLONE_SIGHAND
+is specified.
+.TP
+.BR CLONE_STOPPED " (since Linux 2.6.0)"
+.\" Precisely: Linux 2.6.0-test2
+If
+.B CLONE_STOPPED
+is set, then the child is initially stopped (as though it was sent a
+.B SIGSTOP
+signal), and must be resumed by sending it a
+.B SIGCONT
+signal.
+.IP
+This flag was
+.I deprecated
+from Linux 2.6.25 onward,
+and was
+.I removed
+altogether in Linux 2.6.38.
+Since then, the kernel silently ignores it without error.
+.\" glibc 2.8 removed this defn from bits/sched.h
+Starting with Linux 4.6, the same bit was reused for the
+.B CLONE_NEWCGROUP
+flag.
+.TP
+.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
+If
+.B CLONE_SYSVSEM
+is set, then the child and the calling process share
+a single list of System V semaphore adjustment
+.RI ( semadj )
+values (see
+.BR semop (2)).
+In this case, the shared list accumulates
+.I semadj
+values across all processes sharing the list,
+and semaphore adjustments are performed only when the last process
+that is sharing the list terminates (or ceases sharing the list using
+.BR unshare (2)).
+If this flag is not set, then the child has a separate
+.I semadj
+list that is initially empty.
+.TP
+.BR CLONE_THREAD " (since Linux 2.4.0)"
+.\" Precisely: Linux 2.6.0-test8
+If
+.B CLONE_THREAD
+is set, the child is placed in the same thread group as the calling process.
+To make the remainder of the discussion of
+.B CLONE_THREAD
+more readable, the term "thread" is used to refer to the
+processes within a thread group.
+.IP
+Thread groups were a feature added in Linux 2.4 to support the
+POSIX threads notion of a set of threads that share a single PID.
+Internally, this shared PID is the so-called
+thread group identifier (TGID) for the thread group.
+Since Linux 2.4, calls to
+.BR getpid (2)
+return the TGID of the caller.
+.IP
+The threads within a group can be distinguished by their (system-wide)
+unique thread IDs (TID).
+A new thread's TID is available as the function result
+returned to the caller,
+and a thread can obtain
+its own TID using
+.BR gettid (2).
+.IP
+When a clone call is made without specifying
+.BR CLONE_THREAD ,
+then the resulting thread is placed in a new thread group
+whose TGID is the same as the thread's TID.
+This thread is the
+.I leader
+of the new thread group.
+.IP
+A new thread created with
+.B CLONE_THREAD
+has the same parent process as the process that made the clone call
+(i.e., like
+.BR CLONE_PARENT ),
+so that calls to
+.BR getppid (2)
+return the same value for all of the threads in a thread group.
+When a
+.B CLONE_THREAD
+thread terminates, the thread that created it is not sent a
+.B SIGCHLD
+(or other termination) signal;
+nor can the status of such a thread be obtained
+using
+.BR wait (2).
+(The thread is said to be
+.IR detached .)
+.IP
+After all of the threads in a thread group terminate,
+the parent process of the thread group is sent a
+.B SIGCHLD
+(or other termination) signal.
+.IP
+If any of the threads in a thread group performs an
+.BR execve (2),
+then all threads other than the thread group leader are terminated,
+and the new program is executed in the thread group leader.
+.IP
+If one of the threads in a thread group creates a child using
+.BR fork (2),
+then any thread in the group can
+.BR wait (2)
+for that child.
+.IP
+Since Linux 2.5.35, the
+.I flags
+mask must also include
+.B CLONE_SIGHAND
+if
+.B CLONE_THREAD
+is specified
+(and note that, since Linux 2.6.0,
+.\" Precisely: Linux 2.6.0-test6
+.B CLONE_SIGHAND
+also requires
+.B CLONE_VM
+to be included).
+.IP
+Signal dispositions and actions are process-wide:
+if an unhandled signal is delivered to a thread, then
+it will affect (terminate, stop, continue, be ignored in)
+all members of the thread group.
+.IP
+Each thread has its own signal mask, as set by
+.BR sigprocmask (2).
+.IP
+A signal may be process-directed or thread-directed.
+A process-directed signal is targeted at a thread group (i.e., a TGID),
+and is delivered to an arbitrarily selected thread from among those
+that are not blocking the signal.
+A signal may be process-directed because it was generated by the kernel
+for reasons other than a hardware exception, or because it was sent using
+.BR kill (2)
+or
+.BR sigqueue (3).
+A thread-directed signal is targeted at (i.e., delivered to)
+a specific thread.
+A signal may be thread-directed because it was sent using
+.BR tgkill (2)
+or
+.BR pthread_sigqueue (3),
+or because the thread executed a machine language instruction that triggered
+a hardware exception
+(e.g., invalid memory access triggering
+.B SIGSEGV
+or a floating-point exception triggering
+.BR SIGFPE ).
+.IP
+A call to
+.BR sigpending (2)
+returns a signal set that is the union of the pending process-directed
+signals and the signals that are pending for the calling thread.
+.IP
+If a process-directed signal is delivered to a thread group,
+and the thread group has installed a handler for the signal, then
+the handler is invoked in exactly one, arbitrarily selected
+member of the thread group that has not blocked the signal.
+If multiple threads in a group are waiting to accept the same signal using
+.BR sigwaitinfo (2),
+the kernel will arbitrarily select one of these threads
+to receive the signal.
+.TP
+.BR CLONE_UNTRACED " (since Linux 2.5.46)"
+If
+.B CLONE_UNTRACED
+is specified, then a tracing process cannot force
+.B CLONE_PTRACE
+on this child process.
+.TP
+.BR CLONE_VFORK " (since Linux 2.2)"
+If
+.B CLONE_VFORK
+is set, the execution of the calling process is suspended
+until the child releases its virtual memory
+resources via a call to
+.BR execve (2)
+or
+.BR _exit (2)
+(as with
+.BR vfork (2)).
+.IP
+If
+.B CLONE_VFORK
+is not set, then both the calling process and the child are schedulable
+after the call, and an application should not rely on execution occurring
+in any particular order.
+.TP
+.BR CLONE_VM " (since Linux 2.0)"
+If
+.B CLONE_VM
+is set, the calling process and the child process run in the same memory
+space.
+In particular, memory writes performed by the calling process
+or by the child process are also visible in the other process.
+Moreover, any memory mapping or unmapping performed with
+.BR mmap (2)
+or
+.BR munmap (2)
+by the child or calling process also affects the other process.
+.IP
+If
+.B CLONE_VM
+is not set, the child process runs in a separate copy of the memory
+space of the calling process at the time of the clone call.
+Memory writes or file mappings/unmappings performed by one of the
+processes do not affect the other, as with
+.BR fork (2).
+.IP
+If the
+.B CLONE_VM
+flag is specified and the
+.B CLONE_VFORK
+flag is not specified,
+then any alternate signal stack that was established by
+.BR sigaltstack (2)
+is cleared in the child process.
+.SH RETURN VALUE
+.\" gettid(2) returns current->pid;
+.\" getpid(2) returns current->tgid;
+On success, the thread ID of the child process is returned
+in the caller's thread of execution.
+On failure, \-1 is returned
+in the caller's context, no child process is created, and
+.I errno
+is set to indicate the error.
+.SH ERRORS
+.TP
+.BR EACCES " (" clone3 "() only)"
+.B CLONE_INTO_CGROUP
+was specified in
+.IR cl_args.flags ,
+but the restrictions (described in
+.BR cgroups (7))
+on placing the child process into the version 2 cgroup referred to by
+.I cl_args.cgroup
+are not met.
+.TP
+.B EAGAIN
+Too many processes are already running; see
+.BR fork (2).
+.TP
+.BR EBUSY " (" clone3 "() only)"
+.B CLONE_INTO_CGROUP
+was specified in
+.IR cl_args.flags ,
+but the file descriptor specified in
+.I cl_args.cgroup
+refers to a version 2 cgroup in which a domain controller is enabled.
+.TP
+.BR EEXIST " (" clone3 "() only)"
+One (or more) of the PIDs specified in
+.I set_tid
+already exists in the corresponding PID namespace.
+.TP
+.B EINVAL
+Both
+.B CLONE_SIGHAND
+and
+.B CLONE_CLEAR_SIGHAND
+were specified in the
+.I flags
+mask.
+.TP
+.B EINVAL
+.B CLONE_SIGHAND
+was specified in the
+.I flags
+mask, but
+.B CLONE_VM
+was not.
+(Since Linux 2.6.0.)
+.\" Precisely: Linux 2.6.0-test6
+.TP
+.B EINVAL
+.B CLONE_THREAD
+was specified in the
+.I flags
+mask, but
+.B CLONE_SIGHAND
+was not.
+(Since Linux 2.5.35.)
+.\" .TP
+.\" .B EINVAL
+.\" Precisely one of
+.\" .B CLONE_DETACHED
+.\" and
+.\" .B CLONE_THREAD
+.\" was specified.
+.\" (Since Linux 2.6.0-test6.)
+.TP
+.B EINVAL
+.B CLONE_THREAD
+was specified in the
+.I flags
+mask, but the current process previously called
+.BR unshare (2)
+with the
+.B CLONE_NEWPID
+flag or used
+.BR setns (2)
+to reassociate itself with a PID namespace.
+.TP
+.B EINVAL
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
+Both
+.B CLONE_FS
+and
+.B CLONE_NEWNS
+were specified in the
+.I flags
+mask.
+.TP
+.BR EINVAL " (since Linux 3.9)"
+Both
+.B CLONE_NEWUSER
+and
+.B CLONE_FS
+were specified in the
+.I flags
+mask.
+.TP
+.B EINVAL
+Both
+.B CLONE_NEWIPC
+and
+.B CLONE_SYSVSEM
+were specified in the
+.I flags
+mask.
+.TP
+.B EINVAL
+One (or both) of
+.B CLONE_NEWPID
+or
+.B CLONE_NEWUSER
+and one (or both) of
+.B CLONE_THREAD
+or
+.B CLONE_PARENT
+were specified in the
+.I flags
+mask.
+.TP
+.BR EINVAL " (since Linux 2.6.32)"
+.\" commit 123be07b0b399670a7cc3d82fef0cb4f93ef885c
+.B CLONE_PARENT
+was specified, and the caller is an init process.
+.TP
+.B EINVAL
+Returned by the glibc
+.BR clone ()
+wrapper function when
+.I fn
+or
+.I stack
+is specified as NULL.
+.TP
+.B EINVAL
+.B CLONE_NEWIPC
+was specified in the
+.I flags
+mask,
+but the kernel was not configured with the
+.B CONFIG_SYSVIPC
+and
+.B CONFIG_IPC_NS
+options.
+.TP
+.B EINVAL
+.B CLONE_NEWNET
+was specified in the
+.I flags
+mask,
+but the kernel was not configured with the
+.B CONFIG_NET_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWPID
+was specified in the
+.I flags
+mask,
+but the kernel was not configured with the
+.B CONFIG_PID_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWUSER
+was specified in the
+.I flags
+mask,
+but the kernel was not configured with the
+.B CONFIG_USER_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWUTS
+was specified in the
+.I flags
+mask,
+but the kernel was not configured with the
+.B CONFIG_UTS_NS
+option.
+.TP
+.B EINVAL
+.I stack
+is not aligned to a suitable boundary for this architecture.
+For example, on aarch64,
+.I stack
+must be a multiple of 16.
+.TP
+.BR EINVAL " (" clone3 "() only)"
+.B CLONE_DETACHED
+was specified in the
+.I flags
+mask.
+.TP
+.BR EINVAL " (" clone "() only)"
+.B CLONE_PIDFD
+was specified together with
+.B CLONE_DETACHED
+in the
+.I flags
+mask.
+.TP
+.B EINVAL
+.B CLONE_PIDFD
+was specified together with
+.B CLONE_THREAD
+in the
+.I flags
+mask.
+.TP
+.BR EINVAL " (" clone "() only)"
+.B CLONE_PIDFD
+was specified together with
+.B CLONE_PARENT_SETTID
+in the
+.I flags
+mask.
+.TP
+.BR EINVAL " (" clone3 "() only)"
+.I set_tid_size
+is greater than the number of nested PID namespaces.
+.TP
+.BR EINVAL " (" clone3 "() only)"
+One of the PIDs specified in
+.I set_tid
+was invalid.
+.TP
+.BR EINVAL " (" clone3 "() only)"
+.\" commit 7f192e3cd316ba58c88dfa26796cf77789dd9872
+.B CLONE_THREAD
+or
+.B CLONE_PARENT
+was specified in the
+.I flags
+mask, but a signal was specified in
+.IR exit_signal .
+.TP
+.BR EINVAL " (AArch64 only, Linux 4.6 and earlier)"
+.I stack
+was not aligned to a 128-bit boundary.
+.TP
+.B ENOMEM
+Cannot allocate sufficient memory to allocate a task structure for the
+child, or to copy those parts of the caller's context that need to be
+copied.
+.TP
+.BR ENOSPC " (since Linux 3.7)"
+.\" commit f2302505775fd13ba93f034206f1e2a587017929
+.B CLONE_NEWPID
+was specified in the
+.I flags
+mask,
+but the limit on the nesting depth of PID namespaces
+would have been exceeded; see
+.BR pid_namespaces (7).
+.TP
+.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
+.B CLONE_NEWUSER
+was specified in the
+.I flags
+mask, and the call would cause the limit on the number of
+nested user namespaces to be exceeded.
+See
+.BR user_namespaces (7).
+.IP
+From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
+.BR EUSERS .
+.TP
+.BR ENOSPC " (since Linux 4.9)"
+One of the values in the
+.I flags
+mask specified the creation of a new user namespace,
+but doing so would have caused the limit defined by the corresponding file in
+.I /proc/sys/user
+to be exceeded.
+For further details, see
+.BR namespaces (7).
+.TP
+.BR EOPNOTSUPP " (" clone3 "() only)"
+.B CLONE_INTO_CGROUP
+was specified in
+.IR cl_args.flags ,
+but the file descriptor specified in
+.I cl_args.cgroup
+refers to a version 2 cgroup that is in the
+.I domain invalid
+state.
+.TP
+.B EPERM
+.BR CLONE_NEWCGROUP ,
+.BR CLONE_NEWIPC ,
+.BR CLONE_NEWNET ,
+.BR CLONE_NEWNS ,
+.BR CLONE_NEWPID ,
+or
+.B CLONE_NEWUTS
+was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
+.TP
+.B EPERM
+.B CLONE_PID
+was specified by a process other than process 0.
+(This error occurs only on Linux 2.5.15 and earlier.)
+.TP
+.B EPERM
+.B CLONE_NEWUSER
+was specified in the
+.I flags
+mask,
+but either the effective user ID or the effective group ID of the caller
+does not have a mapping in the parent namespace (see
+.BR user_namespaces (7)).
+.TP
+.BR EPERM " (since Linux 3.9)"
+.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
+.B CLONE_NEWUSER
+was specified in the
+.I flags
+mask and the caller is in a chroot environment
+.\" FIXME What is the rationale for this restriction?
+(i.e., the caller's root directory does not match the root directory
+of the mount namespace in which it resides).
+.TP
+.BR EPERM " (" clone3 "() only)"
+.I set_tid_size
+was greater than zero, and the caller lacks the
+.B CAP_SYS_ADMIN
+capability in one or more of the user namespaces that own the
+corresponding PID namespaces.
+.TP
+.BR ERESTARTNOINTR " (since Linux 2.6.17)"
+.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
+System call was interrupted by a signal and will be restarted.
+(This can be seen only during a trace.)
+.TP
+.BR EUSERS " (Linux 3.11 to Linux 4.8)"
+.B CLONE_NEWUSER
+was specified in the
+.I flags
+mask,
+and the limit on the number of nested user namespaces would be exceeded.
+See the discussion of the
+.B ENOSPC
+error above.
+.SH VERSIONS
+The glibc
+.BR clone ()
+wrapper function makes some changes
+in the memory pointed to by
+.I stack
+(changes required to set the stack up correctly for the child)
+.I before
+invoking the
+.BR clone ()
+system call.
+So, in cases where
+.BR clone ()
+is used to recursively create children,
+do not use the buffer employed for the parent's stack
+as the stack of the child.
+.PP
+On i386,
+.BR clone ()
+should not be called through vsyscall, but directly through
+.IR "int $0x80" .
+.SS C library/kernel differences
+The raw
+.BR clone ()
+system call corresponds more closely to
+.BR fork (2)
+in that execution in the child continues from the point of the
+call.
+As such, the
+.I fn
+and
+.I arg
+arguments of the
+.BR clone ()
+wrapper function are omitted.
+.PP
+In contrast to the glibc wrapper, the raw
+.BR clone ()
+system call accepts NULL as a
+.I stack
+argument (and
+.BR clone3 ()
+likewise allows
+.I cl_args.stack
+to be NULL).
+In this case, the child uses a duplicate of the parent's stack.
+(Copy-on-write semantics ensure that the child gets separate copies
+of stack pages when either process modifies the stack.)
+In this case, for correct operation, the
+.B CLONE_VM
+option should not be specified.
+(If the child
+.I shares
+the parent's memory because of the use of the
+.B CLONE_VM
+flag,
+then no copy-on-write duplication occurs and chaos is likely to result.)
+.PP
+The order of the arguments also differs in the raw system call,
+and there are variations in the arguments across architectures,
+as detailed in the following paragraphs.
+.PP
+The raw system call interface on x86-64 and some other architectures
+(including sh, tile, and alpha) is:
+.PP
+.in +4n
+.EX
+.BI "long clone(unsigned long " flags ", void *" stack ,
+.BI " int *" parent_tid ", int *" child_tid ,
+.BI " unsigned long " tls );
+.EE
+.in
+.PP
+On x86-32, and several other common architectures
+(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
+and MIPS),
+.\" CONFIG_CLONE_BACKWARDS
+the order of the last two arguments is reversed:
+.PP
+.in +4n
+.EX
+.BI "long clone(unsigned long " flags ", void *" stack ,
+.BI " int *" parent_tid ", unsigned long " tls ,
+.BI " int *" child_tid );
+.EE
+.in
+.PP
+On the cris and s390 architectures,
+.\" CONFIG_CLONE_BACKWARDS2
+the order of the first two arguments is reversed:
+.PP
+.in +4n
+.EX
+.BI "long clone(void *" stack ", unsigned long " flags ,
+.BI " int *" parent_tid ", int *" child_tid ,
+.BI " unsigned long " tls );
+.EE
+.in
+.PP
+On the microblaze architecture,
+.\" CONFIG_CLONE_BACKWARDS3
+an additional argument is supplied:
+.PP
+.in +4n
+.EX
+.BI "long clone(unsigned long " flags ", void *" stack ,
+.BI " int " stack_size , "\fR /* Size of stack */"
+.BI " int *" parent_tid ", int *" child_tid ,
+.BI " unsigned long " tls );
+.EE
+.in
+.\"
+.SS blackfin, m68k, and sparc
+.\" Mike Frysinger noted in a 2013 mail:
+.\" these arches don't define __ARCH_WANT_SYS_CLONE:
+.\" blackfin ia64 m68k sparc
+The argument-passing conventions on
+blackfin, m68k, and sparc are different from the descriptions above.
+For details, see the kernel (and glibc) source.
+.SS ia64
+On ia64, a different interface is used:
+.PP
+.in +4n
+.EX
+.BI "int __clone2(int (*" "fn" ")(void *),"
+.BI " void *" stack_base ", size_t " stack_size ,
+.BI " int " flags ", void *" "arg" ", ..."
+.BI " /* pid_t *" parent_tid ", struct user_desc *" tls ,
+.BI " pid_t *" child_tid " */ );"
+.EE
+.in
+.PP
+The prototype shown above is for the glibc wrapper function;
+for the system call itself,
+the prototype can be described as follows (it is identical to the
+.BR clone ()
+prototype on microblaze):
+.PP
+.in +4n
+.EX
+.BI "long clone2(unsigned long " flags ", void *" stack_base ,
+.BI " int " stack_size , "\fR /* Size of stack */"
+.BI " int *" parent_tid ", int *" child_tid ,
+.BI " unsigned long " tls );
+.EE
+.in
+.PP
+.BR __clone2 ()
+operates in the same way as
+.BR clone (),
+except that
+.I stack_base
+points to the lowest address of the child's stack area,
+and
+.I stack_size
+specifies the size of the stack pointed to by
+.IR stack_base .
+.SH STANDARDS
+Linux.
+.SH HISTORY
+.TP
+.BR clone3 ()
+Linux 5.3.
+.\" There is no entry for
+.\" .BR clone ()
+.\" in libc5.
+.\" glibc2 provides
+.\" .BR clone ()
+.\" as described in this manual page.
+.SS Linux 2.4 and earlier
+In the Linux 2.4.x series,
+.B CLONE_THREAD
+generally does not make the parent of the new thread the same
+as the parent of the calling process.
+However, from Linux 2.4.7 to Linux 2.4.18 the
+.B CLONE_THREAD
+flag implied the
+.B CLONE_PARENT
+flag (as in Linux 2.6.0 and later).
+.PP
+In Linux 2.4 and earlier,
+.BR clone ()
+does not take arguments
+.IR parent_tid ,
+.IR tls ,
+and
+.IR child_tid .
+.SH NOTES
+One use of these system calls
+is to implement threads: multiple flows of control in a program that
+run concurrently in a shared address space.
+.PP
+The
+.BR kcmp (2)
+system call can be used to test whether two processes share various
+resources such as a file descriptor table,
+System V semaphore undo operations, or a virtual address space.
+.PP
+Handlers registered using
+.BR pthread_atfork (3)
+are not executed during a clone call.
+.SH BUGS
+GNU C library versions 2.3.4 up to and including 2.24
+contained a wrapper function for
+.BR getpid (2)
+that performed caching of PIDs.
+This caching relied on support in the glibc wrapper for
+.BR clone (),
+but limitations in the implementation
+meant that the cache was not up to date in some circumstances.
+In particular,
+if a signal was delivered to the child immediately after the
+.BR clone ()
+call, then a call to
+.BR getpid (2)
+in a handler for the signal could return the PID
+of the calling process ("the parent"),
+if the clone wrapper had not yet had a chance to update the PID
+cache in the child.
+(This discussion ignores the case where the child was created using
+.BR CLONE_THREAD ,
+when
+.BR getpid (2)
+.I should
+return the same value in the child and in the process that called
+.BR clone (),
+since the caller and the child are in the same thread group.
+The stale-cache problem also does not occur if the
+.I flags
+argument includes
+.BR CLONE_VM .)
+To get the truth, it was sometimes necessary to use code such as the following:
+.PP
+.in +4n
+.EX
+#include <syscall.h>
+\&
+pid_t mypid;
+\&
+mypid = syscall(SYS_getpid);
+.EE
+.in
+.\" See also the following bug reports
+.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
+.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
+.PP
+Because of the stale-cache problem, as well as other problems noted in
+.BR getpid (2),
+the PID caching feature was removed in glibc 2.25.
+.SH EXAMPLES
+The following program demonstrates the use of
+.BR clone ()
+to create a child process that executes in a separate UTS namespace.
+The child changes the hostname in its UTS namespace.
+Both parent and child then display the system hostname,
+making it possible to see that the hostname
+differs in the UTS namespaces of the parent and child.
+For an example of the use of this program, see
+.BR setns (2).
+.PP
+Within the sample program, we allocate the memory that is to
+be used for the child's stack using
+.BR mmap (2)
+rather than
+.BR malloc (3)
+for the following reasons:
+.IP \[bu] 3
+.BR mmap (2)
+allocates a block of memory that starts on a page
+boundary and is a multiple of the page size.
+This is useful if we want to establish a guard page (a page with protection
+.BR PROT_NONE )
+at the end of the stack using
+.BR mprotect (2).
+.IP \[bu]
+We can specify the
+.B MAP_STACK
+flag to request a mapping that is suitable for a stack.
+For the moment, this flag is a no-op on Linux,
+but it exists and has effect on some other systems,
+so we should include it for portability.
+.SS Program source
+.\" SRC BEGIN (clone.c)
+.EX
+#define _GNU_SOURCE
+#include <err.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <unistd.h>
+\&
+static int /* Start function for cloned child; arg is the new hostname */
+childFunc(void *arg)
+{
+ struct utsname uts;
+\&
+ /* Set hostname in UTS namespace of child to the string in arg. */
+\&
+ if (sethostname(arg, strlen(arg)) == \-1)
+ err(EXIT_FAILURE, "sethostname");
+\&
+ /* Retrieve and display hostname as now reported to the child. */
+\&
+ if (uname(&uts) == \-1)
+ err(EXIT_FAILURE, "uname");
+ printf("uts.nodename in child: %s\en", uts.nodename);
+\&
+ /* Keep the namespace open for a while, by sleeping.
+ This allows some experimentation\-\-for example, another
+ process might join the namespace. */
+\&
+ sleep(200);
+\&
+ return 0; /* Child terminates now, with status 0 */
+}
+\&
+#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
+\&
+int /* Clone a child into a new UTS namespace; show both hostnames */
+main(int argc, char *argv[])
+{
+ char *stack; /* Start of stack buffer */
+ char *stackTop; /* End of stack buffer; passed to clone() */
+ pid_t pid;
+ struct utsname uts;
+\&
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <child\-hostname>\en", argv[0]);
+ exit(EXIT_FAILURE); /* Missing argument is a usage error */
+ }
+\&
+ /* Allocate memory to be used for the stack of the child. */
+\&
+ stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
+ if (stack == MAP_FAILED)
+ err(EXIT_FAILURE, "mmap");
+\&
+ stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
+\&
+ /* Create child that has its own UTS namespace;
+ child commences execution in childFunc(). */
+\&
+ pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
+ if (pid == \-1)
+ err(EXIT_FAILURE, "clone");
+ printf("clone() returned %jd\en", (intmax_t) pid);
+\&
+ /* Parent falls through to here */
+\&
+ sleep(1); /* Give child time to change its hostname */
+\&
+ /* Display hostname in parent\[aq]s UTS namespace. This will be
+ different from hostname in child\[aq]s UTS namespace. */
+\&
+ if (uname(&uts) == \-1)
+ err(EXIT_FAILURE, "uname");
+ printf("uts.nodename in parent: %s\en", uts.nodename);
+\&
+ if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
+ err(EXIT_FAILURE, "waitpid");
+ printf("child has terminated\en");
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR fork (2),
+.BR futex (2),
+.BR getpid (2),
+.BR gettid (2),
+.BR kcmp (2),
+.BR mmap (2),
+.BR pidfd_open (2),
+.BR set_thread_area (2),
+.BR set_tid_address (2),
+.BR setns (2),
+.BR tkill (2),
+.BR unshare (2),
+.BR wait (2),
+.BR capabilities (7),
+.BR namespaces (7),
+.BR pthreads (7)