diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:43:11 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 19:43:11 +0000 |
commit | fc22b3d6507c6745911b9dfcc68f1e665ae13dbc (patch) | |
tree | ce1e3bce06471410239a6f41282e328770aa404a /upstream/opensuse-leap-15-6/man2/clone.2 | |
parent | Initial commit. (diff) | |
download | manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.tar.xz manpages-l10n-fc22b3d6507c6745911b9dfcc68f1e665ae13dbc.zip |
Adding upstream version 4.22.0.upstream/4.22.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'upstream/opensuse-leap-15-6/man2/clone.2')
-rw-r--r-- | upstream/opensuse-leap-15-6/man2/clone.2 | 1944 |
1 files changed, 1944 insertions, 0 deletions
diff --git a/upstream/opensuse-leap-15-6/man2/clone.2 b/upstream/opensuse-leap-15-6/man2/clone.2 new file mode 100644 index 00000000..a64fde33 --- /dev/null +++ b/upstream/opensuse-leap-15-6/man2/clone.2 @@ -0,0 +1,1944 @@ +'\" t +.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992 +.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013, 2019 +.\" +.\" SPDX-License-Identifier: GPL-1.0-or-later +.\" +.\" Modified by Michael Haardt <michael@moria.de> +.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu> +.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>: +.\" New man page (copied from 'fork.2'). +.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl> +.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr> +.\" Modified 26 Jun 2001 by Michael Kerrisk +.\" Mostly upgraded to Linux 2.4.x +.\" Added prototype for sys_clone() plus description +.\" Added CLONE_THREAD with a brief description of thread groups +.\" Added CLONE_PARENT and revised entire page remove ambiguity +.\" between "calling process" and "parent process" +.\" Added CLONE_PTRACE and CLONE_VFORK +.\" Added EPERM and EINVAL error codes +.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>) +.\" various other minor tidy ups and clarifications. +.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com> +.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD +.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com> +.\" Added description for CLONE_NEWNS, which was added in Linux 2.4.19 +.\" Slightly rephrased, aeb. +.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb. +.\" Modified 1 Jan 2004 - various updates, aeb +.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb. +.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid() +.\" wrapper under BUGS. +.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED. +.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD. +.\" 2008-11-18, mtk, order CLONE_* flags alphabetically +.\" 2008-11-18, mtk, document CLONE_NEWPID +.\" 2008-11-19, mtk, document CLONE_NEWUTS +.\" 2008-11-19, mtk, document CLONE_NEWIPC +.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO +.\" +.TH clone 2 2023-03-30 "Linux man-pages 6.04" +.SH NAME +clone, __clone2, clone3 \- create a child process +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +/* Prototype for the glibc wrapper function */ +.PP +.B #define _GNU_SOURCE +.B #include <sched.h> +.PP +.BI "int clone(int (*" "fn" ")(void *_Nullable), void *" stack \ +", int " flags , +.BI " void *_Nullable " "arg" ", ..." \ +" \fR/*\fP" " pid_t *_Nullable " parent_tid , +.BI " void *_Nullable " tls , +.BI " pid_t *_Nullable " child_tid " \fR*/\fP );" +.PP +/* For the prototype of the raw clone() system call, see NOTES */ +.PP +.BR "#include <linux/sched.h>" " /* Definition of " "struct clone_args" " */" +.BR "#include <sched.h>" " /* Definition of " CLONE_* " constants */" +.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */" +.B #include <unistd.h> +.PP +.BI "long syscall(SYS_clone3, struct clone_args *" cl_args ", size_t " size ); +.fi +.PP +.IR Note : +glibc provides no wrapper for +.BR clone3 (), +necessitating the use of +.BR syscall (2). +.SH DESCRIPTION +These system calls +create a new ("child") process, in a manner similar to +.BR fork (2). +.PP +By contrast with +.BR fork (2), +these system calls provide more precise control over what pieces of execution +context are shared between the calling process and the child process. +For example, using these system calls, the caller can control whether +or not the two processes share the virtual address space, +the table of file descriptors, and the table of signal handlers. +These system calls also allow the new child process to be placed +in separate +.BR namespaces (7). +.PP +Note that in this manual +page, "calling process" normally corresponds to "parent process". +But see the descriptions of +.B CLONE_PARENT +and +.B CLONE_THREAD +below. +.PP +This page describes the following interfaces: +.IP \[bu] 3 +The glibc +.BR clone () +wrapper function and the underlying system call on which it is based. +The main text describes the wrapper function; +the differences for the raw system call +are described toward the end of this page. +.IP \[bu] +The newer +.BR clone3 () +system call. +.PP +In the remainder of this page, the terminology "the clone call" is used +when noting details that apply to all of these interfaces, +.\" +.SS The clone() wrapper function +When the child process is created with the +.BR clone () +wrapper function, +it commences execution by calling the function pointed to by the argument +.IR fn . +(This differs from +.BR fork (2), +where execution continues in the child from the point +of the +.BR fork (2) +call.) +The +.I arg +argument is passed as the argument of the function +.IR fn . +.PP +When the +.IR fn ( arg ) +function returns, the child process terminates. +The integer returned by +.I fn +is the exit status for the child process. +The child process may also terminate explicitly by calling +.BR exit (2) +or after receiving a fatal signal. +.PP +The +.I stack +argument specifies the location of the stack used by the child process. +Since the child and calling process may share memory, +it is not possible for the child process to execute in the +same stack as the calling process. +The calling process must therefore +set up memory space for the child stack and pass a pointer to this +space to +.BR clone (). +Stacks grow downward on all processors that run Linux +(except the HP PA processors), so +.I stack +usually points to the topmost address of the memory space set up for +the child stack. +Note that +.BR clone () +does not provide a means whereby the caller can inform the kernel of the +size of the stack area. +.PP +The remaining arguments to +.BR clone () +are discussed below. +.\" +.SS clone3() +The +.BR clone3 () +system call provides a superset of the functionality of the older +.BR clone () +interface. +It also provides a number of API improvements, including: +space for additional flags bits; +cleaner separation in the use of various arguments; +and the ability to specify the size of the child's stack area. +.PP +As with +.BR fork (2), +.BR clone3 () +returns in both the parent and the child. +It returns 0 in the child process and returns the PID of the child +in the parent. +.PP +The +.I cl_args +argument of +.BR clone3 () +is a structure of the following form: +.PP +.in +4n +.EX +struct clone_args { + u64 flags; /* Flags bit mask */ + u64 pidfd; /* Where to store PID file descriptor + (\fIint *\fP) */ + u64 child_tid; /* Where to store child TID, + in child\[aq]s memory (\fIpid_t *\fP) */ + u64 parent_tid; /* Where to store child TID, + in parent\[aq]s memory (\fIpid_t *\fP) */ + u64 exit_signal; /* Signal to deliver to parent on + child termination */ + u64 stack; /* Pointer to lowest byte of stack */ + u64 stack_size; /* Size of stack */ + u64 tls; /* Location of new TLS */ + u64 set_tid; /* Pointer to a \fIpid_t\fP array + (since Linux 5.5) */ + u64 set_tid_size; /* Number of elements in \fIset_tid\fP + (since Linux 5.5) */ + u64 cgroup; /* File descriptor for target cgroup + of child (since Linux 5.7) */ +}; +.EE +.in +.PP +The +.I size +argument that is supplied to +.BR clone3 () +should be initialized to the size of this structure. +(The existence of the +.I size +argument permits future extensions to the +.I clone_args +structure.) +.PP +The stack for the child process is specified via +.IR cl_args.stack , +which points to the lowest byte of the stack area, +and +.IR cl_args.stack_size , +which specifies the size of the stack in bytes. +In the case where the +.B CLONE_VM +flag (see below) is specified, a stack must be explicitly allocated +and specified. +Otherwise, these two fields can be specified as NULL and 0, +which causes the child to use the same stack area as the parent +(in the child's own virtual address space). +.PP +The remaining fields in the +.I cl_args +argument are discussed below. +.\" +.SS Equivalence between clone() and clone3() arguments +Unlike the older +.BR clone () +interface, where arguments are passed individually, in the newer +.BR clone3 () +interface the arguments are packaged into the +.I clone_args +structure shown above. +This structure allows for a superset of the information passed via the +.BR clone () +arguments. +.PP +The following table shows the equivalence between the arguments of +.BR clone () +and the fields in the +.I clone_args +argument supplied to +.BR clone3 (): +.RS 4 +.TS +lb lb lb +l l l +li li l. +clone() clone3() Notes + \fIcl_args\fP field +flags & \[ti]0xff flags T{ +For most flags; details below +T} +parent_tid pidfd See CLONE_PIDFD +child_tid child_tid See CLONE_CHILD_SETTID +parent_tid parent_tid See CLONE_PARENT_SETTID +flags & 0xff exit_signal +stack stack +\fP---\fP stack_size +tls tls See CLONE_SETTLS +\fP---\fP set_tid See below for details +\fP---\fP set_tid_size +\fP---\fP cgroup See CLONE_INTO_CGROUP +.TE +.RE +.\" +.SS The child termination signal +When the child process terminates, a signal may be sent to the parent. +The termination signal is specified in the low byte of +.I flags +.RB ( clone ()) +or in +.I cl_args.exit_signal +.RB ( clone3 ()). +If this signal is specified as anything other than +.BR SIGCHLD , +then the parent process must specify the +.B __WALL +or +.B __WCLONE +options when waiting for the child with +.BR wait (2). +If no signal (i.e., zero) is specified, then the parent process is not signaled +when the child terminates. +.\" +.SS The set_tid array +By default, the kernel chooses the next sequential PID for the new +process in each of the PID namespaces where it is present. +When creating a process with +.BR clone3 (), +the +.I set_tid +array (available since Linux 5.5) +can be used to select specific PIDs for the process in some +or all of the PID namespaces where it is present. +If the PID of the newly created process should be set only for the current +PID namespace or in the newly created PID namespace (if +.I flags +contains +.BR CLONE_NEWPID ) +then the first element in the +.I set_tid +array has to be the desired PID and +.I set_tid_size +needs to be 1. +.PP +If the PID of the newly created process should have a certain value in +multiple PID namespaces, then the +.I set_tid +array can have multiple entries. +The first entry defines the PID in the most +deeply nested PID namespace and each of the following entries contains +the PID in the +corresponding ancestor PID namespace. +The number of PID namespaces in which a PID +should be set is defined by +.I set_tid_size +which cannot be larger than the number of currently nested PID namespaces. +.PP +To create a process with the following PIDs in a PID namespace hierarchy: +.RS 4 +.TS +lb lb lb +l l l. +PID NS level Requested PID Notes +0 31496 Outermost PID namespace +1 42 +2 7 Innermost PID namespace +.TE +.RE +.PP +Set the array to: +.PP +.in +4n +.EX +set_tid[0] = 7; +set_tid[1] = 42; +set_tid[2] = 31496; +set_tid_size = 3; +.EE +.in +.PP +If only the PIDs in the two innermost PID namespaces +need to be specified, set the array to: +.PP +.in +4n +.EX +set_tid[0] = 7; +set_tid[1] = 42; +set_tid_size = 2; +.EE +.in +.PP +The PID in the PID namespaces outside the two innermost PID namespaces +is selected the same way as any other PID is selected. +.PP +The +.I set_tid +feature requires +.B CAP_SYS_ADMIN +or +(since Linux 5.9) +.\" commit 124ea650d3072b005457faed69909221c2905a1f +.\" commit 1caef81da05a84a40dbf02110e967ce6d1135ff6 +.B CAP_CHECKPOINT_RESTORE +in all owning user namespaces of the target PID namespaces. +.PP +Callers may only choose a PID greater than 1 in a given PID namespace +if an +.B init +process (i.e., a process with PID 1) already exists in that namespace. +Otherwise the PID +entry for this PID namespace must be 1. +.\" +.SS The flags mask +Both +.BR clone () +and +.BR clone3 () +allow a flags bit mask that modifies their behavior +and allows the caller to specify what is shared between the calling process +and the child process. +This bit mask\[em]the +.I flags +argument of +.BR clone () +or the +.I cl_args.flags +field passed to +.BR clone3 ()\[em]is +referred to as the +.I flags +mask in the remainder of this page. +.PP +The +.I flags +mask is specified as a bitwise-OR of zero or more of +the constants listed below. +Except as noted below, these flags are available +(and have the same effect) in both +.BR clone () +and +.BR clone3 (). +.TP +.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)" +Clear (zero) the child thread ID at the location pointed to by +.I child_tid +.RB ( clone ()) +or +.I cl_args.child_tid +.RB ( clone3 ()) +in child memory when the child exits, and do a wakeup on the futex +at that address. +The address involved may be changed by the +.BR set_tid_address (2) +system call. +This is used by threading libraries. +.TP +.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)" +Store the child thread ID at the location pointed to by +.I child_tid +.RB ( clone ()) +or +.I cl_args.child_tid +.RB ( clone3 ()) +in the child's memory. +The store operation completes before the clone call +returns control to user space in the child process. +(Note that the store operation may not have completed before the clone call +returns in the parent process, which is relevant if the +.B CLONE_VM +flag is also employed.) +.TP +.BR CLONE_CLEAR_SIGHAND " (since Linux 5.5)" +.\" commit b612e5df4587c934bd056bf05f4a1deca4de4f75 +By default, signal dispositions in the child thread are the same as +in the parent. +If this flag is specified, +then all signals that are handled in the parent +are reset to their default dispositions +.RB ( SIG_DFL ) +in the child. +.IP +Specifying this flag together with +.B CLONE_SIGHAND +is nonsensical and disallowed. +.TP +.BR CLONE_DETACHED " (historical)" +For a while (during the Linux 2.5 development series) +.\" added in Linux 2.5.32; removed in Linux 2.6.0-test4 +there was a +.B CLONE_DETACHED +flag, +which caused the parent not to receive a signal when the child terminated. +Ultimately, the effect of this flag was subsumed under the +.B CLONE_THREAD +flag and by the time Linux 2.6.0 was released, this flag had no effect. +Starting in Linux 2.6.2, the need to give this flag together with +.B CLONE_THREAD +disappeared. +.IP +This flag is still defined, but it is usually ignored when calling +.BR clone (). +However, see the description of +.B CLONE_PIDFD +for some exceptions. +.TP +.BR CLONE_FILES " (since Linux 2.0)" +If +.B CLONE_FILES +is set, the calling process and the child process share the same file +descriptor table. +Any file descriptor created by the calling process or by the child +process is also valid in the other process. +Similarly, if one of the processes closes a file descriptor, +or changes its associated flags (using the +.BR fcntl (2) +.B F_SETFD +operation), the other process is also affected. +If a process sharing a file descriptor table calls +.BR execve (2), +its file descriptor table is duplicated (unshared). +.IP +If +.B CLONE_FILES +is not set, the child process inherits a copy of all file descriptors +opened in the calling process at the time of the clone call. +Subsequent operations that open or close file descriptors, +or change file descriptor flags, +performed by either the calling +process or the child process do not affect the other process. +Note, however, +that the duplicated file descriptors in the child refer to the same +open file descriptions as the corresponding file descriptors +in the calling process, +and thus share file offsets and file status flags (see +.BR open (2)). +.TP +.BR CLONE_FS " (since Linux 2.0)" +If +.B CLONE_FS +is set, the caller and the child process share the same filesystem +information. +This includes the root of the filesystem, the current +working directory, and the umask. +Any call to +.BR chroot (2), +.BR chdir (2), +or +.BR umask (2) +performed by the calling process or the child process also affects the +other process. +.IP +If +.B CLONE_FS +is not set, the child process works on a copy of the filesystem +information of the calling process at the time of the clone call. +Calls to +.BR chroot (2), +.BR chdir (2), +or +.BR umask (2) +performed later by one of the processes do not affect the other process. +.TP +.BR CLONE_INTO_CGROUP " (since Linux 5.7)" +.\" commit ef2c41cf38a7559bbf91af42d5b6a4429db8fc68 +By default, a child process is placed in the same version 2 +cgroup as its parent. +The +.B CLONE_INTO_CGROUP +flag allows the child process to be created in a different version 2 cgroup. +(Note that +.B CLONE_INTO_CGROUP +has effect only for version 2 cgroups.) +.IP +In order to place the child process in a different cgroup, +the caller specifies +.B CLONE_INTO_CGROUP +in +.I cl_args.flags +and passes a file descriptor that refers to a version 2 cgroup in the +.I cl_args.cgroup +field. +(This file descriptor can be obtained by opening a cgroup v2 directory +using either the +.B O_RDONLY +or the +.B O_PATH +flag.) +Note that all of the usual restrictions (described in +.BR cgroups (7)) +on placing a process into a version 2 cgroup apply. +.IP +Among the possible use cases for +.B CLONE_INTO_CGROUP +are the following: +.RS +.IP \[bu] 3 +Spawning a process into a cgroup different from the parent's cgroup +makes it possible for a service manager to directly spawn new +services into dedicated cgroups. +This eliminates the accounting +jitter that would be caused if the child process was first created in the +same cgroup as the parent and then +moved into the target cgroup. +Furthermore, spawning the child process directly into a target cgroup +is significantly cheaper than moving the child process into +the target cgroup after it has been created. +.IP \[bu] +The +.B CLONE_INTO_CGROUP +flag also allows the creation of +frozen child processes by spawning them into a frozen cgroup. +(See +.BR cgroups (7) +for a description of the freezer controller.) +.IP \[bu] +For threaded applications (or even thread implementations which +make use of cgroups to limit individual threads), it is possible to +establish a fixed cgroup layout before spawning each thread +directly into its target cgroup. +.RE +.TP +.BR CLONE_IO " (since Linux 2.6.25)" +If +.B CLONE_IO +is set, then the new process shares an I/O context with +the calling process. +If this flag is not set, then (as with +.BR fork (2)) +the new process has its own I/O context. +.IP +.\" The following based on text from Jens Axboe +The I/O context is the I/O scope of the disk scheduler (i.e., +what the I/O scheduler uses to model scheduling of a process's I/O). +If processes share the same I/O context, +they are treated as one by the I/O scheduler. +As a consequence, they get to share disk time. +For some I/O schedulers, +.\" the anticipatory and CFQ scheduler +if two processes share an I/O context, +they will be allowed to interleave their disk access. +If several threads are doing I/O on behalf of the same process +.RB ( aio_read (3), +for instance), they should employ +.B CLONE_IO +to get better I/O performance. +.\" with CFQ and AS. +.IP +If the kernel is not configured with the +.B CONFIG_BLOCK +option, this flag is a no-op. +.TP +.BR CLONE_NEWCGROUP " (since Linux 4.6)" +Create the process in a new cgroup namespace. +If this flag is not set, then (as with +.BR fork (2)) +the process is created in the same cgroup namespaces as the calling process. +.IP +For further information on cgroup namespaces, see +.BR cgroup_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWCGROUP . +.\" +.TP +.BR CLONE_NEWIPC " (since Linux 2.6.19)" +If +.B CLONE_NEWIPC +is set, then create the process in a new IPC namespace. +If this flag is not set, then (as with +.BR fork (2)), +the process is created in the same IPC namespace as +the calling process. +.IP +For further information on IPC namespaces, see +.BR ipc_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWIPC . +This flag can't be specified in conjunction with +.BR CLONE_SYSVSEM . +.TP +.BR CLONE_NEWNET " (since Linux 2.6.24)" +(The implementation of this flag was completed only +by about Linux 2.6.29.) +.IP +If +.B CLONE_NEWNET +is set, then create the process in a new network namespace. +If this flag is not set, then (as with +.BR fork (2)) +the process is created in the same network namespace as +the calling process. +.IP +For further information on network namespaces, see +.BR network_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWNET . +.TP +.BR CLONE_NEWNS " (since Linux 2.4.19)" +If +.B CLONE_NEWNS +is set, the cloned child is started in a new mount namespace, +initialized with a copy of the namespace of the parent. +If +.B CLONE_NEWNS +is not set, the child lives in the same mount +namespace as the parent. +.IP +For further information on mount namespaces, see +.BR namespaces (7) +and +.BR mount_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWNS . +It is not permitted to specify both +.B CLONE_NEWNS +and +.B CLONE_FS +.\" See https://lwn.net/Articles/543273/ +in the same clone call. +.TP +.BR CLONE_NEWPID " (since Linux 2.6.24)" +.\" This explanation draws a lot of details from +.\" http://lwn.net/Articles/259217/ +.\" Authors: Pavel Emelyanov <xemul@openvz.org> +.\" and Kir Kolyshkin <kir@openvz.org> +.\" +.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264 +.\" Author: Pavel Emelyanov <xemul@openvz.org> +If +.B CLONE_NEWPID +is set, then create the process in a new PID namespace. +If this flag is not set, then (as with +.BR fork (2)) +the process is created in the same PID namespace as +the calling process. +.IP +For further information on PID namespaces, see +.BR namespaces (7) +and +.BR pid_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWPID . +This flag can't be specified in conjunction with +.B CLONE_THREAD +or +.BR CLONE_PARENT . +.TP +.B CLONE_NEWUSER +(This flag first became meaningful for +.BR clone () +in Linux 2.6.23, +the current +.BR clone () +semantics were merged in Linux 3.5, +and the final pieces to make the user namespaces completely usable were +merged in Linux 3.8.) +.IP +If +.B CLONE_NEWUSER +is set, then create the process in a new user namespace. +If this flag is not set, then (as with +.BR fork (2)) +the process is created in the same user namespace as the calling process. +.IP +For further information on user namespaces, see +.BR namespaces (7) +and +.BR user_namespaces (7). +.IP +Before Linux 3.8, use of +.B CLONE_NEWUSER +required that the caller have three capabilities: +.BR CAP_SYS_ADMIN , +.BR CAP_SETUID , +and +.BR CAP_SETGID . +.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed +Starting with Linux 3.8, +no privileges are needed to create a user namespace. +.IP +This flag can't be specified in conjunction with +.B CLONE_THREAD +or +.BR CLONE_PARENT . +For security reasons, +.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 +.\" https://lwn.net/Articles/543273/ +.\" The fix actually went into Linux 3.9 and into Linux 3.8.3. However, user namespaces +.\" were, for practical purposes, unusable in earlier Linux 3.8.x because of the +.\" various filesystems that didn't support userns. +.B CLONE_NEWUSER +cannot be specified in conjunction with +.BR CLONE_FS . +.TP +.BR CLONE_NEWUTS " (since Linux 2.6.19)" +If +.B CLONE_NEWUTS +is set, then create the process in a new UTS namespace, +whose identifiers are initialized by duplicating the identifiers +from the UTS namespace of the calling process. +If this flag is not set, then (as with +.BR fork (2)) +the process is created in the same UTS namespace as +the calling process. +.IP +For further information on UTS namespaces, see +.BR uts_namespaces (7). +.IP +Only a privileged process +.RB ( CAP_SYS_ADMIN ) +can employ +.BR CLONE_NEWUTS . +.TP +.BR CLONE_PARENT " (since Linux 2.3.12)" +If +.B CLONE_PARENT +is set, then the parent of the new child (as returned by +.BR getppid (2)) +will be the same as that of the calling process. +.IP +If +.B CLONE_PARENT +is not set, then (as with +.BR fork (2)) +the child's parent is the calling process. +.IP +Note that it is the parent process, as returned by +.BR getppid (2), +which is signaled when the child terminates, so that +if +.B CLONE_PARENT +is set, then the parent of the calling process, rather than the +calling process itself, is signaled. +.IP +The +.B CLONE_PARENT +flag can't be used in clone calls by the +global init process (PID 1 in the initial PID namespace) +and init processes in other PID namespaces. +This restriction prevents the creation of multi-rooted process trees +as well as the creation of unreapable zombies in the initial PID namespace. +.TP +.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)" +Store the child thread ID at the location pointed to by +.I parent_tid +.RB ( clone ()) +or +.I cl_args.parent_tid +.RB ( clone3 ()) +in the parent's memory. +(In Linux 2.5.32-2.5.48 there was a flag +.B CLONE_SETTID +that did this.) +The store operation completes before the clone call +returns control to user space. +.TP +.BR CLONE_PID " (Linux 2.0 to Linux 2.5.15)" +If +.B CLONE_PID +is set, the child process is created with the same process ID as +the calling process. +This is good for hacking the system, but otherwise +of not much use. +From Linux 2.3.21 onward, this flag could be +specified only by the system boot process (PID 0). +The flag disappeared completely from the kernel sources in Linux 2.5.16. +Subsequently, the kernel silently ignored this bit if it was specified in the +.I flags +mask. +Much later, the same bit was recycled for use as the +.B CLONE_PIDFD +flag. +.TP +.BR CLONE_PIDFD " (since Linux 5.2)" +.\" commit b3e5838252665ee4cfa76b82bdf1198dca81e5be +If this flag is specified, +a PID file descriptor referring to the child process is allocated +and placed at a specified location in the parent's memory. +The close-on-exec flag is set on this new file descriptor. +PID file descriptors can be used for the purposes described in +.BR pidfd_open (2). +.RS +.IP \[bu] 3 +When using +.BR clone3 (), +the PID file descriptor is placed at the location pointed to by +.IR cl_args.pidfd . +.IP \[bu] +When using +.BR clone (), +the PID file descriptor is placed at the location pointed to by +.IR parent_tid . +Since the +.I parent_tid +argument is used to return the PID file descriptor, +.B CLONE_PIDFD +cannot be used with +.B CLONE_PARENT_SETTID +when calling +.BR clone (). +.RE +.IP +It is currently not possible to use this flag together with +.B CLONE_THREAD. +This means that the process identified by the PID file descriptor +will always be a thread group leader. +.IP +If the obsolete +.B CLONE_DETACHED +flag is specified alongside +.B CLONE_PIDFD +when calling +.BR clone (), +an error is returned. +An error also results if +.B CLONE_DETACHED +is specified when calling +.BR clone3 (). +This error behavior ensures that the bit corresponding to +.B CLONE_DETACHED +can be reused for further PID file descriptor features in the future. +.TP +.BR CLONE_PTRACE " (since Linux 2.2)" +If +.B CLONE_PTRACE +is specified, and the calling process is being traced, +then trace the child also (see +.BR ptrace (2)). +.TP +.BR CLONE_SETTLS " (since Linux 2.5.32)" +The TLS (Thread Local Storage) descriptor is set to +.IR tls . +.IP +The interpretation of +.I tls +and the resulting effect is architecture dependent. +On x86, +.I tls +is interpreted as a +.I struct user_desc\~* +(see +.BR set_thread_area (2)). +On x86-64 it is the new value to be set for the %fs base register +(see the +.B ARCH_SET_FS +argument to +.BR arch_prctl (2)). +On architectures with a dedicated TLS register, it is the new value +of that register. +.IP +Use of this flag requires detailed knowledge and generally it +should not be used except in libraries implementing threading. +.TP +.BR CLONE_SIGHAND " (since Linux 2.0)" +If +.B CLONE_SIGHAND +is set, the calling process and the child process share the same table of +signal handlers. +If the calling process or child process calls +.BR sigaction (2) +to change the behavior associated with a signal, the behavior is +changed in the other process as well. +However, the calling process and child +processes still have distinct signal masks and sets of pending +signals. +So, one of them may block or unblock signals using +.BR sigprocmask (2) +without affecting the other process. +.IP +If +.B CLONE_SIGHAND +is not set, the child process inherits a copy of the signal handlers +of the calling process at the time of the clone call. +Calls to +.BR sigaction (2) +performed later by one of the processes have no effect on the other +process. +.IP +Since Linux 2.6.0, +.\" Precisely: Linux 2.6.0-test6 +the +.I flags +mask must also include +.B CLONE_VM +if +.B CLONE_SIGHAND +is specified. +.TP +.BR CLONE_STOPPED " (since Linux 2.6.0)" +.\" Precisely: Linux 2.6.0-test2 +If +.B CLONE_STOPPED +is set, then the child is initially stopped (as though it was sent a +.B SIGSTOP +signal), and must be resumed by sending it a +.B SIGCONT +signal. +.IP +This flag was +.I deprecated +from Linux 2.6.25 onward, +and was +.I removed +altogether in Linux 2.6.38. +Since then, the kernel silently ignores it without error. +.\" glibc 2.8 removed this defn from bits/sched.h +Starting with Linux 4.6, the same bit was reused for the +.B CLONE_NEWCGROUP +flag. +.TP +.BR CLONE_SYSVSEM " (since Linux 2.5.10)" +If +.B CLONE_SYSVSEM +is set, then the child and the calling process share +a single list of System V semaphore adjustment +.RI ( semadj ) +values (see +.BR semop (2)). +In this case, the shared list accumulates +.I semadj +values across all processes sharing the list, +and semaphore adjustments are performed only when the last process +that is sharing the list terminates (or ceases sharing the list using +.BR unshare (2)). +If this flag is not set, then the child has a separate +.I semadj +list that is initially empty. +.TP +.BR CLONE_THREAD " (since Linux 2.4.0)" +.\" Precisely: Linux 2.6.0-test8 +If +.B CLONE_THREAD +is set, the child is placed in the same thread group as the calling process. +To make the remainder of the discussion of +.B CLONE_THREAD +more readable, the term "thread" is used to refer to the +processes within a thread group. +.IP +Thread groups were a feature added in Linux 2.4 to support the +POSIX threads notion of a set of threads that share a single PID. +Internally, this shared PID is the so-called +thread group identifier (TGID) for the thread group. +Since Linux 2.4, calls to +.BR getpid (2) +return the TGID of the caller. +.IP +The threads within a group can be distinguished by their (system-wide) +unique thread IDs (TID). +A new thread's TID is available as the function result +returned to the caller, +and a thread can obtain +its own TID using +.BR gettid (2). +.IP +When a clone call is made without specifying +.BR CLONE_THREAD , +then the resulting thread is placed in a new thread group +whose TGID is the same as the thread's TID. +This thread is the +.I leader +of the new thread group. +.IP +A new thread created with +.B CLONE_THREAD +has the same parent process as the process that made the clone call +(i.e., like +.BR CLONE_PARENT ), +so that calls to +.BR getppid (2) +return the same value for all of the threads in a thread group. +When a +.B CLONE_THREAD +thread terminates, the thread that created it is not sent a +.B SIGCHLD +(or other termination) signal; +nor can the status of such a thread be obtained +using +.BR wait (2). +(The thread is said to be +.IR detached .) +.IP +After all of the threads in a thread group terminate +the parent process of the thread group is sent a +.B SIGCHLD +(or other termination) signal. +.IP +If any of the threads in a thread group performs an +.BR execve (2), +then all threads other than the thread group leader are terminated, +and the new program is executed in the thread group leader. +.IP +If one of the threads in a thread group creates a child using +.BR fork (2), +then any thread in the group can +.BR wait (2) +for that child. +.IP +Since Linux 2.5.35, the +.I flags +mask must also include +.B CLONE_SIGHAND +if +.B CLONE_THREAD +is specified +(and note that, since Linux 2.6.0, +.\" Precisely: Linux 2.6.0-test6 +.B CLONE_SIGHAND +also requires +.B CLONE_VM +to be included). +.IP +Signal dispositions and actions are process-wide: +if an unhandled signal is delivered to a thread, then +it will affect (terminate, stop, continue, be ignored in) +all members of the thread group. +.IP +Each thread has its own signal mask, as set by +.BR sigprocmask (2). +.IP +A signal may be process-directed or thread-directed. +A process-directed signal is targeted at a thread group (i.e., a TGID), +and is delivered to an arbitrarily selected thread from among those +that are not blocking the signal. +A signal may be process-directed because it was generated by the kernel +for reasons other than a hardware exception, or because it was sent using +.BR kill (2) +or +.BR sigqueue (3). +A thread-directed signal is targeted at (i.e., delivered to) +a specific thread. +A signal may be thread directed because it was sent using +.BR tgkill (2) +or +.BR pthread_sigqueue (3), +or because the thread executed a machine language instruction that triggered +a hardware exception +(e.g., invalid memory access triggering +.B SIGSEGV +or a floating-point exception triggering +.BR SIGFPE ). +.IP +A call to +.BR sigpending (2) +returns a signal set that is the union of the pending process-directed +signals and the signals that are pending for the calling thread. +.IP +If a process-directed signal is delivered to a thread group, +and the thread group has installed a handler for the signal, then +the handler is invoked in exactly one, arbitrarily selected +member of the thread group that has not blocked the signal. +If multiple threads in a group are waiting to accept the same signal using +.BR sigwaitinfo (2), +the kernel will arbitrarily select one of these threads +to receive the signal. +.TP +.BR CLONE_UNTRACED " (since Linux 2.5.46)" +If +.B CLONE_UNTRACED +is specified, then a tracing process cannot force +.B CLONE_PTRACE +on this child process. +.TP +.BR CLONE_VFORK " (since Linux 2.2)" +If +.B CLONE_VFORK +is set, the execution of the calling process is suspended +until the child releases its virtual memory +resources via a call to +.BR execve (2) +or +.BR _exit (2) +(as with +.BR vfork (2)). +.IP +If +.B CLONE_VFORK +is not set, then both the calling process and the child are schedulable +after the call, and an application should not rely on execution occurring +in any particular order. +.TP +.BR CLONE_VM " (since Linux 2.0)" +If +.B CLONE_VM +is set, the calling process and the child process run in the same memory +space. +In particular, memory writes performed by the calling process +or by the child process are also visible in the other process. +Moreover, any memory mapping or unmapping performed with +.BR mmap (2) +or +.BR munmap (2) +by the child or calling process also affects the other process. +.IP +If +.B CLONE_VM +is not set, the child process runs in a separate copy of the memory +space of the calling process at the time of the clone call. +Memory writes or file mappings/unmappings performed by one of the +processes do not affect the other, as with +.BR fork (2). +.IP +If the +.B CLONE_VM +flag is specified and the +.B CLONE_VFORK +flag is not specified, +then any alternate signal stack that was established by +.BR sigaltstack (2) +is cleared in the child process. +.SH RETURN VALUE +.\" gettid(2) returns current->pid; +.\" getpid(2) returns current->tgid; +On success, the thread ID of the child process is returned +in the caller's thread of execution. +On failure, \-1 is returned +in the caller's context, no child process is created, and +.I errno +is set to indicate the error. +.SH ERRORS +.TP +.BR EACCES " (" clone3 "() only)" +.B CLONE_INTO_CGROUP +was specified in +.IR cl_args.flags , +but the restrictions (described in +.BR cgroups (7)) +on placing the child process into the version 2 cgroup referred to by +.I cl_args.cgroup +are not met. +.TP +.B EAGAIN +Too many processes are already running; see +.BR fork (2). +.TP +.BR EBUSY " (" clone3 "() only)" +.B CLONE_INTO_CGROUP +was specified in +.IR cl_args.flags , +but the file descriptor specified in +.I cl_args.cgroup +refers to a version 2 cgroup in which a domain controller is enabled. +.TP +.BR EEXIST " (" clone3 "() only)" +One (or more) of the PIDs specified in +.I set_tid +already exists in the corresponding PID namespace. +.TP +.B EINVAL +Both +.B CLONE_SIGHAND +and +.B CLONE_CLEAR_SIGHAND +were specified in the +.I flags +mask. +.TP +.B EINVAL +.B CLONE_SIGHAND +was specified in the +.I flags +mask, but +.B CLONE_VM +was not. +(Since Linux 2.6.0.) +.\" Precisely: Linux 2.6.0-test6 +.TP +.B EINVAL +.B CLONE_THREAD +was specified in the +.I flags +mask, but +.B CLONE_SIGHAND +was not. +(Since Linux 2.5.35.) +.\" .TP +.\" .B EINVAL +.\" Precisely one of +.\" .B CLONE_DETACHED +.\" and +.\" .B CLONE_THREAD +.\" was specified. +.\" (Since Linux 2.6.0-test6.) +.TP +.B EINVAL +.B CLONE_THREAD +was specified in the +.I flags +mask, but the current process previously called +.BR unshare (2) +with the +.B CLONE_NEWPID +flag or used +.BR setns (2) +to reassociate itself with a PID namespace. +.TP +.B EINVAL +.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 +Both +.B CLONE_FS +and +.B CLONE_NEWNS +were specified in the +.I flags +mask. +.TP +.BR EINVAL " (since Linux 3.9)" +Both +.B CLONE_NEWUSER +and +.B CLONE_FS +were specified in the +.I flags +mask. +.TP +.B EINVAL +Both +.B CLONE_NEWIPC +and +.B CLONE_SYSVSEM +were specified in the +.I flags +mask. +.TP +.B EINVAL +One (or both) of +.B CLONE_NEWPID +or +.B CLONE_NEWUSER +and one (or both) of +.B CLONE_THREAD +or +.B CLONE_PARENT +were specified in the +.I flags +mask. +.TP +.BR EINVAL " (since Linux 2.6.32)" +.\" commit 123be07b0b399670a7cc3d82fef0cb4f93ef885c +.B CLONE_PARENT +was specified, and the caller is an init process. +.TP +.B EINVAL +Returned by the glibc +.BR clone () +wrapper function when +.I fn +or +.I stack +is specified as NULL. +.TP +.B EINVAL +.B CLONE_NEWIPC +was specified in the +.I flags +mask, +but the kernel was not configured with the +.B CONFIG_SYSVIPC +and +.B CONFIG_IPC_NS +options. +.TP +.B EINVAL +.B CLONE_NEWNET +was specified in the +.I flags +mask, +but the kernel was not configured with the +.B CONFIG_NET_NS +option. +.TP +.B EINVAL +.B CLONE_NEWPID +was specified in the +.I flags +mask, +but the kernel was not configured with the +.B CONFIG_PID_NS +option. +.TP +.B EINVAL +.B CLONE_NEWUSER +was specified in the +.I flags +mask, +but the kernel was not configured with the +.B CONFIG_USER_NS +option. +.TP +.B EINVAL +.B CLONE_NEWUTS +was specified in the +.I flags +mask, +but the kernel was not configured with the +.B CONFIG_UTS_NS +option. +.TP +.B EINVAL +.I stack +is not aligned to a suitable boundary for this architecture. +For example, on aarch64, +.I stack +must be a multiple of 16. +.TP +.BR EINVAL " (" clone3 "() only)" +.B CLONE_DETACHED +was specified in the +.I flags +mask. +.TP +.BR EINVAL " (" clone "() only)" +.B CLONE_PIDFD +was specified together with +.B CLONE_DETACHED +in the +.I flags +mask. +.TP +.B EINVAL +.B CLONE_PIDFD +was specified together with +.B CLONE_THREAD +in the +.I flags +mask. +.TP +.BR "EINVAL " "(" clone "() only)" +.B CLONE_PIDFD +was specified together with +.B CLONE_PARENT_SETTID +in the +.I flags +mask. +.TP +.BR EINVAL " (" clone3 "() only)" +.I set_tid_size +is greater than the number of nested PID namespaces. +.TP +.BR EINVAL " (" clone3 "() only)" +One of the PIDs specified in +.I set_tid +was an invalid. +.TP +.BR EINVAL " (" clone3 "() only)" +.\" commit 7f192e3cd316ba58c88dfa26796cf77789dd9872 +.B CLONE_THREAD +or +.B CLONE_PARENT +was specified in the +.I flags +mask, but a signal was specified in +.I exit_signal. +.TP +.BR EINVAL " (AArch64 only, Linux 4.6 and earlier)" +.I stack +was not aligned to a 128-bit boundary. +.TP +.B ENOMEM +Cannot allocate sufficient memory to allocate a task structure for the +child, or to copy those parts of the caller's context that need to be +copied. +.TP +.BR ENOSPC " (since Linux 3.7)" +.\" commit f2302505775fd13ba93f034206f1e2a587017929 +.B CLONE_NEWPID +was specified in the +.I flags +mask, +but the limit on the nesting depth of PID namespaces +would have been exceeded; see +.BR pid_namespaces (7). +.TP +.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS ) +.B CLONE_NEWUSER +was specified in the +.I flags +mask, and the call would cause the limit on the number of +nested user namespaces to be exceeded. +See +.BR user_namespaces (7). +.IP +From Linux 3.11 to Linux 4.8, the error diagnosed in this case was +.BR EUSERS . +.TP +.BR ENOSPC " (since Linux 4.9)" +One of the values in the +.I flags +mask specified the creation of a new user namespace, +but doing so would have caused the limit defined by the corresponding file in +.I /proc/sys/user +to be exceeded. +For further details, see +.BR namespaces (7). +.TP +.BR EOPNOTSUPP " (" clone3 "() only)" +.B CLONE_INTO_CGROUP +was specified in +.IR cl_args.flags , +but the file descriptor specified in +.I cl_args.cgroup +refers to a version 2 cgroup that is in the +.I domain invalid +state. +.TP +.B EPERM +.BR CLONE_NEWCGROUP , +.BR CLONE_NEWIPC , +.BR CLONE_NEWNET , +.BR CLONE_NEWNS , +.BR CLONE_NEWPID , +or +.B CLONE_NEWUTS +was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP). +.TP +.B EPERM +.B CLONE_PID +was specified by a process other than process 0. +(This error occurs only on Linux 2.5.15 and earlier.) +.TP +.B EPERM +.B CLONE_NEWUSER +was specified in the +.I flags +mask, +but either the effective user ID or the effective group ID of the caller +does not have a mapping in the parent namespace (see +.BR user_namespaces (7)). +.TP +.BR EPERM " (since Linux 3.9)" +.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d +.B CLONE_NEWUSER +was specified in the +.I flags +mask and the caller is in a chroot environment +.\" FIXME What is the rationale for this restriction? +(i.e., the caller's root directory does not match the root directory +of the mount namespace in which it resides). +.TP +.BR EPERM " (" clone3 "() only)" +.I set_tid_size +was greater than zero, and the caller lacks the +.B CAP_SYS_ADMIN +capability in one or more of the user namespaces that own the +corresponding PID namespaces. +.TP +.BR ERESTARTNOINTR " (since Linux 2.6.17)" +.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56 +System call was interrupted by a signal and will be restarted. +(This can be seen only during a trace.) +.TP +.BR EUSERS " (Linux 3.11 to Linux 4.8)" +.B CLONE_NEWUSER +was specified in the +.I flags +mask, +and the limit on the number of nested user namespaces would be exceeded. +See the discussion of the +.B ENOSPC +error above. +.SH VERSIONS +The glibc +.BR clone () +wrapper function makes some changes +in the memory pointed to by +.I stack +(changes required to set the stack up correctly for the child) +.I before +invoking the +.BR clone () +system call. +So, in cases where +.BR clone () +is used to recursively create children, +do not use the buffer employed for the parent's stack +as the stack of the child. +.PP +On i386, +.BR clone () +should not be called through vsyscall, but directly through +.IR "int $0x80" . +.SS C library/kernel differences +The raw +.BR clone () +system call corresponds more closely to +.BR fork (2) +in that execution in the child continues from the point of the +call. +As such, the +.I fn +and +.I arg +arguments of the +.BR clone () +wrapper function are omitted. +.PP +In contrast to the glibc wrapper, the raw +.BR clone () +system call accepts NULL as a +.I stack +argument (and +.BR clone3 () +likewise allows +.I cl_args.stack +to be NULL). +In this case, the child uses a duplicate of the parent's stack. +(Copy-on-write semantics ensure that the child gets separate copies +of stack pages when either process modifies the stack.) +In this case, for correct operation, the +.B CLONE_VM +option should not be specified. +(If the child +.I shares +the parent's memory because of the use of the +.B CLONE_VM +flag, +then no copy-on-write duplication occurs and chaos is likely to result.) +.PP +The order of the arguments also differs in the raw system call, +and there are variations in the arguments across architectures, +as detailed in the following paragraphs. +.PP +The raw system call interface on x86-64 and some other architectures +(including sh, tile, and alpha) is: +.PP +.in +4n +.EX +.BI "long clone(unsigned long " flags ", void *" stack , +.BI " int *" parent_tid ", int *" child_tid , +.BI " unsigned long " tls ); +.EE +.in +.PP +On x86-32, and several other common architectures +(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa, +and MIPS), +.\" CONFIG_CLONE_BACKWARDS +the order of the last two arguments is reversed: +.PP +.in +4n +.EX +.BI "long clone(unsigned long " flags ", void *" stack , +.BI " int *" parent_tid ", unsigned long " tls , +.BI " int *" child_tid ); +.EE +.in +.PP +On the cris and s390 architectures, +.\" CONFIG_CLONE_BACKWARDS2 +the order of the first two arguments is reversed: +.PP +.in +4n +.EX +.BI "long clone(void *" stack ", unsigned long " flags , +.BI " int *" parent_tid ", int *" child_tid , +.BI " unsigned long " tls ); +.EE +.in +.PP +On the microblaze architecture, +.\" CONFIG_CLONE_BACKWARDS3 +an additional argument is supplied: +.PP +.in +4n +.EX +.BI "long clone(unsigned long " flags ", void *" stack , +.BI " int " stack_size , "\fR /* Size of stack */" +.BI " int *" parent_tid ", int *" child_tid , +.BI " unsigned long " tls ); +.EE +.in +.\" +.SS blackfin, m68k, and sparc +.\" Mike Frysinger noted in a 2013 mail: +.\" these arches don't define __ARCH_WANT_SYS_CLONE: +.\" blackfin ia64 m68k sparc +The argument-passing conventions on +blackfin, m68k, and sparc are different from the descriptions above. +For details, see the kernel (and glibc) source. +.SS ia64 +On ia64, a different interface is used: +.PP +.in +4n +.EX +.BI "int __clone2(int (*" "fn" ")(void *)," +.BI " void *" stack_base ", size_t " stack_size , +.BI " int " flags ", void *" "arg" ", ..." +.BI " /* pid_t *" parent_tid ", struct user_desc *" tls , +.BI " pid_t *" child_tid " */ );" +.EE +.in +.PP +The prototype shown above is for the glibc wrapper function; +for the system call itself, +the prototype can be described as follows (it is identical to the +.BR clone () +prototype on microblaze): +.PP +.in +4n +.EX +.BI "long clone2(unsigned long " flags ", void *" stack_base , +.BI " int " stack_size , "\fR /* Size of stack */" +.BI " int *" parent_tid ", int *" child_tid , +.BI " unsigned long " tls ); +.EE +.in +.PP +.BR __clone2 () +operates in the same way as +.BR clone (), +except that +.I stack_base +points to the lowest address of the child's stack area, +and +.I stack_size +specifies the size of the stack pointed to by +.IR stack_base . +.SH STANDARDS +Linux. +.SH HISTORY +.TP +.BR clone3 () +Linux 5.3. +.\" There is no entry for +.\" .BR clone () +.\" in libc5. +.\" glibc2 provides +.\" .BR clone () +.\" as described in this manual page. +.SS Linux 2.4 and earlier +In the Linux 2.4.x series, +.B CLONE_THREAD +generally does not make the parent of the new thread the same +as the parent of the calling process. +However, from Linux 2.4.7 to Linux 2.4.18 the +.B CLONE_THREAD +flag implied the +.B CLONE_PARENT +flag (as in Linux 2.6.0 and later). +.PP +In Linux 2.4 and earlier, +.BR clone () +does not take arguments +.IR parent_tid , +.IR tls , +and +.IR child_tid . +.SH NOTES +One use of these systems calls +is to implement threads: multiple flows of control in a program that +run concurrently in a shared address space. +.PP +The +.BR kcmp (2) +system call can be used to test whether two processes share various +resources such as a file descriptor table, +System V semaphore undo operations, or a virtual address space. +.PP +Handlers registered using +.BR pthread_atfork (3) +are not executed during a clone call. +.SH BUGS +GNU C library versions 2.3.4 up to and including 2.24 +contained a wrapper function for +.BR getpid (2) +that performed caching of PIDs. +This caching relied on support in the glibc wrapper for +.BR clone (), +but limitations in the implementation +meant that the cache was not up to date in some circumstances. +In particular, +if a signal was delivered to the child immediately after the +.BR clone () +call, then a call to +.BR getpid (2) +in a handler for the signal could return the PID +of the calling process ("the parent"), +if the clone wrapper had not yet had a chance to update the PID +cache in the child. +(This discussion ignores the case where the child was created using +.BR CLONE_THREAD , +when +.BR getpid (2) +.I should +return the same value in the child and in the process that called +.BR clone (), +since the caller and the child are in the same thread group. +The stale-cache problem also does not occur if the +.I flags +argument includes +.BR CLONE_VM .) +To get the truth, it was sometimes necessary to use code such as the following: +.PP +.in +4n +.EX +#include <syscall.h> + +pid_t mypid; + +mypid = syscall(SYS_getpid); +.EE +.in +.\" See also the following bug reports +.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521 +.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910 +.PP +Because of the stale-cache problem, as well as other problems noted in +.BR getpid (2), +the PID caching feature was removed in glibc 2.25. +.SH EXAMPLES +The following program demonstrates the use of +.BR clone () +to create a child process that executes in a separate UTS namespace. +The child changes the hostname in its UTS namespace. +Both parent and child then display the system hostname, +making it possible to see that the hostname +differs in the UTS namespaces of the parent and child. +For an example of the use of this program, see +.BR setns (2). +.PP +Within the sample program, we allocate the memory that is to +be used for the child's stack using +.BR mmap (2) +rather than +.BR malloc (3) +for the following reasons: +.IP \[bu] 3 +.BR mmap (2) +allocates a block of memory that starts on a page +boundary and is a multiple of the page size. +This is useful if we want to establish a guard page (a page with protection +.BR PROT_NONE ) +at the end of the stack using +.BR mprotect (2). +.IP \[bu] +We can specify the +.B MAP_STACK +flag to request a mapping that is suitable for a stack. +For the moment, this flag is a no-op on Linux, +but it exists and has effect on some other systems, +so we should include it for portability. +.SS Program source +.\" SRC BEGIN (clone.c) +.EX +#define _GNU_SOURCE +#include <err.h> +#include <sched.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <unistd.h> + +static int /* Start function for cloned child */ +childFunc(void *arg) +{ + struct utsname uts; + + /* Change hostname in UTS namespace of child. */ + + if (sethostname(arg, strlen(arg)) == \-1) + err(EXIT_FAILURE, "sethostname"); + + /* Retrieve and display hostname. */ + + if (uname(&uts) == \-1) + err(EXIT_FAILURE, "uname"); + printf("uts.nodename in child: %s\en", uts.nodename); + + /* Keep the namespace open for a while, by sleeping. + This allows some experimentation\-\-for example, another + process might join the namespace. */ + + sleep(200); + + return 0; /* Child terminates now */ +} + +#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */ + +int +main(int argc, char *argv[]) +{ + char *stack; /* Start of stack buffer */ + char *stackTop; /* End of stack buffer */ + pid_t pid; + struct utsname uts; + + if (argc < 2) { + fprintf(stderr, "Usage: %s <child\-hostname>\en", argv[0]); + exit(EXIT_SUCCESS); + } + + /* Allocate memory to be used for the stack of the child. */ + + stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0); + if (stack == MAP_FAILED) + err(EXIT_FAILURE, "mmap"); + + stackTop = stack + STACK_SIZE; /* Assume stack grows downward */ + + /* Create child that has its own UTS namespace; + child commences execution in childFunc(). */ + + pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]); + if (pid == \-1) + err(EXIT_FAILURE, "clone"); + printf("clone() returned %jd\en", (intmax_t) pid); + + /* Parent falls through to here */ + + sleep(1); /* Give child time to change its hostname */ + + /* Display hostname in parent\[aq]s UTS namespace. This will be + different from hostname in child\[aq]s UTS namespace. */ + + if (uname(&uts) == \-1) + err(EXIT_FAILURE, "uname"); + printf("uts.nodename in parent: %s\en", uts.nodename); + + if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */ + err(EXIT_FAILURE, "waitpid"); + printf("child has terminated\en"); + + exit(EXIT_SUCCESS); +} +.EE +.\" SRC END +.SH SEE ALSO +.BR fork (2), +.BR futex (2), +.BR getpid (2), +.BR gettid (2), +.BR kcmp (2), +.BR mmap (2), +.BR pidfd_open (2), +.BR set_thread_area (2), +.BR set_tid_address (2), +.BR setns (2), +.BR tkill (2), +.BR unshare (2), +.BR wait (2), +.BR capabilities (7), +.BR namespaces (7), +.BR pthreads (7) |