summaryrefslogtreecommitdiffstats
path: root/man2/unshare.2
diff options
context:
space:
mode:
Diffstat (limited to 'man2/unshare.2')
-rw-r--r--man2/unshare.2572
1 files changed, 572 insertions, 0 deletions
diff --git a/man2/unshare.2 b/man2/unshare.2
new file mode 100644
index 0000000..b12afb5
--- /dev/null
+++ b/man2/unshare.2
@@ -0,0 +1,572 @@
+.\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com>
+.\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-1.0-or-later
+.\"
+.\" Patch Justification:
+.\" unshare system call is needed to implement, using PAM,
+.\" per-security_context and/or per-user namespace to provide
+.\" polyinstantiated directories. Using unshare and bind mounts, a
+.\" PAM module can create private namespace with appropriate
+.\" directories (based on user's security context) bind mounted on
+.\" public directories such as /tmp, thus providing an instance of
+.\" /tmp that is based on user's security context. Without the
+.\" unshare system call, namespace separation can only be achieved
+.\" by clone, which would require porting and maintaining all commands
+.\" such as login, and su, that establish a user session.
+.\"
+.TH unshare 2 2023-05-26 "Linux man-pages 6.05.01"
+.SH NAME
+unshare \- disassociate parts of the process execution context
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.B #define _GNU_SOURCE
+.B #include <sched.h>
+.PP
+.BI "int unshare(int " flags );
+.fi
+.SH DESCRIPTION
+.BR unshare ()
+allows a process (or thread) to disassociate parts of its execution
+context that are currently being shared with other processes (or threads).
+Part of the execution context, such as the mount namespace, is shared
+implicitly when a new process is created using
+.BR fork (2)
+or
+.BR vfork (2),
+while other parts, such as virtual memory, may be
+shared by explicit request when creating a process or thread using
+.BR clone (2).
+.PP
+The main use of
+.BR unshare ()
+is to allow a process to control its
+shared execution context without creating a new process.
+.PP
+The
+.I flags
+argument is a bit mask that specifies which parts of
+the execution context should be unshared.
+This argument is specified by ORing together zero or more
+of the following constants:
+.TP
+.B CLONE_FILES
+Reverse the effect of the
+.BR clone (2)
+.B CLONE_FILES
+flag.
+Unshare the file descriptor table, so that the calling process
+no longer shares its file descriptors with any other process.
+.TP
+.B CLONE_FS
+Reverse the effect of the
+.BR clone (2)
+.B CLONE_FS
+flag.
+Unshare filesystem attributes, so that the calling process
+no longer shares its root directory
+.RB ( chroot (2)),
+current directory
+.RB ( chdir (2)),
+or umask
+.RB ( umask (2))
+attributes with any other process.
+.TP
+.BR CLONE_NEWCGROUP " (since Linux 4.6)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWCGROUP
+flag.
+Unshare the cgroup namespace.
+Use of
+.B CLONE_NEWCGROUP
+requires the
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.BR CLONE_NEWIPC " (since Linux 2.6.19)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWIPC
+flag.
+Unshare the IPC namespace,
+so that the calling process has a private copy of the
+IPC namespace which is not shared with any other process.
+Specifying this flag automatically implies
+.B CLONE_SYSVSEM
+as well.
+Use of
+.B CLONE_NEWIPC
+requires the
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.BR CLONE_NEWNET " (since Linux 2.6.24)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWNET
+flag.
+Unshare the network namespace,
+so that the calling process is moved into a
+new network namespace which is not shared
+with any previously existing process.
+Use of
+.B CLONE_NEWNET
+requires the
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.B CLONE_NEWNS
+.\" These flag names are inconsistent:
+.\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM,
+.\" CLONE_FS, and CLONE_FILES reverse the action of the clone()
+.\" flags of the same name.
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWNS
+flag.
+Unshare the mount namespace,
+so that the calling process has a private copy of
+its namespace which is not shared with any other process.
+Specifying this flag automatically implies
+.B CLONE_FS
+as well.
+Use of
+.B CLONE_NEWNS
+requires the
+.B CAP_SYS_ADMIN
+capability.
+For further information, see
+.BR mount_namespaces (7).
+.TP
+.BR CLONE_NEWPID " (since Linux 3.8)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWPID
+flag.
+Unshare the PID namespace,
+so that the calling process has a new PID namespace for its children
+which is not shared with any previously existing process.
+The calling process is
+.I not
+moved into the new namespace.
+The first child created by the calling process will have
+the process ID 1 and will assume the role of
+.BR init (1)
+in the new namespace.
+.B CLONE_NEWPID
+automatically implies
+.B CLONE_THREAD
+as well.
+Use of
+.B CLONE_NEWPID
+requires the
+.B CAP_SYS_ADMIN
+capability.
+For further information, see
+.BR pid_namespaces (7).
+.TP
+.BR CLONE_NEWTIME " (since Linux 5.6)"
+Unshare the time namespace,
+so that the calling process has a new time namespace for its children
+which is not shared with any previously existing process.
+The calling process is
+.I not
+moved into the new namespace.
+Use of
+.B CLONE_NEWTIME
+requires the
+.B CAP_SYS_ADMIN
+capability.
+For further information, see
+.BR time_namespaces (7).
+.TP
+.BR CLONE_NEWUSER " (since Linux 3.8)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWUSER
+flag.
+Unshare the user namespace,
+so that the calling process is moved into a new user namespace
+which is not shared with any previously existing process.
+As with the child process created by
+.BR clone (2)
+with the
+.B CLONE_NEWUSER
+flag, the caller obtains a full set of capabilities in the new namespace.
+.IP
+.B CLONE_NEWUSER
+requires that the calling process is not threaded; specifying
+.B CLONE_NEWUSER
+automatically implies
+.BR CLONE_THREAD .
+Since Linux 3.9,
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
+.\" https://lwn.net/Articles/543273/
+.B CLONE_NEWUSER
+also automatically implies
+.BR CLONE_FS .
+.B CLONE_NEWUSER
+requires that the user ID and group ID
+of the calling process are mapped to user IDs and group IDs in the
+user namespace of the calling process at the time of the call.
+.IP
+For further information on user namespaces, see
+.BR user_namespaces (7).
+.TP
+.BR CLONE_NEWUTS " (since Linux 2.6.19)"
+This flag has the same effect as the
+.BR clone (2)
+.B CLONE_NEWUTS
+flag.
+Unshare the UTS namespace,
+so that the calling process has a private copy of the
+UTS namespace which is not shared with any other process.
+Use of
+.B CLONE_NEWUTS
+requires the
+.B CAP_SYS_ADMIN
+capability.
+.TP
+.BR CLONE_SYSVSEM " (since Linux 2.6.26)"
+.\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7
+This flag reverses the effect of the
+.BR clone (2)
+.B CLONE_SYSVSEM
+flag.
+Unshare System\ V semaphore adjustment
+.RI ( semadj )
+values,
+so that the calling process has a new empty
+.I semadj
+list that is not shared with any other process.
+If this is the last process that has a reference to the process's current
+.I semadj
+list, then the adjustments in that list are applied
+to the corresponding semaphores, as described in
+.BR semop (2).
+.\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared
+.\" (i.e., current->signal->count > 1), force CLONE_THREAD.
+.PP
+In addition,
+.BR CLONE_THREAD ,
+.BR CLONE_SIGHAND ,
+and
+.B CLONE_VM
+can be specified in
+.I flags
+if the caller is single threaded (i.e., it is not sharing
+its address space with another process or thread).
+In this case, these flags have no effect.
+(Note also that specifying
+.B CLONE_THREAD
+automatically implies
+.BR CLONE_VM ,
+and specifying
+.B CLONE_VM
+automatically implies
+.BR CLONE_SIGHAND .)
+.\" As at 3.9, the following forced implications also apply,
+.\" although the relevant flags are not yet implemented.
+.\" If CLONE_THREAD is set force CLONE_VM.
+.\" If CLONE_VM is set, force CLONE_SIGHAND.
+.\"
+If the process is multithreaded, then
+the use of these flags results in an error.
+.\" See kernel/fork.c::check_unshare_flags()
+.PP
+If
+.I flags
+is specified as zero, then
+.BR unshare ()
+is a no-op;
+no changes are made to the calling process's execution context.
+.SH RETURN VALUE
+On success, zero is returned.
+On failure, \-1 is returned and
+.I errno
+is set to indicate the error.
+.SH ERRORS
+.TP
+.B EINVAL
+An invalid bit was specified in
+.IR flags .
+.TP
+.B EINVAL
+.BR CLONE_THREAD ,
+.BR CLONE_SIGHAND ,
+or
+.B CLONE_VM
+was specified in
+.IR flags ,
+and the caller is multithreaded.
+.TP
+.B EINVAL
+.B CLONE_NEWIPC
+was specified in
+.IR flags ,
+but the kernel was not configured with the
+.B CONFIG_SYSVIPC
+and
+.B CONFIG_IPC_NS
+options.
+.TP
+.B EINVAL
+.B CLONE_NEWNET
+was specified in
+.IR flags ,
+but the kernel was not configured with the
+.B CONFIG_NET_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWPID
+was specified in
+.IR flags ,
+but the kernel was not configured with the
+.B CONFIG_PID_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWUSER
+was specified in
+.IR flags ,
+but the kernel was not configured with the
+.B CONFIG_USER_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWUTS
+was specified in
+.IR flags ,
+but the kernel was not configured with the
+.B CONFIG_UTS_NS
+option.
+.TP
+.B EINVAL
+.B CLONE_NEWPID
+was specified in
+.IR flags ,
+but the process has previously called
+.BR unshare ()
+with the
+.B CLONE_NEWPID
+flag.
+.TP
+.B ENOMEM
+Cannot allocate sufficient memory to copy parts of caller's
+context that need to be unshared.
+.TP
+.BR ENOSPC " (since Linux 3.7)"
+.\" commit f2302505775fd13ba93f034206f1e2a587017929
+.B CLONE_NEWPID
+was specified in \fIflags\fP,
+but the limit on the nesting depth of PID namespaces
+would have been exceeded; see
+.BR pid_namespaces (7).
+.TP
+.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
+.B CLONE_NEWUSER
+was specified in
+.IR flags ,
+and the call would cause the limit on the number of
+nested user namespaces to be exceeded.
+See
+.BR user_namespaces (7).
+.IP
+From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
+.BR EUSERS .
+.TP
+.BR ENOSPC " (since Linux 4.9)"
+One of the values in
+.I flags
+specified the creation of a new user namespace,
+but doing so would have caused the limit defined by the corresponding file in
+.I /proc/sys/user
+to be exceeded.
+For further details, see
+.BR namespaces (7).
+.TP
+.B EPERM
+The calling process did not have the required privileges for this operation.
+.TP
+.B EPERM
+.B CLONE_NEWUSER
+was specified in
+.IR flags ,
+but either the effective user ID or the effective group ID of the caller
+does not have a mapping in the parent namespace (see
+.BR user_namespaces (7)).
+.TP
+.BR EPERM " (since Linux 3.9)"
+.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
+.B CLONE_NEWUSER
+was specified in
+.I flags
+and the caller is in a chroot environment
+.\" FIXME What is the rationale for this restriction?
+(i.e., the caller's root directory does not match the root directory
+of the mount namespace in which it resides).
+.TP
+.BR EUSERS " (from Linux 3.11 to Linux 4.8)"
+.B CLONE_NEWUSER
+was specified in
+.IR flags ,
+and the limit on the number of nested user namespaces would be exceeded.
+See the discussion of the
+.B ENOSPC
+error above.
+.SH STANDARDS
+Linux.
+.SH HISTORY
+Linux 2.6.16.
+.SH NOTES
+Not all of the process attributes that can be shared when
+a new process is created using
+.BR clone (2)
+can be unshared using
+.BR unshare ().
+In particular, as of Linux 3.8,
+.\" FIXME all of the following needs to be reviewed for the current kernel
+.BR unshare ()
+does not implement flags that reverse the effects of
+.BR CLONE_SIGHAND ,
+.\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND
+.\" was not specified when doing clone(); i.e., unsharing
+.\" signal handlers is permitted if we are not actually
+.\" sharing signal handlers. mtk
+.BR CLONE_THREAD ,
+or
+.BR CLONE_VM .
+.\" However, we can do unshare(CLONE_VM) if CLONE_VM
+.\" was not specified when doing clone(); i.e., unsharing
+.\" virtual memory is permitted if we are not actually
+.\" sharing virtual memory. mtk
+Such functionality may be added in the future, if required.
+.\"
+.\"9) Future Work
+.\"--------------
+.\"The current implementation of unshare does not allow unsharing of
+.\"signals and signal handlers. Signals are complex to begin with and
+.\"to unshare signals and/or signal handlers of a currently running
+.\"process is even more complex. If in the future there is a specific
+.\"need to allow unsharing of signals and/or signal handlers, it can
+.\"be incrementally added to unshare without affecting legacy
+.\"applications using unshare.
+.\"
+.PP
+Creating all kinds of namespace, except user namespaces, requires the
+.B CAP_SYS_ADMIN
+capability.
+However, since creating a user namespace automatically confers a full set of
+capabilities,
+creating both a user namespace and any other type of namespace in the same
+.BR unshare ()
+call does not require the
+.B CAP_SYS_ADMIN
+capability in the original namespace.
+.SH EXAMPLES
+The program below provides a simple implementation of the
+.BR unshare (1)
+command, which unshares one or more namespaces and executes the
+command supplied in its command-line arguments.
+Here's an example of the use of this program,
+running a shell in a new mount namespace,
+and verifying that the original shell and the
+new shell are in separate mount namespaces:
+.PP
+.in +4n
+.EX
+$ \fBreadlink /proc/$$/ns/mnt\fP
+mnt:[4026531840]
+$ \fBsudo ./unshare \-m /bin/bash\fP
+# \fBreadlink /proc/$$/ns/mnt\fP
+mnt:[4026532325]
+.EE
+.in
+.PP
+The differing output of the two
+.BR readlink (1)
+commands shows that the two shells are in different mount namespaces.
+.SS Program source
+\&
+.\" SRC BEGIN (unshare.c)
+.EX
+/* unshare.c
+\&
+ A simple implementation of the unshare(1) command: unshare
+ namespaces and execute a command.
+*/
+#define _GNU_SOURCE
+#include <err.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+\&
+static void
+usage(char *pname)
+{
+ fprintf(stderr, "Usage: %s [options] program [arg...]\en", pname);
+ fprintf(stderr, "Options can be:\en");
+ fprintf(stderr, " \-C unshare cgroup namespace\en");
+ fprintf(stderr, " \-i unshare IPC namespace\en");
+ fprintf(stderr, " \-m unshare mount namespace\en");
+ fprintf(stderr, " \-n unshare network namespace\en");
+ fprintf(stderr, " \-p unshare PID namespace\en");
+ fprintf(stderr, " \-t unshare time namespace\en");
+ fprintf(stderr, " \-u unshare UTS namespace\en");
+ fprintf(stderr, " \-U unshare user namespace\en");
+ exit(EXIT_FAILURE);
+}
+\&
+int
+main(int argc, char *argv[])
+{
+ int flags, opt;
+\&
+ flags = 0;
+\&
+ while ((opt = getopt(argc, argv, "CimnptuU")) != \-1) {
+ switch (opt) {
+ case \[aq]C\[aq]: flags |= CLONE_NEWCGROUP; break;
+ case \[aq]i\[aq]: flags |= CLONE_NEWIPC; break;
+ case \[aq]m\[aq]: flags |= CLONE_NEWNS; break;
+ case \[aq]n\[aq]: flags |= CLONE_NEWNET; break;
+ case \[aq]p\[aq]: flags |= CLONE_NEWPID; break;
+ case \[aq]t\[aq]: flags |= CLONE_NEWTIME; break;
+ case \[aq]u\[aq]: flags |= CLONE_NEWUTS; break;
+ case \[aq]U\[aq]: flags |= CLONE_NEWUSER; break;
+ default: usage(argv[0]);
+ }
+ }
+\&
+ if (optind >= argc)
+ usage(argv[0]);
+\&
+ if (unshare(flags) == \-1)
+ err(EXIT_FAILURE, "unshare");
+\&
+ execvp(argv[optind], &argv[optind]);
+ err(EXIT_FAILURE, "execvp");
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR unshare (1),
+.BR clone (2),
+.BR fork (2),
+.BR kcmp (2),
+.BR setns (2),
+.BR vfork (2),
+.BR namespaces (7)
+.PP
+.I Documentation/userspace\-api/unshare.rst
+in the Linux kernel source tree
+.\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2
+(or
+.I Documentation/unshare.txt
+before Linux 4.12)