diff options
Diffstat (limited to 'man2/unshare.2')
-rw-r--r-- | man2/unshare.2 | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/man2/unshare.2 b/man2/unshare.2 new file mode 100644 index 0000000..b12afb5 --- /dev/null +++ b/man2/unshare.2 @@ -0,0 +1,572 @@ +.\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com> +.\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com> +.\" +.\" SPDX-License-Identifier: GPL-1.0-or-later +.\" +.\" Patch Justification: +.\" unshare system call is needed to implement, using PAM, +.\" per-security_context and/or per-user namespace to provide +.\" polyinstantiated directories. Using unshare and bind mounts, a +.\" PAM module can create private namespace with appropriate +.\" directories(based on user's security context) bind mounted on +.\" public directories such as /tmp, thus providing an instance of +.\" /tmp that is based on user's security context. Without the +.\" unshare system call, namespace separation can only be achieved +.\" by clone, which would require porting and maintaining all commands +.\" such as login, and su, that establish a user session. +.\" +.TH unshare 2 2023-05-26 "Linux man-pages 6.05.01" +.SH NAME +unshare \- disassociate parts of the process execution context +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.B #define _GNU_SOURCE +.B #include <sched.h> +.PP +.BI "int unshare(int " flags ); +.fi +.SH DESCRIPTION +.BR unshare () +allows a process (or thread) to disassociate parts of its execution +context that are currently being shared with other processes (or threads). +Part of the execution context, such as the mount namespace, is shared +implicitly when a new process is created using +.BR fork (2) +or +.BR vfork (2), +while other parts, such as virtual memory, may be +shared by explicit request when creating a process or thread using +.BR clone (2). +.PP +The main use of +.BR unshare () +is to allow a process to control its +shared execution context without creating a new process. +.PP +The +.I flags +argument is a bit mask that specifies which parts of +the execution context should be unshared. +This argument is specified by ORing together zero or more +of the following constants: +.TP +.B CLONE_FILES +Reverse the effect of the +.BR clone (2) +.B CLONE_FILES +flag. +Unshare the file descriptor table, so that the calling process +no longer shares its file descriptors with any other process. +.TP +.B CLONE_FS +Reverse the effect of the +.BR clone (2) +.B CLONE_FS +flag. +Unshare filesystem attributes, so that the calling process +no longer shares its root directory +.RB ( chroot (2)), +current directory +.RB ( chdir (2)), +or umask +.RB ( umask (2)) +attributes with any other process. +.TP +.BR CLONE_NEWCGROUP " (since Linux 4.6)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWCGROUP +flag. +Unshare the cgroup namespace. +Use of +.B CLONE_NEWCGROUP +requires the +.B CAP_SYS_ADMIN +capability. +.TP +.BR CLONE_NEWIPC " (since Linux 2.6.19)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWIPC +flag. +Unshare the IPC namespace, +so that the calling process has a private copy of the +IPC namespace which is not shared with any other process. +Specifying this flag automatically implies +.B CLONE_SYSVSEM +as well. +Use of +.B CLONE_NEWIPC +requires the +.B CAP_SYS_ADMIN +capability. +.TP +.BR CLONE_NEWNET " (since Linux 2.6.24)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWNET +flag. +Unshare the network namespace, +so that the calling process is moved into a +new network namespace which is not shared +with any previously existing process. +Use of +.B CLONE_NEWNET +requires the +.B CAP_SYS_ADMIN +capability. +.TP +.B CLONE_NEWNS +.\" These flag name are inconsistent: +.\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM, +.\" CLONE_FS, and CLONE_FILES reverse the action of the clone() +.\" flags of the same name. +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWNS +flag. +Unshare the mount namespace, +so that the calling process has a private copy of +its namespace which is not shared with any other process. +Specifying this flag automatically implies +.B CLONE_FS +as well. +Use of +.B CLONE_NEWNS +requires the +.B CAP_SYS_ADMIN +capability. +For further information, see +.BR mount_namespaces (7). +.TP +.BR CLONE_NEWPID " (since Linux 3.8)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWPID +flag. +Unshare the PID namespace, +so that the calling process has a new PID namespace for its children +which is not shared with any previously existing process. +The calling process is +.I not +moved into the new namespace. +The first child created by the calling process will have +the process ID 1 and will assume the role of +.BR init (1) +in the new namespace. +.B CLONE_NEWPID +automatically implies +.B CLONE_THREAD +as well. +Use of +.B CLONE_NEWPID +requires the +.B CAP_SYS_ADMIN +capability. +For further information, see +.BR pid_namespaces (7). +.TP +.BR CLONE_NEWTIME " (since Linux 5.6)" +Unshare the time namespace, +so that the calling process has a new time namespace for its children +which is not shared with any previously existing process. +The calling process is +.I not +moved into the new namespace. +Use of +.B CLONE_NEWTIME +requires the +.B CAP_SYS_ADMIN +capability. +For further information, see +.BR time_namespaces (7). +.TP +.BR CLONE_NEWUSER " (since Linux 3.8)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWUSER +flag. +Unshare the user namespace, +so that the calling process is moved into a new user namespace +which is not shared with any previously existing process. +As with the child process created by +.BR clone (2) +with the +.B CLONE_NEWUSER +flag, the caller obtains a full set of capabilities in the new namespace. +.IP +.B CLONE_NEWUSER +requires that the calling process is not threaded; specifying +.B CLONE_NEWUSER +automatically implies +.BR CLONE_THREAD . +Since Linux 3.9, +.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 +.\" https://lwn.net/Articles/543273/ +.B CLONE_NEWUSER +also automatically implies +.BR CLONE_FS . +.B CLONE_NEWUSER +requires that the user ID and group ID +of the calling process are mapped to user IDs and group IDs in the +user namespace of the calling process at the time of the call. +.IP +For further information on user namespaces, see +.BR user_namespaces (7). +.TP +.BR CLONE_NEWUTS " (since Linux 2.6.19)" +This flag has the same effect as the +.BR clone (2) +.B CLONE_NEWUTS +flag. +Unshare the UTS IPC namespace, +so that the calling process has a private copy of the +UTS namespace which is not shared with any other process. +Use of +.B CLONE_NEWUTS +requires the +.B CAP_SYS_ADMIN +capability. +.TP +.BR CLONE_SYSVSEM " (since Linux 2.6.26)" +.\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7 +This flag reverses the effect of the +.BR clone (2) +.B CLONE_SYSVSEM +flag. +Unshare System\ V semaphore adjustment +.RI ( semadj ) +values, +so that the calling process has a new empty +.I semadj +list that is not shared with any other process. +If this is the last process that has a reference to the process's current +.I semadj +list, then the adjustments in that list are applied +to the corresponding semaphores, as described in +.BR semop (2). +.\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared +.\" (i.e., current->signal->count > 1), force CLONE_THREAD. +.PP +In addition, +.BR CLONE_THREAD , +.BR CLONE_SIGHAND , +and +.B CLONE_VM +can be specified in +.I flags +if the caller is single threaded (i.e., it is not sharing +its address space with another process or thread). +In this case, these flags have no effect. +(Note also that specifying +.B CLONE_THREAD +automatically implies +.BR CLONE_VM , +and specifying +.B CLONE_VM +automatically implies +.BR CLONE_SIGHAND .) +.\" As at 3.9, the following forced implications also apply, +.\" although the relevant flags are not yet implemented. +.\" If CLONE_THREAD is set force CLONE_VM. +.\" If CLONE_VM is set, force CLONE_SIGHAND. +.\" +If the process is multithreaded, then +the use of these flags results in an error. +.\" See kernel/fork.c::check_unshare_flags() +.PP +If +.I flags +is specified as zero, then +.BR unshare () +is a no-op; +no changes are made to the calling process's execution context. +.SH RETURN VALUE +On success, zero returned. +On failure, \-1 is returned and +.I errno +is set to indicate the error. +.SH ERRORS +.TP +.B EINVAL +An invalid bit was specified in +.IR flags . +.TP +.B EINVAL +.BR CLONE_THREAD , +.BR CLONE_SIGHAND , +or +.B CLONE_VM +was specified in +.IR flags , +and the caller is multithreaded. +.TP +.B EINVAL +.B CLONE_NEWIPC +was specified in +.IR flags , +but the kernel was not configured with the +.B CONFIG_SYSVIPC +and +.B CONFIG_IPC_NS +options. +.TP +.B EINVAL +.B CLONE_NEWNET +was specified in +.IR flags , +but the kernel was not configured with the +.B CONFIG_NET_NS +option. +.TP +.B EINVAL +.B CLONE_NEWPID +was specified in +.IR flags , +but the kernel was not configured with the +.B CONFIG_PID_NS +option. +.TP +.B EINVAL +.B CLONE_NEWUSER +was specified in +.IR flags , +but the kernel was not configured with the +.B CONFIG_USER_NS +option. +.TP +.B EINVAL +.B CLONE_NEWUTS +was specified in +.IR flags , +but the kernel was not configured with the +.B CONFIG_UTS_NS +option. +.TP +.B EINVAL +.B CLONE_NEWPID +was specified in +.IR flags , +but the process has previously called +.BR unshare () +with the +.B CLONE_NEWPID +flag. +.TP +.B ENOMEM +Cannot allocate sufficient memory to copy parts of caller's +context that need to be unshared. +.TP +.BR ENOSPC " (since Linux 3.7)" +.\" commit f2302505775fd13ba93f034206f1e2a587017929 +.B CLONE_NEWPID +was specified in flags, +but the limit on the nesting depth of PID namespaces +would have been exceeded; see +.BR pid_namespaces (7). +.TP +.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS ) +.B CLONE_NEWUSER +was specified in +.IR flags , +and the call would cause the limit on the number of +nested user namespaces to be exceeded. +See +.BR user_namespaces (7). +.IP +From Linux 3.11 to Linux 4.8, the error diagnosed in this case was +.BR EUSERS . +.TP +.BR ENOSPC " (since Linux 4.9)" +One of the values in +.I flags +specified the creation of a new user namespace, +but doing so would have caused the limit defined by the corresponding file in +.I /proc/sys/user +to be exceeded. +For further details, see +.BR namespaces (7). +.TP +.B EPERM +The calling process did not have the required privileges for this operation. +.TP +.B EPERM +.B CLONE_NEWUSER +was specified in +.IR flags , +but either the effective user ID or the effective group ID of the caller +does not have a mapping in the parent namespace (see +.BR user_namespaces (7)). +.TP +.BR EPERM " (since Linux 3.9)" +.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d +.B CLONE_NEWUSER +was specified in +.I flags +and the caller is in a chroot environment +.\" FIXME What is the rationale for this restriction? +(i.e., the caller's root directory does not match the root directory +of the mount namespace in which it resides). +.TP +.BR EUSERS " (from Linux 3.11 to Linux 4.8)" +.B CLONE_NEWUSER +was specified in +.IR flags , +and the limit on the number of nested user namespaces would be exceeded. +See the discussion of the +.B ENOSPC +error above. +.SH STANDARDS +Linux. +.SH HISTORY +Linux 2.6.16. +.SH NOTES +Not all of the process attributes that can be shared when +a new process is created using +.BR clone (2) +can be unshared using +.BR unshare (). +In particular, as at kernel 3.8, +.\" FIXME all of the following needs to be reviewed for the current kernel +.BR unshare () +does not implement flags that reverse the effects of +.BR CLONE_SIGHAND , +.\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND +.\" was not specified when doing clone(); i.e., unsharing +.\" signal handlers is permitted if we are not actually +.\" sharing signal handlers. mtk +.BR CLONE_THREAD , +or +.BR CLONE_VM . +.\" However, we can do unshare(CLONE_VM) if CLONE_VM +.\" was not specified when doing clone(); i.e., unsharing +.\" virtual memory is permitted if we are not actually +.\" sharing virtual memory. mtk +Such functionality may be added in the future, if required. +.\" +.\"9) Future Work +.\"-------------- +.\"The current implementation of unshare does not allow unsharing of +.\"signals and signal handlers. Signals are complex to begin with and +.\"to unshare signals and/or signal handlers of a currently running +.\"process is even more complex. If in the future there is a specific +.\"need to allow unsharing of signals and/or signal handlers, it can +.\"be incrementally added to unshare without affecting legacy +.\"applications using unshare. +.\" +.PP +Creating all kinds of namespace, except user namespaces, requires the +.B CAP_SYS_ADMIN +capability. +However, since creating a user namespace automatically confers a full set of +capabilities, +creating both a user namespace and any other type of namespace in the same +.BR unshare () +call does not require the +.B CAP_SYS_ADMIN +capability in the original namespace. +.SH EXAMPLES +The program below provides a simple implementation of the +.BR unshare (1) +command, which unshares one or more namespaces and executes the +command supplied in its command-line arguments. +Here's an example of the use of this program, +running a shell in a new mount namespace, +and verifying that the original shell and the +new shell are in separate mount namespaces: +.PP +.in +4n +.EX +$ \fBreadlink /proc/$$/ns/mnt\fP +mnt:[4026531840] +$ \fBsudo ./unshare \-m /bin/bash\fP +# \fBreadlink /proc/$$/ns/mnt\fP +mnt:[4026532325] +.EE +.in +.PP +The differing output of the two +.BR readlink (1) +commands shows that the two shells are in different mount namespaces. +.SS Program source +\& +.\" SRC BEGIN (unshare.c) +.EX +/* unshare.c +\& + A simple implementation of the unshare(1) command: unshare + namespaces and execute a command. +*/ +#define _GNU_SOURCE +#include <err.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +\& +static void +usage(char *pname) +{ + fprintf(stderr, "Usage: %s [options] program [arg...]\en", pname); + fprintf(stderr, "Options can be:\en"); + fprintf(stderr, " \-C unshare cgroup namespace\en"); + fprintf(stderr, " \-i unshare IPC namespace\en"); + fprintf(stderr, " \-m unshare mount namespace\en"); + fprintf(stderr, " \-n unshare network namespace\en"); + fprintf(stderr, " \-p unshare PID namespace\en"); + fprintf(stderr, " \-t unshare time namespace\en"); + fprintf(stderr, " \-u unshare UTS namespace\en"); + fprintf(stderr, " \-U unshare user namespace\en"); + exit(EXIT_FAILURE); +} +\& +int +main(int argc, char *argv[]) +{ + int flags, opt; +\& + flags = 0; +\& + while ((opt = getopt(argc, argv, "CimnptuU")) != \-1) { + switch (opt) { + case \[aq]C\[aq]: flags |= CLONE_NEWCGROUP; break; + case \[aq]i\[aq]: flags |= CLONE_NEWIPC; break; + case \[aq]m\[aq]: flags |= CLONE_NEWNS; break; + case \[aq]n\[aq]: flags |= CLONE_NEWNET; break; + case \[aq]p\[aq]: flags |= CLONE_NEWPID; break; + case \[aq]t\[aq]: flags |= CLONE_NEWTIME; break; + case \[aq]u\[aq]: flags |= CLONE_NEWUTS; break; + case \[aq]U\[aq]: flags |= CLONE_NEWUSER; break; + default: usage(argv[0]); + } + } +\& + if (optind >= argc) + usage(argv[0]); +\& + if (unshare(flags) == \-1) + err(EXIT_FAILURE, "unshare"); +\& + execvp(argv[optind], &argv[optind]); + err(EXIT_FAILURE, "execvp"); +} +.EE +.\" SRC END +.SH SEE ALSO +.BR unshare (1), +.BR clone (2), +.BR fork (2), +.BR kcmp (2), +.BR setns (2), +.BR vfork (2), +.BR namespaces (7) +.PP +.I Documentation/userspace\-api/unshare.rst +in the Linux kernel source tree +.\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2 +(or +.I Documentation/unshare.txt +before Linux 4.12) |