diff options
Diffstat (limited to 'man2/setns.2')
-rw-r--r-- | man2/setns.2 | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/man2/setns.2 b/man2/setns.2 new file mode 100644 index 0000000..13565de --- /dev/null +++ b/man2/setns.2 @@ -0,0 +1,419 @@ +.\" Copyright (C) 2011, Eric Biederman <ebiederm@xmission.com> +.\" and Copyright (C) 2011, 2012, Michael Kerrisk <mtk.manpages@gmail.com> +.\" +.\" SPDX-License-Identifier: GPL-2.0-only +.\" +.TH setns 2 2023-05-03 "Linux man-pages 6.05.01" +.SH NAME +setns \- reassociate thread with a namespace +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */" +.B #include <sched.h> +.PP +.BI "int setns(int " fd ", int " nstype ); +.fi +.SH DESCRIPTION +The +.BR setns () +system call allows the calling thread to move into different namespaces. +The +.I fd +argument is one of the following: +.IP \[bu] 3 +a file descriptor referring to one of the magic links in a +.IR /proc/ pid /ns/ +directory (or a bind mount to such a link); +.IP \[bu] +a PID file descriptor (see +.BR pidfd_open (2)). +.PP +The +.I nstype +argument is interpreted differently in each case. +.\" +.SS fd refers to a \fI/proc/\fPpid\fI/ns/\fP link +If +.I fd +refers to a +.IR /proc/ pid /ns/ +link, then +.BR setns () +reassociates the calling thread with the namespace associated with that link, +subject to any constraints imposed by the +.I nstype +argument. +In this usage, each call to +.BR setns () +changes just one of the caller's namespace memberships. +.PP +The +.I nstype +argument specifies which type of namespace +the calling thread may be reassociated with. +This argument can have +.I one +of the following values: +.TP +.B 0 +Allow any type of namespace to be joined. +.TP +.BR CLONE_NEWCGROUP " (since Linux 4.6)" +.I fd +must refer to a cgroup namespace. +.TP +.BR CLONE_NEWIPC " (since Linux 3.0)" +.I fd +must refer to an IPC namespace. +.TP +.BR CLONE_NEWNET " (since Linux 3.0)" +.I fd +must refer to a network namespace. 
+.TP +.BR CLONE_NEWNS " (since Linux 3.8)" +.I fd +must refer to a mount namespace. +.TP +.BR CLONE_NEWPID " (since Linux 3.8)" +.I fd +must refer to a descendant PID namespace. +.TP +.BR CLONE_NEWTIME " (since Linux 5.8)" +.\" commit 76c12881a38aaa83e1eb4ce2fada36c3a732bad4 +.I fd +must refer to a time namespace. +.TP +.BR CLONE_NEWUSER " (since Linux 3.8)" +.I fd +must refer to a user namespace. +.TP +.BR CLONE_NEWUTS " (since Linux 3.0)" +.I fd +must refer to a UTS namespace. +.PP +Specifying +.I nstype +as 0 suffices if the caller knows (or does not care) +what type of namespace is referred to by +.IR fd . +Specifying a nonzero value for +.I nstype +is useful if the caller does not know what type of namespace is referred to by +.I fd +and wants to ensure that the namespace is of a particular type. +(The caller might not know the type of the namespace referred to by +.I fd +if the file descriptor was opened by another process and, for example, +passed to the caller via a UNIX domain socket.) +.\" +.SS fd is a PID file descriptor +Since Linux 5.8, +.I fd +may refer to a PID file descriptor obtained from +.BR pidfd_open (2) +or +.BR clone (2). +In this usage, +.BR setns () +atomically moves the calling thread into one or more of the same namespaces +as the thread referred to by +.IR fd . +.PP +The +.I nstype +argument is a bit mask specified by ORing together +.I "one or more" +of the +.B CLONE_NEW* +namespace constants listed above. +The caller is moved into each of the target thread's namespaces +that is specified in +.IR nstype ; +the caller's memberships in the remaining namespaces are left unchanged. 
+.PP +For example, the following code would move the caller into the +same user, network, and UTS namespaces as PID 1234, +but would leave the caller's other namespace memberships unchanged: +.PP +.in +4n +.EX +int fd = pidfd_open(1234, 0); +setns(fd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS); +.EE +.in +.\" +.SS Details for specific namespace types +Note the following details and restrictions when reassociating with +specific namespace types: +.TP +User namespaces +A process reassociating itself with a user namespace must have the +.B CAP_SYS_ADMIN +.\" See kernel/user_namespace.c:userns_install() [3.8 source] +capability in the target user namespace. +(This necessarily implies that it is only possible to join +a descendant user namespace.) +Upon successfully joining a user namespace, +a process is granted all capabilities in that namespace, +regardless of its user and group IDs. +.IP +A multithreaded process may not change user namespace with +.BR setns (). +.IP +It is not permitted to use +.BR setns () +to reenter the caller's current user namespace. +This prevents a caller that has dropped capabilities from regaining +those capabilities via a call to +.BR setns (). +.IP +For security reasons, +.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 +.\" https://lwn.net/Articles/543273/ +a process can't join a new user namespace if it is sharing +filesystem-related attributes +(the attributes whose sharing is controlled by the +.BR clone (2) +.B CLONE_FS +flag) with another process. +.IP +For further details on user namespaces, see +.BR user_namespaces (7). +.TP +Mount namespaces +Changing the mount namespace requires that the caller possess both +.B CAP_SYS_CHROOT +and +.B CAP_SYS_ADMIN +capabilities in its own user namespace and +.B CAP_SYS_ADMIN +in the user namespace that owns the target mount namespace. 
+.IP +A process can't join a new mount namespace if it is sharing +filesystem-related attributes +(the attributes whose sharing is controlled by the +.BR clone (2) +.B CLONE_FS +flag) with another process. +.\" Above check is in fs/namespace.c:mntns_install() [3.8 source] +.IP +See +.BR user_namespaces (7) +for details on the interaction of user namespaces and mount namespaces. +.TP +PID namespaces +In order to reassociate itself with a new PID namespace, +the caller must have the +.B CAP_SYS_ADMIN +capability both in its own user namespace and in the user namespace +that owns the target PID namespace. +.IP +Reassociating the PID namespace has somewhat different semantics +from other namespace types. +Reassociating the calling thread with a PID namespace changes only +the PID namespace that subsequently created child processes of +the caller will be placed in; +it does not change the PID namespace of the caller itself. +.IP +Reassociating with a PID namespace is allowed only if the target +PID namespace is a descendant (child, grandchild, etc.) +of, or is the same as, the current PID namespace of the caller. +.IP +For further details on PID namespaces, see +.BR pid_namespaces (7). +.TP +Cgroup namespaces +In order to reassociate itself with a new cgroup namespace, +the caller must have the +.B CAP_SYS_ADMIN +capability both in its own user namespace and in the user namespace +that owns the target cgroup namespace. +.IP +Using +.BR setns () +to change the caller's cgroup namespace does not change +the caller's cgroup memberships. +.TP +Network, IPC, time, and UTS namespaces +In order to reassociate itself with a new network, IPC, time, or UTS namespace, +the caller must have the +.B CAP_SYS_ADMIN +capability both in its own user namespace and in the user namespace +that owns the target namespace. +.SH RETURN VALUE +On success, +.BR setns () +returns 0. +On failure, \-1 is returned and +.I errno +is set to indicate the error. 
+.SH ERRORS +.TP +.B EBADF +.I fd +is not a valid file descriptor. +.TP +.B EINVAL +.I fd +refers to a namespace whose type does not match that specified in +.IR nstype . +.TP +.B EINVAL +There is a problem with reassociating +the thread with the specified namespace. +.TP +.\" See kernel/pid_namespace.c::pidns_install() [kernel 3.18 sources] +.B EINVAL +The caller tried to join an ancestor (parent, grandparent, and so on) +PID namespace. +.TP +.B EINVAL +The caller attempted to join the user namespace +of which it is already a member. +.TP +.B EINVAL +.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71 +The caller shares filesystem +.RB ( CLONE_FS ) +state (in particular, the root directory) +with other processes and tried to join a new user namespace. +.TP +.B EINVAL +.\" See kernel/user_namespace.c::userns_install() [kernel 3.15 sources] +The caller is multithreaded and tried to join a new user namespace. +.TP +.B EINVAL +.I fd +is a PID file descriptor and +.I nstype +is invalid (e.g., it is 0). +.TP +.B ENOMEM +Cannot allocate sufficient memory to change the specified namespace. +.TP +.B EPERM +The calling thread did not have the required capability +for this operation. +.TP +.B ESRCH +.I fd +is a PID file descriptor but the process it refers to no longer exists +(i.e., it has terminated and been waited on). +.SH STANDARDS +Linux. +.SH VERSIONS +Linux 3.0, +glibc 2.14. +.SH NOTES +For further information on the +.IR /proc/ pid /ns/ +magic links, see +.BR namespaces (7). +.PP +Not all of the attributes that can be shared when +a new thread is created using +.BR clone (2) +can be changed using +.BR setns (). +.SH EXAMPLES +The program below takes two or more arguments. +The first argument specifies the pathname of a namespace file in an existing +.IR /proc/ pid /ns/ +directory. +The remaining arguments specify a command and its arguments. 
+The program opens the namespace file, joins that namespace using +.BR setns (), +and executes the specified command inside that namespace. +.PP +The following shell session demonstrates the use of this program +(compiled as a binary named +.IR ns_exec ) +in conjunction with the +.B CLONE_NEWUTS +example program in the +.BR clone (2) +man page (compiled as a binary named +.IR newuts ). +.PP +We begin by executing the example program in +.BR clone (2) +in the background. +That program creates a child in a separate UTS namespace. +The child changes the hostname in its namespace, +and then both processes display the hostnames in their UTS namespaces, +so that we can see that they are different. +.PP +.in +4n +.EX +$ \fBsu\fP # Need privilege for namespace operations +Password: +# \fB./newuts bizarro &\fP +[1] 3549 +clone() returned 3550 +uts.nodename in child: bizarro +uts.nodename in parent: antero +# \fBuname \-n\fP # Verify hostname in the shell +antero +.EE +.in +.PP +We then run the program shown below, +using it to execute a shell. +Inside that shell, we verify that the hostname is the one +set by the child created by the first program: +.PP +.in +4n +.EX +# \fB./ns_exec /proc/3550/ns/uts /bin/bash\fP +# \fBuname \-n\fP # Executed in shell started by ns_exec +bizarro +.EE +.in +.SS Program source +.\" SRC BEGIN (setns.c) +.EX +#define _GNU_SOURCE +#include <err.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +\& +int +main(int argc, char *argv[]) +{ + int fd; +\& + if (argc < 3) { + fprintf(stderr, "%s /proc/PID/ns/FILE cmd args...\en", argv[0]); + exit(EXIT_FAILURE); + } +\& + /* Get file descriptor for namespace; the file descriptor is opened + with O_CLOEXEC so as to ensure that it is not inherited by the + program that is later executed. 
*/ +\& + fd = open(argv[1], O_RDONLY | O_CLOEXEC); + if (fd == \-1) + err(EXIT_FAILURE, "open"); +\& + if (setns(fd, 0) == \-1) /* Join that namespace */ + err(EXIT_FAILURE, "setns"); +\& + execvp(argv[2], &argv[2]); /* Execute a command in namespace */ + err(EXIT_FAILURE, "execvp"); +} +.EE +.\" SRC END +.SH SEE ALSO +.BR nsenter (1), +.BR clone (2), +.BR fork (2), +.BR unshare (2), +.BR vfork (2), +.BR namespaces (7), +.BR unix (7) |