summaryrefslogtreecommitdiffstats
path: root/man2/unshare.2
blob: b12afb55eb8ec8d9e7aa8070a9e8b2996e6c138d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
.\" Copyright (C) 2006, Janak Desai <janak@us.ibm.com>
.\" and Copyright (C) 2006, 2012 Michael Kerrisk <mtk.manpages@gmail.com>
.\"
.\" SPDX-License-Identifier: GPL-1.0-or-later
.\"
.\" Patch Justification:
.\" unshare system call is needed to implement, using PAM,
.\" per-security_context and/or per-user namespace to provide
.\" polyinstantiated directories. Using unshare and bind mounts, a
.\" PAM module can create private namespace with appropriate
.\" directories(based on user's security context) bind mounted on
.\" public directories such as /tmp, thus providing an instance of
.\" /tmp that is based on user's security context. Without the
.\" unshare system call, namespace separation can only be achieved
.\" by clone, which would require porting and maintaining all commands
.\" such as login, and su, that establish a user session.
.\"
.TH unshare 2 2023-05-26 "Linux man-pages 6.05.01"
.SH NAME
unshare \- disassociate parts of the process execution context
.SH LIBRARY
Standard C library
.RI ( libc ", " \-lc )
.SH SYNOPSIS
.nf
.B #define _GNU_SOURCE
.B #include <sched.h>
.PP
.BI "int unshare(int " flags );
.fi
.SH DESCRIPTION
.BR unshare ()
allows a process (or thread) to disassociate parts of its execution
context that are currently being shared with other processes (or threads).
Part of the execution context, such as the mount namespace, is shared
implicitly when a new process is created using
.BR fork (2)
or
.BR vfork (2),
while other parts, such as virtual memory, may be
shared by explicit request when creating a process or thread using
.BR clone (2).
.PP
The main use of
.BR unshare ()
is to allow a process to control its
shared execution context without creating a new process.
.PP
The
.I flags
argument is a bit mask that specifies which parts of
the execution context should be unshared.
This argument is specified by ORing together zero or more
of the following constants:
.TP
.B CLONE_FILES
Reverse the effect of the
.BR clone (2)
.B CLONE_FILES
flag.
Unshare the file descriptor table, so that the calling process
no longer shares its file descriptors with any other process.
.TP
.B CLONE_FS
Reverse the effect of the
.BR clone (2)
.B CLONE_FS
flag.
Unshare filesystem attributes, so that the calling process
no longer shares its root directory
.RB ( chroot (2)),
current directory
.RB ( chdir (2)),
or umask
.RB ( umask (2))
attributes with any other process.
.TP
.BR CLONE_NEWCGROUP " (since Linux 4.6)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWCGROUP
flag.
Unshare the cgroup namespace.
Use of
.B CLONE_NEWCGROUP
requires the
.B CAP_SYS_ADMIN
capability.
.TP
.BR CLONE_NEWIPC " (since Linux 2.6.19)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWIPC
flag.
Unshare the IPC namespace,
so that the calling process has a private copy of the
IPC namespace which is not shared with any other process.
Specifying this flag automatically implies
.B CLONE_SYSVSEM
as well.
Use of
.B CLONE_NEWIPC
requires the
.B CAP_SYS_ADMIN
capability.
.TP
.BR CLONE_NEWNET " (since Linux 2.6.24)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWNET
flag.
Unshare the network namespace,
so that the calling process is moved into a
new network namespace which is not shared
with any previously existing process.
Use of
.B CLONE_NEWNET
requires the
.B CAP_SYS_ADMIN
capability.
.TP
.B CLONE_NEWNS
.\" These flag names are inconsistent:
.\" CLONE_NEWNS does the same thing in clone(), but CLONE_VM,
.\" CLONE_FS, and CLONE_FILES reverse the action of the clone()
.\" flags of the same name.
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWNS
flag.
Unshare the mount namespace,
so that the calling process has a private copy of
its namespace which is not shared with any other process.
Specifying this flag automatically implies
.B CLONE_FS
as well.
Use of
.B CLONE_NEWNS
requires the
.B CAP_SYS_ADMIN
capability.
For further information, see
.BR mount_namespaces (7).
.TP
.BR CLONE_NEWPID " (since Linux 3.8)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWPID
flag.
Unshare the PID namespace,
so that the calling process has a new PID namespace for its children
which is not shared with any previously existing process.
The calling process is
.I not
moved into the new namespace.
The first child created by the calling process will have
the process ID 1 and will assume the role of
.BR init (1)
in the new namespace.
.B CLONE_NEWPID
automatically implies
.B CLONE_THREAD
as well.
Use of
.B CLONE_NEWPID
requires the
.B CAP_SYS_ADMIN
capability.
For further information, see
.BR pid_namespaces (7).
.TP
.BR CLONE_NEWTIME " (since Linux 5.6)"
Unshare the time namespace,
so that the calling process has a new time namespace for its children
which is not shared with any previously existing process.
The calling process is
.I not
moved into the new namespace.
Use of
.B CLONE_NEWTIME
requires the
.B CAP_SYS_ADMIN
capability.
For further information, see
.BR time_namespaces (7).
.TP
.BR CLONE_NEWUSER " (since Linux 3.8)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWUSER
flag.
Unshare the user namespace,
so that the calling process is moved into a new user namespace
which is not shared with any previously existing process.
As with the child process created by
.BR clone (2)
with the
.B CLONE_NEWUSER
flag, the caller obtains a full set of capabilities in the new namespace.
.IP
.B CLONE_NEWUSER
requires that the calling process is not threaded; specifying
.B CLONE_NEWUSER
automatically implies
.BR CLONE_THREAD .
Since Linux 3.9,
.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
.\" https://lwn.net/Articles/543273/
.B CLONE_NEWUSER
also automatically implies
.BR CLONE_FS .
.B CLONE_NEWUSER
requires that the user ID and group ID
of the calling process are mapped to user IDs and group IDs in the
user namespace of the calling process at the time of the call.
.IP
For further information on user namespaces, see
.BR user_namespaces (7).
.TP
.BR CLONE_NEWUTS " (since Linux 2.6.19)"
This flag has the same effect as the
.BR clone (2)
.B CLONE_NEWUTS
flag.
Unshare the UTS namespace,
so that the calling process has a private copy of the
UTS namespace which is not shared with any other process.
Use of
.B CLONE_NEWUTS
requires the
.B CAP_SYS_ADMIN
capability.
.TP
.BR CLONE_SYSVSEM " (since Linux 2.6.26)"
.\" commit 9edff4ab1f8d82675277a04e359d0ed8bf14a7b7
This flag reverses the effect of the
.BR clone (2)
.B CLONE_SYSVSEM
flag.
Unshare System\ V semaphore adjustment
.RI ( semadj )
values,
so that the calling process has a new empty
.I semadj
list that is not shared with any other process.
If this is the last process that has a reference to the process's current
.I semadj
list, then the adjustments in that list are applied
to the corresponding semaphores, as described in
.BR semop (2).
.\" CLONE_NEWNS If CLONE_SIGHAND is set and signals are also being shared
.\" (i.e., current->signal->count > 1), force CLONE_THREAD.
.PP
In addition,
.BR CLONE_THREAD ,
.BR CLONE_SIGHAND ,
and
.B CLONE_VM
can be specified in
.I flags
if the caller is single threaded (i.e., it is not sharing
its address space with another process or thread).
In this case, these flags have no effect.
(Note also that specifying
.B CLONE_THREAD
automatically implies
.BR CLONE_VM ,
and specifying
.B CLONE_VM
automatically implies
.BR CLONE_SIGHAND .)
.\" As at 3.9, the following forced implications also apply,
.\" although the relevant flags are not yet implemented.
.\" If CLONE_THREAD is set force CLONE_VM.
.\" If CLONE_VM is set, force CLONE_SIGHAND.
.\"
If the process is multithreaded, then
the use of these flags results in an error.
.\" See kernel/fork.c::check_unshare_flags()
.PP
If
.I flags
is specified as zero, then
.BR unshare ()
is a no-op;
no changes are made to the calling process's execution context.
.SH RETURN VALUE
On success, zero is returned.
On failure, \-1 is returned and
.I errno
is set to indicate the error.
.SH ERRORS
.TP
.B EINVAL
An invalid bit was specified in
.IR flags .
.TP
.B EINVAL
.BR CLONE_THREAD ,
.BR CLONE_SIGHAND ,
or
.B CLONE_VM
was specified in
.IR flags ,
and the caller is multithreaded.
.TP
.B EINVAL
.B CLONE_NEWIPC
was specified in
.IR flags ,
but the kernel was not configured with the
.B CONFIG_SYSVIPC
and
.B CONFIG_IPC_NS
options.
.TP
.B EINVAL
.B CLONE_NEWNET
was specified in
.IR flags ,
but the kernel was not configured with the
.B CONFIG_NET_NS
option.
.TP
.B EINVAL
.B CLONE_NEWPID
was specified in
.IR flags ,
but the kernel was not configured with the
.B CONFIG_PID_NS
option.
.TP
.B EINVAL
.B CLONE_NEWUSER
was specified in
.IR flags ,
but the kernel was not configured with the
.B CONFIG_USER_NS
option.
.TP
.B EINVAL
.B CLONE_NEWUTS
was specified in
.IR flags ,
but the kernel was not configured with the
.B CONFIG_UTS_NS
option.
.TP
.B EINVAL
.B CLONE_NEWPID
was specified in
.IR flags ,
but the process has previously called
.BR unshare ()
with the
.B CLONE_NEWPID
flag.
.TP
.B ENOMEM
Cannot allocate sufficient memory to copy parts of caller's
context that need to be unshared.
.TP
.BR ENOSPC " (since Linux 3.7)"
.\" commit f2302505775fd13ba93f034206f1e2a587017929
.B CLONE_NEWPID
was specified in
.IR flags ,
but the limit on the nesting depth of PID namespaces
would have been exceeded; see
.BR pid_namespaces (7).
.TP
.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
.B CLONE_NEWUSER
was specified in
.IR flags ,
and the call would cause the limit on the number of
nested user namespaces to be exceeded.
See
.BR user_namespaces (7).
.IP
From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
.BR EUSERS .
.TP
.BR ENOSPC " (since Linux 4.9)"
One of the values in
.I flags
specified the creation of a new user namespace,
but doing so would have caused the limit defined by the corresponding file in
.I /proc/sys/user
to be exceeded.
For further details, see
.BR namespaces (7).
.TP
.B EPERM
The calling process did not have the required privileges for this operation.
.TP
.B EPERM
.B CLONE_NEWUSER
was specified in
.IR flags ,
but either the effective user ID or the effective group ID of the caller
does not have a mapping in the parent namespace (see
.BR user_namespaces (7)).
.TP
.BR EPERM " (since Linux 3.9)"
.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
.B CLONE_NEWUSER
was specified in
.I flags
and the caller is in a chroot environment
.\" FIXME What is the rationale for this restriction?
(i.e., the caller's root directory does not match the root directory
of the mount namespace in which it resides).
.TP
.BR EUSERS " (from Linux 3.11 to Linux 4.8)"
.B CLONE_NEWUSER
was specified in
.IR flags ,
and the limit on the number of nested user namespaces would be exceeded.
See the discussion of the
.B ENOSPC
error above.
.SH STANDARDS
Linux.
.SH HISTORY
Linux 2.6.16.
.SH NOTES
Not all of the process attributes that can be shared when
a new process is created using
.BR clone (2)
can be unshared using
.BR unshare ().
In particular, as at kernel 3.8,
.\" FIXME all of the following needs to be reviewed for the current kernel
.BR unshare ()
does not implement flags that reverse the effects of
.BR CLONE_SIGHAND ,
.\" However, we can do unshare(CLONE_SIGHAND) if CLONE_SIGHAND
.\" was not specified when doing clone(); i.e., unsharing
.\" signal handlers is permitted if we are not actually
.\" sharing signal handlers.   mtk
.BR CLONE_THREAD ,
or
.BR CLONE_VM .
.\" However, we can do unshare(CLONE_VM) if CLONE_VM
.\" was not specified when doing clone(); i.e., unsharing
.\" virtual memory is permitted if we are not actually
.\" sharing virtual memory.   mtk
Such functionality may be added in the future, if required.
.\"
.\"9) Future Work
.\"--------------
.\"The current implementation of unshare does not allow unsharing of
.\"signals and signal handlers. Signals are complex to begin with and
.\"to unshare signals and/or signal handlers of a currently running
.\"process is even more complex. If in the future there is a specific
.\"need to allow unsharing of signals and/or signal handlers, it can
.\"be incrementally added to unshare without affecting legacy
.\"applications using unshare.
.\"
.PP
Creating all kinds of namespace, except user namespaces, requires the
.B CAP_SYS_ADMIN
capability.
However, since creating a user namespace automatically confers a full set of
capabilities,
creating both a user namespace and any other type of namespace in the same
.BR unshare ()
call does not require the
.B CAP_SYS_ADMIN
capability in the original namespace.
.SH EXAMPLES
The program below provides a simple implementation of the
.BR unshare (1)
command, which unshares one or more namespaces and executes the
command supplied in its command-line arguments.
Here's an example of the use of this program,
running a shell in a new mount namespace,
and verifying that the original shell and the
new shell are in separate mount namespaces:
.PP
.in +4n
.EX
$ \fBreadlink /proc/$$/ns/mnt\fP
mnt:[4026531840]
$ \fBsudo ./unshare \-m /bin/bash\fP
# \fBreadlink /proc/$$/ns/mnt\fP
mnt:[4026532325]
.EE
.in
.PP
The differing output of the two
.BR readlink (1)
commands shows that the two shells are in different mount namespaces.
.SS Program source
\&
.\" SRC BEGIN (unshare.c)
.EX
/* unshare.c
\&
   A simple implementation of the unshare(1) command: unshare
   namespaces and execute a command.
*/
#define _GNU_SOURCE
#include <err.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
\&
static void
usage(char *pname)
{
    fprintf(stderr, "Usage: %s [options] program [arg...]\en", pname);
    fprintf(stderr, "Options can be:\en");
    fprintf(stderr, "    \-C   unshare cgroup namespace\en");
    fprintf(stderr, "    \-i   unshare IPC namespace\en");
    fprintf(stderr, "    \-m   unshare mount namespace\en");
    fprintf(stderr, "    \-n   unshare network namespace\en");
    fprintf(stderr, "    \-p   unshare PID namespace\en");
    fprintf(stderr, "    \-t   unshare time namespace\en");
    fprintf(stderr, "    \-u   unshare UTS namespace\en");
    fprintf(stderr, "    \-U   unshare user namespace\en");
    exit(EXIT_FAILURE);
}
\&
int
main(int argc, char *argv[])
{
    int flags, opt;
\&
    flags = 0;
\&
    while ((opt = getopt(argc, argv, "CimnptuU")) != \-1) {
        switch (opt) {
        case \[aq]C\[aq]: flags |= CLONE_NEWCGROUP;     break;
        case \[aq]i\[aq]: flags |= CLONE_NEWIPC;        break;
        case \[aq]m\[aq]: flags |= CLONE_NEWNS;         break;
        case \[aq]n\[aq]: flags |= CLONE_NEWNET;        break;
        case \[aq]p\[aq]: flags |= CLONE_NEWPID;        break;
        case \[aq]t\[aq]: flags |= CLONE_NEWTIME;       break;
        case \[aq]u\[aq]: flags |= CLONE_NEWUTS;        break;
        case \[aq]U\[aq]: flags |= CLONE_NEWUSER;       break;
        default:  usage(argv[0]);
        }
    }
\&
    if (optind >= argc)
        usage(argv[0]);
\&
    if (unshare(flags) == \-1)
        err(EXIT_FAILURE, "unshare");
\&
    execvp(argv[optind], &argv[optind]);
    err(EXIT_FAILURE, "execvp");
}
.EE
.\" SRC END
.SH SEE ALSO
.BR unshare (1),
.BR clone (2),
.BR fork (2),
.BR kcmp (2),
.BR setns (2),
.BR vfork (2),
.BR namespaces (7)
.PP
.I Documentation/userspace\-api/unshare.rst
in the Linux kernel source tree
.\" commit f504d47be5e8fa7ecf2bf660b18b42e6960c0eb2
(or
.I Documentation/unshare.txt
before Linux 4.12)