1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <unistd.h>
#include "alloc-util.h"
#include "capability-util.h"
#include "cap-list.h"
#include "fileio.h"
#include "log.h"
#include "macro.h"
#include "missing_prctl.h"
#include "parse-util.h"
#include "user-util.h"
#include "util.h"
int have_effective_cap(int value) {
_cleanup_cap_free_ cap_t cap;
cap_flag_value_t fv;
cap = cap_get_proc();
if (!cap)
return -errno;
if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
return -errno;
return fv == CAP_SET;
}
unsigned cap_last_cap(void) {
static thread_local unsigned saved;
static thread_local bool valid = false;
_cleanup_free_ char *content = NULL;
unsigned long p = 0;
int r;
if (valid)
return saved;
/* available since linux-3.2 */
r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
if (r >= 0) {
r = safe_atolu(content, &p);
if (r >= 0) {
if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps,
* then we are in trouble (since we, as much userspace and kernel space
* store capability masks in uint64_t types). Let's hence protect
* ourselves against that and always cap at 63 for now. */
p = 63;
saved = p;
valid = true;
return p;
}
}
/* fall back to syscall-probing for pre linux-3.2 */
p = MIN((unsigned long) CAP_LAST_CAP, 63U);
if (prctl(PR_CAPBSET_READ, p) < 0) {
/* Hmm, look downwards, until we find one that works */
for (p--; p > 0; p--)
if (prctl(PR_CAPBSET_READ, p) >= 0)
break;
} else {
/* Hmm, look upwards, until we find one that doesn't work */
for (; p < 63; p++)
if (prctl(PR_CAPBSET_READ, p+1) < 0)
break;
}
saved = p;
valid = true;
return p;
}
int capability_update_inherited_set(cap_t caps, uint64_t set) {
/* Add capabilities in the set to the inherited caps, drops capabilities not in the set.
* Do not apply them yet. */
for (unsigned i = 0; i <= cap_last_cap(); i++) {
cap_flag_value_t flag = set & (UINT64_C(1) << i) ? CAP_SET : CAP_CLEAR;
cap_value_t v;
v = (cap_value_t) i;
if (cap_set_flag(caps, CAP_INHERITABLE, 1, &v, flag) < 0)
return -errno;
}
return 0;
}
int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
_cleanup_cap_free_ cap_t caps = NULL;
int r;
/* Remove capabilities requested in ambient set, but not in the bounding set */
for (unsigned i = 0; i <= cap_last_cap(); i++) {
if (set == 0)
break;
if (FLAGS_SET(set, (UINT64_C(1) << i)) && prctl(PR_CAPBSET_READ, i) != 1) {
log_debug("Ambient capability %s requested but missing from bounding set,"
" suppressing automatically.", capability_to_name(i));
set &= ~(UINT64_C(1) << i);
}
}
/* Add the capabilities to the ambient set (an possibly also the inheritable set) */
/* Check that we can use PR_CAP_AMBIENT or quit early. */
if (!ambient_capabilities_supported())
return (set & all_capabilities()) == 0 ?
0 : -EOPNOTSUPP; /* if actually no ambient caps are to be set, be silent,
* otherwise fail recognizably */
if (also_inherit) {
caps = cap_get_proc();
if (!caps)
return -errno;
r = capability_update_inherited_set(caps, set);
if (r < 0)
return -errno;
if (cap_set_proc(caps) < 0)
return -errno;
}
for (unsigned i = 0; i <= cap_last_cap(); i++) {
if (set & (UINT64_C(1) << i)) {
/* Add the capability to the ambient set. */
if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
return -errno;
} else {
/* Drop the capability so we don't inherit capabilities we didn't ask for. */
r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);
if (r < 0)
return -errno;
if (r)
if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0)
return -errno;
}
}
return 0;
}
int capability_gain_cap_setpcap(cap_t *ret_before_caps) {
_cleanup_cap_free_ cap_t caps = NULL;
cap_flag_value_t fv;
caps = cap_get_proc();
if (!caps)
return -errno;
if (cap_get_flag(caps, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0)
return -errno;
if (fv != CAP_SET) {
_cleanup_cap_free_ cap_t temp_cap = NULL;
static const cap_value_t v = CAP_SETPCAP;
temp_cap = cap_dup(caps);
if (!temp_cap)
return -errno;
if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0)
return -errno;
if (cap_set_proc(temp_cap) < 0)
log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
/* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all this just means
* we'll fail later, when we actually intend to drop some capabilities or try to set securebits. */
}
if (ret_before_caps)
/* Return the capabilities as they have been before setting CAP_SETPCAP */
*ret_before_caps = TAKE_PTR(caps);
return 0;
}
int capability_bounding_set_drop(uint64_t keep, bool right_now) {
_cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
int r;
/* If we are run as PID 1 we will lack CAP_SETPCAP by default
* in the effective set (yes, the kernel drops that when
* executing init!), so get it back temporarily so that we can
* call PR_CAPBSET_DROP. */
r = capability_gain_cap_setpcap(&before_cap);
if (r < 0)
return r;
after_cap = cap_dup(before_cap);
if (!after_cap)
return -errno;
for (unsigned i = 0; i <= cap_last_cap(); i++) {
cap_value_t v;
if ((keep & (UINT64_C(1) << i)))
continue;
/* Drop it from the bounding set */
if (prctl(PR_CAPBSET_DROP, i) < 0) {
r = -errno;
/* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
* continue anyway, as dropping a capability we didn't have in the first place doesn't really
* matter anyway. */
if (prctl(PR_CAPBSET_READ, i) != 0)
goto finish;
}
v = (cap_value_t) i;
/* Also drop it from the inheritable set, so
* that anything we exec() loses the
* capability for good. */
if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
r = -errno;
goto finish;
}
/* If we shall apply this right now drop it
* also from our own capability sets. */
if (right_now) {
if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
r = -errno;
goto finish;
}
}
}
r = 0;
finish:
if (cap_set_proc(after_cap) < 0) {
/* If there are no actual changes anyway then let's ignore this error. */
if (cap_compare(before_cap, after_cap) != 0)
r = -errno;
}
return r;
}
static int drop_from_file(const char *fn, uint64_t keep) {
_cleanup_free_ char *p = NULL;
uint64_t current, after;
uint32_t hi, lo;
int r, k;
r = read_one_line_file(fn, &p);
if (r < 0)
return r;
k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi);
if (k != 2)
return -EIO;
current = (uint64_t) lo | ((uint64_t) hi << 32);
after = current & keep;
if (current == after)
return 0;
lo = after & UINT32_C(0xFFFFFFFF);
hi = (after >> 32) & UINT32_C(0xFFFFFFFF);
return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi);
}
int capability_bounding_set_drop_usermode(uint64_t keep) {
int r;
r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
if (r < 0)
return r;
r = drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
if (r < 0)
return r;
return r;
}
int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
int r;
/* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
* want to keep some capabilities. Since file capabilities have been introduced this cannot be done
* across exec() anymore, unless our binary has the capability configured in the file system, which
* we want to avoid. */
if (setresgid(gid, gid, gid) < 0)
return log_error_errno(errno, "Failed to change group ID: %m");
r = maybe_setgroups(0, NULL);
if (r < 0)
return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
/* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
* don't want to keep any capabilities, since we want to be able to drop them from the bounding set
* too, and we can only do that if we have capabilities. */
if (prctl(PR_SET_KEEPCAPS, 1) < 0)
return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
if (setresuid(uid, uid, uid) < 0)
return log_error_errno(errno, "Failed to change user ID: %m");
if (prctl(PR_SET_KEEPCAPS, 0) < 0)
return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
/* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
* the ones we want to keep */
r = capability_bounding_set_drop(keep_capabilities, true);
if (r < 0)
return log_error_errno(r, "Failed to drop capabilities: %m");
/* Now upgrade the permitted caps we still kept to effective caps */
if (keep_capabilities != 0) {
cap_value_t bits[u64log2(keep_capabilities) + 1];
_cleanup_cap_free_ cap_t d = NULL;
unsigned i, j = 0;
d = cap_init();
if (!d)
return log_oom();
for (i = 0; i < ELEMENTSOF(bits); i++)
if (keep_capabilities & (1ULL << i))
bits[j++] = i;
/* use enough bits */
assert(i == 64 || (keep_capabilities >> i) == 0);
/* don't use too many bits */
assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
return log_error_errno(errno, "Failed to enable capabilities bits: %m");
if (cap_set_proc(d) < 0)
return log_error_errno(errno, "Failed to increase capabilities: %m");
}
return 0;
}
int drop_capability(cap_value_t cv) {
_cleanup_cap_free_ cap_t tmp_cap = NULL;
tmp_cap = cap_get_proc();
if (!tmp_cap)
return -errno;
if ((cap_set_flag(tmp_cap, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0) ||
(cap_set_flag(tmp_cap, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0) ||
(cap_set_flag(tmp_cap, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0))
return -errno;
if (cap_set_proc(tmp_cap) < 0)
return -errno;
return 0;
}
bool ambient_capabilities_supported(void) {
static int cache = -1;
if (cache >= 0)
return cache;
/* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are
* available. */
cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 ||
!IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
return cache;
}
bool capability_quintet_mangle(CapabilityQuintet *q) {
uint64_t combined, drop = 0;
bool ambient_supported;
assert(q);
combined = q->effective | q->bounding | q->inheritable | q->permitted;
ambient_supported = q->ambient != (uint64_t) -1;
if (ambient_supported)
combined |= q->ambient;
for (unsigned i = 0; i <= cap_last_cap(); i++) {
unsigned long bit = UINT64_C(1) << i;
if (!FLAGS_SET(combined, bit))
continue;
if (prctl(PR_CAPBSET_READ, i) > 0)
continue;
drop |= bit;
log_debug("Not in the current bounding set: %s", capability_to_name(i));
}
q->effective &= ~drop;
q->bounding &= ~drop;
q->inheritable &= ~drop;
q->permitted &= ~drop;
if (ambient_supported)
q->ambient &= ~drop;
return drop != 0; /* Let the caller know we changed something */
}
int capability_quintet_enforce(const CapabilityQuintet *q) {
_cleanup_cap_free_ cap_t c = NULL, modified = NULL;
int r;
if (q->ambient != (uint64_t) -1) {
bool changed = false;
c = cap_get_proc();
if (!c)
return -errno;
/* In order to raise the ambient caps set we first need to raise the matching
* inheritable + permitted cap */
for (unsigned i = 0; i <= cap_last_cap(); i++) {
uint64_t m = UINT64_C(1) << i;
cap_value_t cv = (cap_value_t) i;
cap_flag_value_t old_value_inheritable, old_value_permitted;
if ((q->ambient & m) == 0)
continue;
if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
return -errno;
if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
return -errno;
if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
continue;
if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
return -errno;
if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
return -errno;
changed = true;
}
if (changed)
if (cap_set_proc(c) < 0)
return -errno;
r = capability_ambient_set_apply(q->ambient, false);
if (r < 0)
return r;
}
if (q->inheritable != (uint64_t) -1 || q->permitted != (uint64_t) -1 || q->effective != (uint64_t) -1) {
bool changed = false;
if (!c) {
c = cap_get_proc();
if (!c)
return -errno;
}
for (unsigned i = 0; i <= cap_last_cap(); i++) {
uint64_t m = UINT64_C(1) << i;
cap_value_t cv = (cap_value_t) i;
if (q->inheritable != (uint64_t) -1) {
cap_flag_value_t old_value, new_value;
if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
if (errno == EINVAL) /* If the kernel knows more caps than this
* version of libcap, then this will return
* EINVAL. In that case, simply ignore it,
* pretend it doesn't exist. */
continue;
return -errno;
}
new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
if (old_value != new_value) {
changed = true;
if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
return -errno;
}
}
if (q->permitted != (uint64_t) -1) {
cap_flag_value_t old_value, new_value;
if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
if (errno == EINVAL)
continue;
return -errno;
}
new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
if (old_value != new_value) {
changed = true;
if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
return -errno;
}
}
if (q->effective != (uint64_t) -1) {
cap_flag_value_t old_value, new_value;
if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
if (errno == EINVAL)
continue;
return -errno;
}
new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
if (old_value != new_value) {
changed = true;
if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
return -errno;
}
}
}
if (changed) {
/* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
* longer. Let's add it to our list hence for now. */
if (q->bounding != (uint64_t) -1) {
cap_value_t cv = CAP_SETPCAP;
modified = cap_dup(c);
if (!modified)
return -ENOMEM;
if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
return -errno;
if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
return -errno;
if (cap_compare(modified, c) == 0) {
/* No change? then drop this nonsense again */
cap_free(modified);
modified = NULL;
}
}
/* Now, let's enforce the caps for the first time. Note that this is where we acquire
* caps in any of the sets we currently don't have. We have to do this before
* dropping the bounding caps below, since at that point we can never acquire new
* caps in inherited/permitted/effective anymore, but only lose them. */
if (cap_set_proc(modified ?: c) < 0)
return -errno;
}
}
if (q->bounding != (uint64_t) -1) {
r = capability_bounding_set_drop(q->bounding, false);
if (r < 0)
return r;
}
/* If needed, let's now set the caps again, this time in the final version, which differs from what
* we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
* bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
* matter. */
if (modified)
if (cap_set_proc(c) < 0)
return -errno;
return 0;
}
|