diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:00:47 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:00:47 +0000 |
commit | 2cb7e0aaedad73b076ea18c6900b0e86c5760d79 (patch) | |
tree | da68ca54bb79f4080079bf0828acda937593a4e1 /src/core/bpf-devices.c | |
parent | Initial commit. (diff) | |
download | systemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.tar.xz systemd-2cb7e0aaedad73b076ea18c6900b0e86c5760d79.zip |
Adding upstream version 247.3.upstream/247.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/core/bpf-devices.c')
-rw-r--r-- | src/core/bpf-devices.c | 529 |
1 files changed, 529 insertions, 0 deletions
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c new file mode 100644 index 0000000..1ad7ade --- /dev/null +++ b/src/core/bpf-devices.c @@ -0,0 +1,529 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <fnmatch.h> +#include <linux/bpf_insn.h> + +#include "bpf-devices.h" +#include "bpf-program.h" +#include "fd-util.h" +#include "fileio.h" +#include "nulstr-util.h" +#include "parse-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" + +#define PASS_JUMP_OFF 4096 + +static int bpf_access_type(const char *acc) { + int r = 0; + + assert(acc); + + for (; *acc; acc++) + switch(*acc) { + case 'r': + r |= BPF_DEVCG_ACC_READ; + break; + case 'w': + r |= BPF_DEVCG_ACC_WRITE; + break; + case 'm': + r |= BPF_DEVCG_ACC_MKNOD; + break; + default: + return -EINVAL; + } + + return r; +} + +static int bpf_prog_allow_list_device( + BPFProgram *prog, + char type, + int major, + int minor, + const char *acc) { + + int r, access; + + assert(prog); + assert(acc); + + log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + assert(IN_SET(type, 'b', 'c')); + const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK; + + const struct bpf_insn insn[] = { + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */ + + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + + if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD)) + r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3); + else + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + +static int bpf_prog_allow_list_major( + BPFProgram *prog, + char type, + int major, + const char *acc) { + + int r, access; + + assert(prog); + assert(acc); + + log_trace("%s: %c %d:* %s", __func__, type, major, acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + assert(IN_SET(type, 'b', 'c')); + const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK; + + const struct bpf_insn insn[] = { + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */ + + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + + if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD)) + r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3); + else + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + +static int bpf_prog_allow_list_class( + BPFProgram *prog, + char type, + const char *acc) { + + int r, access; + + assert(prog); + assert(acc); + + log_trace("%s: %c *:* %s", __func__, type, acc); + + access = bpf_access_type(acc); + if (access <= 0) + return -EINVAL; + + assert(IN_SET(type, 'b', 'c')); + const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK; + + const struct bpf_insn insn[] = { + BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), + BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access), + BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */ + + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */ + BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */ + }; + + if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD)) + r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3); + else + r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn)); + if (r < 0) + log_error_errno(r, "Extending device control BPF program failed: %m"); + + return r; +} + +int bpf_devices_cgroup_init( + BPFProgram **ret, + CGroupDevicePolicy policy, + bool allow_list) { + + const struct bpf_insn pre_insn[] = { + /* load device type to r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, access_type)), + BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF), + + /* load access type to r3 */ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, access_type)), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16), + + /* load major number to r4 */ + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, major)), + + /* load minor number to r5 */ + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct bpf_cgroup_dev_ctx, minor)), + }; + + _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL; + int r; + + assert(ret); + + if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list) + return 0; + + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog); + if (r < 0) + return log_error_errno(r, "Loading device control BPF program failed: %m"); + + if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) { + r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + } + + *ret = TAKE_PTR(prog); + + return 0; +} + +int bpf_devices_apply_policy( + BPFProgram *prog, + CGroupDevicePolicy policy, + bool allow_list, + const char *cgroup_path, + BPFProgram **prog_installed) { + + _cleanup_free_ char *controller_path = NULL; + int r; + + /* This will assign *keep_program if everything goes well. */ + + if (!prog) + goto finish; + + const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list; + + const struct bpf_insn post_insn[] = { + /* return DENY */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + }; + + const struct bpf_insn exit_insn[] = { + /* finally return DENY if deny_everything else ALLOW */ + BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1), + BPF_EXIT_INSN() + }; + + if (!deny_everything) { + r = bpf_program_add_instructions(prog, post_insn, ELEMENTSOF(post_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + + /* Fixup PASS_JUMP_OFF jump offsets. */ + for (size_t off = 0; off < prog->n_instructions; off++) { + struct bpf_insn *ins = &prog->instructions[off]; + + if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF) + ins->off = prog->n_instructions - off - 1; + } + } + + r = bpf_program_add_instructions(prog, exit_insn, ELEMENTSOF(exit_insn)); + if (r < 0) + return log_error_errno(r, "Extending device control BPF program failed: %m"); + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path); + if (r < 0) + return log_error_errno(r, "Failed to determine cgroup path: %m"); + + r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI); + if (r < 0) + return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", + cgroup_path); + + finish: + /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program. */ + if (prog_installed) { + bpf_program_unref(*prog_installed); + *prog_installed = bpf_program_ref(prog); + } + return 0; +} + +int bpf_devices_supported(void) { + const struct bpf_insn trivial[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + _cleanup_(bpf_program_unrefp) BPFProgram *program = NULL; + static int supported = -1; + int r; + + /* Checks whether BPF device controller is supported. For this, we check five things: + * + * a) whether we are privileged + * b) whether the unified hierarchy is being used + * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require + */ + + if (supported >= 0) + return supported; + + if (geteuid() != 0) { + log_debug("Not enough privileges, BPF device control is not supported."); + return supported = 0; + } + + r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); + if (r < 0) + return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m"); + if (r == 0) { + log_debug("Not running with unified cgroups, BPF device control is not supported."); + return supported = 0; + } + + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program); + if (r < 0) { + log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial)); + if (r < 0) { + log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + r = bpf_program_load_kernel(program, NULL, 0); + if (r < 0) { + log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m"); + return supported = 0; + } + + return supported = 1; +} + +static int allow_list_device_pattern( + BPFProgram *prog, + const char *path, + char type, + const unsigned *maj, + const unsigned *min, + const char *acc) { + + assert(IN_SET(type, 'b', 'c')); + + if (cg_all_unified() > 0) { + if (!prog) + return 0; + + if (maj && min) + return bpf_prog_allow_list_device(prog, type, *maj, *min, acc); + else if (maj) + return bpf_prog_allow_list_major(prog, type, *maj, acc); + else + return bpf_prog_allow_list_class(prog, type, acc); + + } else { + char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4]; + int r; + + if (maj && min) + xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc); + else if (maj) + xsprintf(buf, "%c %u:* %s", type, *maj, acc); + else + xsprintf(buf, "%c *:* %s", type, acc); + + /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore + * EINVAL here. */ + + r = cg_set_attribute("devices", path, "devices.allow", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, + r, "Failed to set devices.allow on %s: %m", path); + + return r; + } +} + +int bpf_devices_allow_list_device( + BPFProgram *prog, + const char *path, + const char *node, + const char *acc) { + + mode_t mode; + dev_t rdev; + int r; + + assert(path); + assert(acc); + assert(strlen(acc) <= 3); + + log_trace("%s: %s %s", __func__, node, acc); + + /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and + * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This + * means clients can use these path without the device node actually around */ + r = device_path_parse_major_minor(node, &mode, &rdev); + if (r < 0) { + if (r != -ENODEV) + return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node); + + struct stat st; + if (stat(node, &st) < 0) + return log_warning_errno(errno, "Couldn't stat device %s: %m", node); + + if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) + return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node); + + mode = st.st_mode; + rdev = (dev_t) st.st_rdev; + } + + unsigned maj = major(rdev), min = minor(rdev); + return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc); +} + +int bpf_devices_allow_list_major( + BPFProgram *prog, + const char *path, + const char *name, + char type, + const char *acc) { + + unsigned maj; + int r; + + assert(path); + assert(acc); + assert(IN_SET(type, 'b', 'c')); + + if (streq(name, "*")) + /* If the name is a wildcard, then apply this list to all devices of this type */ + return allow_list_device_pattern(prog, path, type, NULL, NULL, acc); + + if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) + /* The name is numeric and suitable as major. In that case, let's take its major, and create + * the entry directly. */ + return allow_list_device_pattern(prog, path, type, &maj, NULL, acc); + + _cleanup_fclose_ FILE *f = NULL; + bool good = false, any = false; + + f = fopen("/proc/devices", "re"); + if (!f) + return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name); + + for (;;) { + _cleanup_free_ char *line = NULL; + char *w, *p; + + r = read_line(f, LONG_LINE_MAX, &line); + if (r < 0) + return log_warning_errno(r, "Failed to read /proc/devices: %m"); + if (r == 0) + break; + + if (type == 'c' && streq(line, "Character devices:")) { + good = true; + continue; + } + + if (type == 'b' && streq(line, "Block devices:")) { + good = true; + continue; + } + + if (isempty(line)) { + good = false; + continue; + } + + if (!good) + continue; + + p = strstrip(line); + + w = strpbrk(p, WHITESPACE); + if (!w) + continue; + *w = 0; + + r = safe_atou(p, &maj); + if (r < 0) + continue; + if (maj <= 0) + continue; + + w++; + w += strspn(w, WHITESPACE); + + if (fnmatch(name, w, 0) != 0) + continue; + + any = true; + (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc); + } + + if (!any) + return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), + "Device allow list pattern \"%s\" did not match anything.", name); + + return 0; +} + +int bpf_devices_allow_list_static( + BPFProgram *prog, + const char *path) { + + static const char auto_devices[] = + "/dev/null\0" "rwm\0" + "/dev/zero\0" "rwm\0" + "/dev/full\0" "rwm\0" + "/dev/random\0" "rwm\0" + "/dev/urandom\0" "rwm\0" + "/dev/tty\0" "rwm\0" + "/dev/ptmx\0" "rwm\0" + /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */ + "/run/systemd/inaccessible/chr\0" "rwm\0" + "/run/systemd/inaccessible/blk\0" "rwm\0"; + int r = 0, k; + + const char *node, *acc; + NULSTR_FOREACH_PAIR(node, acc, auto_devices) { + k = bpf_devices_allow_list_device(prog, path, node, acc); + if (r >= 0 && k < 0) + r = k; + } + + /* PTS (/dev/pts) devices may not be duplicated, but accessed */ + k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw"); + if (r >= 0 && k < 0) + r = k; + + return r; +} |