author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 15:35:18 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 15:35:18 +0000
commit     b750101eb236130cf056c675997decbac904cc49 (patch)
tree       a5df1a06754bdd014cb975c051c83b01c9a97532 /src/core/bpf
parent     Initial commit. (diff)
Adding upstream version 252.22.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat
-rw-r--r--  src/core/bpf-devices.c                               531
-rw-r--r--  src/core/bpf-devices.h                                21
-rw-r--r--  src/core/bpf-firewall.c                              969
-rw-r--r--  src/core/bpf-firewall.h                               25
-rw-r--r--  src/core/bpf-foreign.c                               154
-rw-r--r--  src/core/bpf-foreign.h                                15
-rw-r--r--  src/core/bpf-lsm.c                                   364
-rw-r--r--  src/core/bpf-lsm.h                                    28
-rw-r--r--  src/core/bpf-socket-bind.c                           244
-rw-r--r--  src/core/bpf-socket-bind.h                            15
-rw-r--r--  src/core/bpf-util.c                                   35
-rw-r--r--  src/core/bpf-util.h                                    5
-rw-r--r--  src/core/bpf/meson.build                             111
-rw-r--r--  src/core/bpf/restrict_fs/meson.build                  24
-rw-r--r--  src/core/bpf/restrict_fs/restrict-fs-skel.h           14
-rw-r--r--  src/core/bpf/restrict_fs/restrict-fs.bpf.c            82
-rw-r--r--  src/core/bpf/restrict_ifaces/meson.build              24
-rw-r--r--  src/core/bpf/restrict_ifaces/restrict-ifaces-skel.h   14
-rw-r--r--  src/core/bpf/restrict_ifaces/restrict-ifaces.bpf.c    52
-rw-r--r--  src/core/bpf/socket_bind/meson.build                  24
-rw-r--r--  src/core/bpf/socket_bind/socket-bind-api.bpf.h        51
-rw-r--r--  src/core/bpf/socket_bind/socket-bind-skel.h           14
-rw-r--r--  src/core/bpf/socket_bind/socket-bind.bpf.c           111
23 files changed, 2927 insertions, 0 deletions
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c
new file mode 100644
index 0000000..3af9e78
--- /dev/null
+++ b/src/core/bpf-devices.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <fnmatch.h>
+#include <linux/bpf_insn.h>
+
+#include "bpf-devices.h"
+#include "bpf-program.h"
+#include "devnum-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "nulstr-util.h"
+#include "parse-util.h"
+#include "path-util.h"
+#include "stdio-util.h"
+#include "string-util.h"
+
+#define PASS_JUMP_OFF 4096
+
+static int bpf_access_type(const char *acc) {
+ int r = 0;
+
+ assert(acc);
+
+ for (; *acc; acc++)
+ switch (*acc) {
+ case 'r':
+ r |= BPF_DEVCG_ACC_READ;
+ break;
+ case 'w':
+ r |= BPF_DEVCG_ACC_WRITE;
+ break;
+ case 'm':
+ r |= BPF_DEVCG_ACC_MKNOD;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
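+
+/* For example, bpf_access_type("rw") yields BPF_DEVCG_ACC_READ|BPF_DEVCG_ACC_WRITE, while an empty
+ * or unrecognized specifier yields 0 or -EINVAL, which the callers below uniformly reject as -EINVAL. */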
+
+static int bpf_prog_allow_list_device(
+ BPFProgram *prog,
+ char type,
+ int major,
+ int minor,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 3), /* compare device type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2), /* compare major */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1), /* compare minor */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
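+
+/* As an illustration: for a hypothetical allow-list entry "c 1:3 rw", the block above compiles to
+ *
+ * r1 = r3; r1 &= READ|WRITE;
+ * if (r1 != r3) goto next_rule; <- some requested access bit is not allowed
+ * if (r2 != BPF_DEVCG_DEV_CHAR) goto next_rule;
+ * if (r4 != 1) goto next_rule; <- major
+ * if (r5 != 3) goto next_rule; <- minor
+ * goto PASS; <- offset patched up in bpf_devices_apply_policy()
+ *
+ * When all of "rwm" is allowed the access check can never fail, hence the first three instructions
+ * are skipped via insn + 3. */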
+
+static int bpf_prog_allow_list_major(
+ BPFProgram *prog,
+ char type,
+ int major,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c %d:* %s", __func__, type, major, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 2), /* compare device type */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1), /* compare major */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+static int bpf_prog_allow_list_class(
+ BPFProgram *prog,
+ char type,
+ const char *acc) {
+
+ int r, access;
+
+ assert(prog);
+ assert(acc);
+
+ log_trace("%s: %c *:* %s", __func__, type, acc);
+
+ access = bpf_access_type(acc);
+ if (access <= 0)
+ return -EINVAL;
+
+ assert(IN_SET(type, 'b', 'c'));
+ const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;
+
+ const struct bpf_insn insn[] = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* compare access type */
+
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, bpf_type, 1), /* compare device type */
+ BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
+ };
+
+ if (FLAGS_SET(access, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD))
+ r = bpf_program_add_instructions(prog, insn + 3, ELEMENTSOF(insn) - 3);
+ else
+ r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ return r;
+}
+
+int bpf_devices_cgroup_init(
+ BPFProgram **ret,
+ CGroupDevicePolicy policy,
+ bool allow_list) {
+
+ const struct bpf_insn pre_insn[] = {
+ /* load device type to r2 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),
+
+ /* load access type to r3 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, access_type)),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),
+
+ /* load major number to r4 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, major)),
+
+ /* load minor number to r5 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct bpf_cgroup_dev_ctx, minor)),
+ };
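+
+ /* Note that the kernel packs both fields into bpf_cgroup_dev_ctx.access_type: the lower 16 bits
+ * carry the device type and the upper 16 bits the access mask, e.g. a read on a character device
+ * arrives as (BPF_DEVCG_ACC_READ << 16) | BPF_DEVCG_DEV_CHAR, which the AND/RSH pair above unpacks. */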
+
+ _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
+ int r;
+
+ assert(ret);
+
+ if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
+ return 0;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &prog);
+ if (r < 0)
+ return log_error_errno(r, "Loading device control BPF program failed: %m");
+
+ if (policy == CGROUP_DEVICE_POLICY_CLOSED || allow_list) {
+ r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+ }
+
+ *ret = TAKE_PTR(prog);
+
+ return 0;
+}
+
+int bpf_devices_apply_policy(
+ BPFProgram **prog,
+ CGroupDevicePolicy policy,
+ bool allow_list,
+ const char *cgroup_path,
+ BPFProgram **prog_installed) {
+
+ _cleanup_free_ char *controller_path = NULL;
+ int r;
+
+ /* This will assign *prog_installed if everything goes well. */
+
+ assert(prog);
+ if (!*prog)
+ goto finish;
+
+ const bool deny_everything = policy == CGROUP_DEVICE_POLICY_STRICT && !allow_list;
+
+ const struct bpf_insn post_insn[] = {
+ /* return DENY */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+ };
+
+ const struct bpf_insn exit_insn[] = {
+ /* finally return DENY if deny_everything else ALLOW */
+ BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
+ BPF_EXIT_INSN()
+ };
+
+ if (!deny_everything) {
+ r = bpf_program_add_instructions(*prog, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ /* Fixup PASS_JUMP_OFF jump offsets. */
+ for (size_t off = 0; off < (*prog)->n_instructions; off++) {
+ struct bpf_insn *ins = &((*prog)->instructions[off]);
+
+ if (ins->code == (BPF_JMP | BPF_JA) && ins->off == PASS_JUMP_OFF)
+ ins->off = (*prog)->n_instructions - off - 1;
+ }
+ }
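+
+ /* To illustrate the fixup with hypothetical numbers: if the program holds 20 instructions once
+ * post_insn is appended and a PASS placeholder sits at index 6, its offset becomes 20 - 6 - 1 = 13,
+ * so execution resumes at index 20, i.e. at the "return ALLOW" move that exit_insn appends below.
+ * The post_insn fall-through path instead sets r0 = 0 (DENY) and jumps over that move straight to
+ * the final BPF_EXIT_INSN(). */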
+
+ r = bpf_program_add_instructions(*prog, exit_insn, ELEMENTSOF(exit_insn));
+ if (r < 0)
+ return log_error_errno(r, "Extending device control BPF program failed: %m");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
+ if (r < 0)
+ return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+ r = bpf_program_cgroup_attach(*prog, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
+ empty_to_root(cgroup_path));
+
+ finish:
+ /* Unref the old BPF program (which will implicitly detach it) and record the new program, if any, as installed. */
+ if (prog_installed) {
+ bpf_program_free(*prog_installed);
+ *prog_installed = TAKE_PTR(*prog);
+ }
+ return 0;
+}
+
+int bpf_devices_supported(void) {
+ const struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
+ static int supported = -1;
+ int r;
+
+ /* Checks whether the BPF device controller is supported. For this, we check three things:
+ *
+ * a) whether we are privileged
+ * b) whether the unified hierarchy is being used
+ * c) whether the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
+ */
+
+ if (supported >= 0)
+ return supported;
+
+ if (geteuid() != 0) {
+ log_debug("Not enough privileges, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ log_debug("Not running with unified cgroups, BPF device control is not supported.");
+ return supported = 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program);
+ if (r < 0) {
+ log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
+ return supported = 0;
+ }
+
+ return supported = 1;
+}
+
+static int allow_list_device_pattern(
+ BPFProgram *prog,
+ const char *path,
+ char type,
+ const unsigned *maj,
+ const unsigned *min,
+ const char *acc) {
+
+ assert(IN_SET(type, 'b', 'c'));
+
+ if (cg_all_unified() > 0) {
+ if (!prog)
+ return 0;
+
+ if (maj && min)
+ return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
+ else if (maj)
+ return bpf_prog_allow_list_major(prog, type, *maj, acc);
+ else
+ return bpf_prog_allow_list_class(prog, type, acc);
+
+ } else {
+ char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
+ int r;
+
+ if (maj && min)
+ xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);
+ else if (maj)
+ xsprintf(buf, "%c %u:* %s", type, *maj, acc);
+ else
+ xsprintf(buf, "%c *:* %s", type, acc);
+
+ /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore
+ * EINVAL here. */
+
+ r = cg_set_attribute("devices", path, "devices.allow", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
+ r, "Failed to set devices.allow on %s: %m", path);
+
+ return r;
+ }
+}
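+
+/* On legacy (cgroup v1) hierarchies the same patterns are thus expressed as devices.allow strings,
+ * e.g. "c 1:3 rw", "b 8:* rwm" or "c *:* m", matching the three xsprintf() branches above. */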
+
+int bpf_devices_allow_list_device(
+ BPFProgram *prog,
+ const char *path,
+ const char *node,
+ const char *acc) {
+
+ mode_t mode;
+ dev_t rdev;
+ int r;
+
+ assert(path);
+ assert(acc);
+ assert(strlen(acc) <= 3);
+
+ log_trace("%s: %s %s", __func__, node, acc);
+
+ /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
+ * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
+ * means clients can use these paths without the device node actually being around. */
+ r = device_path_parse_major_minor(node, &mode, &rdev);
+ if (r < 0) {
+ if (r != -ENODEV)
+ return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
+
+ struct stat st;
+ if (stat(node, &st) < 0)
+ return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+
+ if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
+ return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
+
+ mode = st.st_mode;
+ rdev = (dev_t) st.st_rdev;
+ }
+
+ unsigned maj = major(rdev), min = minor(rdev);
+ return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, acc);
+}
+
+int bpf_devices_allow_list_major(
+ BPFProgram *prog,
+ const char *path,
+ const char *name,
+ char type,
+ const char *acc) {
+
+ unsigned maj;
+ int r;
+
+ assert(path);
+ assert(acc);
+ assert(IN_SET(type, 'b', 'c'));
+
+ if (streq(name, "*"))
+ /* If the name is a wildcard, then apply this list to all devices of this type */
+ return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);
+
+ if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
+ /* The name is numeric and suitable as major. In that case, let's take its major, and create
+ * the entry directly. */
+ return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
+
+ _cleanup_fclose_ FILE *f = NULL;
+ bool good = false, any = false;
+
+ f = fopen("/proc/devices", "re");
+ if (!f)
+ return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);
+
+ for (;;) {
+ _cleanup_free_ char *line = NULL;
+ char *w, *p;
+
+ r = read_line(f, LONG_LINE_MAX, &line);
+ if (r < 0)
+ return log_warning_errno(r, "Failed to read /proc/devices: %m");
+ if (r == 0)
+ break;
+
+ if (type == 'c' && streq(line, "Character devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (type == 'b' && streq(line, "Block devices:")) {
+ good = true;
+ continue;
+ }
+
+ if (isempty(line)) {
+ good = false;
+ continue;
+ }
+
+ if (!good)
+ continue;
+
+ p = strstrip(line);
+
+ w = strpbrk(p, WHITESPACE);
+ if (!w)
+ continue;
+ *w = 0;
+
+ r = safe_atou(p, &maj);
+ if (r < 0)
+ continue;
+ if (maj <= 0)
+ continue;
+
+ w++;
+ w += strspn(w, WHITESPACE);
+
+ if (fnmatch(name, w, 0) != 0)
+ continue;
+
+ any = true;
+ (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
+ }
+
+ if (!any)
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+ "Device allow list pattern \"%s\" did not match anything.", name);
+
+ return 0;
+}
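+
+/* For reference, /proc/devices looks roughly like this (major numbers vary between kernels and
+ * configurations):
+ *
+ * Character devices:
+ * 1 mem
+ * 5 /dev/tty
+ * 136 pts
+ *
+ * Block devices:
+ * 8 sd
+ *
+ * A call with name="pts", type='c' would hence resolve to major 136 here, and since names are
+ * matched with fnmatch(), globs such as "sd*" work as well. */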
+
+int bpf_devices_allow_list_static(
+ BPFProgram *prog,
+ const char *path) {
+
+ static const char auto_devices[] =
+ "/dev/null\0" "rwm\0"
+ "/dev/zero\0" "rwm\0"
+ "/dev/full\0" "rwm\0"
+ "/dev/random\0" "rwm\0"
+ "/dev/urandom\0" "rwm\0"
+ "/dev/tty\0" "rwm\0"
+ "/dev/ptmx\0" "rwm\0"
+ /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+ "/run/systemd/inaccessible/chr\0" "rwm\0"
+ "/run/systemd/inaccessible/blk\0" "rwm\0";
+ int r = 0, k;
+
+ const char *node, *acc;
+ NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
+ k = bpf_devices_allow_list_device(prog, path, node, acc);
+ if (r >= 0 && k < 0)
+ r = k;
+ }
+
+ /* PTS (/dev/pts) devices may not be duplicated, but accessed */
+ k = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
+ if (r >= 0 && k < 0)
+ r = k;
+
+ return r;
+}
diff --git a/src/core/bpf-devices.h b/src/core/bpf-devices.h
new file mode 100644
index 0000000..5106364
--- /dev/null
+++ b/src/core/bpf-devices.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "cgroup.h"
+
+typedef struct BPFProgram BPFProgram;
+
+int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool allow_list);
+int bpf_devices_apply_policy(
+ BPFProgram **prog,
+ CGroupDevicePolicy policy,
+ bool allow_list,
+ const char *cgroup_path,
+ BPFProgram **prog_installed);
+
+int bpf_devices_supported(void);
+int bpf_devices_allow_list_device(BPFProgram *prog, const char *path, const char *node, const char *acc);
+int bpf_devices_allow_list_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc);
+int bpf_devices_allow_list_static(BPFProgram *prog, const char *path);
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
new file mode 100644
index 0000000..ce3b76c
--- /dev/null
+++ b/src/core/bpf-firewall.c
@@ -0,0 +1,969 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/bpf_insn.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "in-addr-prefix-util.h"
+#include "memory-util.h"
+#include "missing_syscall.h"
+#include "unit.h"
+#include "strv.h"
+#include "virt.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+enum {
+ ACCESS_ALLOWED = 1,
+ ACCESS_DENIED = 2,
+};
+
+/* Compile instructions for one list of addresses, one direction and one specific verdict on matches. */
+
+static int add_lookup_instructions(
+ BPFProgram *p,
+ int map_fd,
+ int protocol,
+ bool is_ingress,
+ int verdict) {
+
+ int r, addr_offset, addr_size;
+
+ assert(p);
+ assert(map_fd >= 0);
+
+ switch (protocol) {
+
+ case ETH_P_IP:
+ addr_size = sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct iphdr, saddr) :
+ offsetof(struct iphdr, daddr);
+ break;
+
+ case ETH_P_IPV6:
+ addr_size = 4 * sizeof(uint32_t);
+ addr_offset = is_ingress ?
+ offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+ offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ do {
+ /* Compare IPv4 with one word instruction (32bit) */
+ struct bpf_insn insn[] = {
+ /* If skb->protocol != ETH_P_IP, skip this whole block. The offset will be set later. */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+ /*
+ * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+ *
+ * R1: Pointer to the skb
+ * R2: Data offset
+ * R3: Destination buffer on the stack (r10 - 4)
+ * R4: Number of bytes to read (4)
+ */
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+ BPF_MOV32_IMM(BPF_REG_4, addr_size),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+ /*
+ * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+ * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+ * has to be set to the maximum possible value.
+ *
+ * On success, the looked up value is stored in R0. For this application, the actual
+ * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
+ * matching value.
+ */
+
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
+
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+
+ } while (false);
+
+ return 0;
+}
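+
+/* Sketch of the stack layout while the block above runs, taking IPv4 (addr_size == 4) as the
+ * example: BPF_FUNC_skb_load_bytes writes the address to fp-4, and the lookup key assembled at
+ * fp-8 is effectively
+ *
+ * struct bpf_lpm_trie_key { .prefixlen = 32, .data = <address> };
+ *
+ * i.e. the prefixlen is pinned to the maximum (addr_size * 8) so that the trie returns the longest
+ * configured prefix that matches. */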
+
+static int add_instructions_for_ip_any(
+ BPFProgram *p,
+ int verdict) {
+ int r;
+
+ assert(p);
+
+ const struct bpf_insn insn[] = {
+ BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+ };
+
+ r = bpf_program_add_instructions(p, insn, 1);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int bpf_firewall_compile_bpf(
+ Unit *u,
+ const char *prog_name,
+ bool is_ingress,
+ BPFProgram **ret,
+ bool ip_allow_any,
+ bool ip_deny_any) {
+
+ const struct bpf_insn pre_insn[] = {
+ /*
+ * When the eBPF program is entered, R1 contains the address of the skb.
+ * However, R1-R5 are scratch registers that are not preserved when calling
+ * into kernel functions, so we need to save anything that's supposed to
+ * stay around to R6-R9. Save the skb to R6.
+ */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /*
+ * Although we cannot access the skb data directly from eBPF programs used in this
+ * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+ * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+ * for later use.
+ */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+ /*
+ * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+ * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+ */
+ BPF_MOV32_IMM(BPF_REG_8, 0),
+ };
+
+ /*
+ * The access checkers compiled for the configured allowance and denial lists
+ * write to R8 at runtime. The following code prepares for an early exit that
+ * skips the accounting if the packet is denied.
+ *
+ * R0 = 1
+ * if (R8 == ACCESS_DENIED)
+ * R0 = 0
+ *
+ * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+ * is allowed to pass.
+ */
+ const struct bpf_insn post_insn[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ };
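+
+ /* Spelled out, the verdicts this yields per R8 state are:
+ *
+ * R8 = 0 -> R0 = 1 (no rule matched, allow)
+ * R8 = ACCESS_ALLOWED -> R0 = 1
+ * R8 = ACCESS_DENIED -> R0 = 0
+ * R8 = ACCESS_ALLOWED|ACCESS_DENIED -> R0 = 1 (the allow entry wins)
+ */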
+
+ _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
+ int accounting_map_fd, r;
+ bool access_enabled;
+
+ assert(u);
+ assert(ret);
+
+ accounting_map_fd = is_ingress ?
+ u->ip_accounting_ingress_map_fd :
+ u->ip_accounting_egress_map_fd;
+
+ access_enabled =
+ u->ipv4_allow_map_fd >= 0 ||
+ u->ipv6_allow_map_fd >= 0 ||
+ u->ipv4_deny_map_fd >= 0 ||
+ u->ipv6_deny_map_fd >= 0 ||
+ ip_allow_any ||
+ ip_deny_any;
+
+ if (accounting_map_fd < 0 && !access_enabled) {
+ *ret = NULL;
+ return 0;
+ }
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, prog_name, &p);
+ if (r < 0)
+ return r;
+
+ r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+ if (r < 0)
+ return r;
+
+ if (access_enabled) {
+ /*
+ * The simple rule this function translates into eBPF instructions is:
+ *
+ * - Access will be granted when an address matches an entry in @list_allow
+ * - Otherwise, access will be denied when an address matches an entry in @list_deny
+ * - Otherwise, access will be granted
+ */
+
+ if (u->ipv4_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_deny_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv4_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (u->ipv6_allow_map_fd >= 0) {
+ r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (ip_allow_any) {
+ r = add_instructions_for_ip_any(p, ACCESS_ALLOWED);
+ if (r < 0)
+ return r;
+ }
+
+ if (ip_deny_any) {
+ r = add_instructions_for_ip_any(p, ACCESS_DENIED);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
+ if (r < 0)
+ return r;
+
+ if (accounting_map_fd >= 0) {
+ struct bpf_insn insn[] = {
+ /*
+ * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+ * The jump label will be fixed up later.
+ */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Allow the packet to pass */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ };
+
+ /* Jump label fixup */
+ insn[0].off = ELEMENTSOF(insn) - 1;
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ }
+
+ do {
+ /*
+ * Exit from the eBPF program, R0 contains the verdict.
+ * 0 means the packet is denied, 1 means the packet may pass.
+ */
+ const struct bpf_insn insn[] = {
+ BPF_EXIT_INSN()
+ };
+
+ r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+ if (r < 0)
+ return r;
+ } while (false);
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int bpf_firewall_count_access_items(Set *prefixes, size_t *n_ipv4, size_t *n_ipv6) {
+ struct in_addr_prefix *a;
+
+ assert(n_ipv4);
+ assert(n_ipv6);
+
+ SET_FOREACH(a, prefixes)
+ switch (a->family) {
+
+ case AF_INET:
+ (*n_ipv4)++;
+ break;
+
+ case AF_INET6:
+ (*n_ipv6)++;
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ return 0;
+}
+
+static int bpf_firewall_add_access_items(
+ Set *prefixes,
+ int ipv4_map_fd,
+ int ipv6_map_fd,
+ int verdict) {
+
+ struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
+ struct in_addr_prefix *a;
+ uint64_t value = verdict;
+ int r;
+
+ key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
+ key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);
+
+ SET_FOREACH(a, prefixes)
+ switch (a->family) {
+
+ case AF_INET:
+ key_ipv4->prefixlen = a->prefixlen;
+ memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ case AF_INET6:
+ key_ipv6->prefixlen = a->prefixlen;
+ memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
+
+ r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
+ if (r < 0)
+ return r;
+
+ break;
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ return 0;
+}
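+
+/* For instance, a configured IPAddressDeny=10.0.0.0/8 entry ends up as the trie key
+ * { .prefixlen = 8, .data = { 10, 0, 0, 0 } }, mapped to the value ACCESS_DENIED. */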
+
+static int bpf_firewall_prepare_access_maps(
+ Unit *u,
+ int verdict,
+ int *ret_ipv4_map_fd,
+ int *ret_ipv6_map_fd,
+ bool *ret_has_any) {
+
+ _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+ size_t n_ipv4 = 0, n_ipv6 = 0;
+ Unit *p;
+ int r;
+
+ assert(ret_ipv4_map_fd);
+ assert(ret_ipv6_map_fd);
+ assert(ret_has_any);
+
+ for (p = u; p; p = UNIT_GET_SLICE(p)) {
+ CGroupContext *cc;
+ Set *prefixes;
+ bool *reduced;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ prefixes = verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny;
+ reduced = verdict == ACCESS_ALLOWED ? &cc->ip_address_allow_reduced : &cc->ip_address_deny_reduced;
+
+ if (!*reduced) {
+ r = in_addr_prefixes_reduce(prefixes);
+ if (r < 0)
+ return r;
+
+ *reduced = true;
+ }
+
+ bpf_firewall_count_access_items(prefixes, &n_ipv4, &n_ipv6);
+
+ /* Skip making the LPM trie map in cases where we are using "any" in order to hack around
+ * needing CAP_SYS_ADMIN for allocating an LPM trie map. */
+ if (in_addr_prefixes_is_any(prefixes)) {
+ *ret_has_any = true;
+ return 0;
+ }
+ }
+
+ if (n_ipv4 > 0) {
+ ipv4_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+ sizeof(uint64_t),
+ n_ipv4,
+ BPF_F_NO_PREALLOC);
+ if (ipv4_map_fd < 0)
+ return ipv4_map_fd;
+ }
+
+ if (n_ipv6 > 0) {
+ ipv6_map_fd = bpf_map_new(
+ BPF_MAP_TYPE_LPM_TRIE,
+ offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+ sizeof(uint64_t),
+ n_ipv6,
+ BPF_F_NO_PREALLOC);
+ if (ipv6_map_fd < 0)
+ return ipv6_map_fd;
+ }
+
+ for (p = u; p; p = UNIT_GET_SLICE(p)) {
+ CGroupContext *cc;
+
+ cc = unit_get_cgroup_context(p);
+ if (!cc)
+ continue;
+
+ r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+ ipv4_map_fd, ipv6_map_fd, verdict);
+ if (r < 0)
+ return r;
+ }
+
+ *ret_ipv4_map_fd = TAKE_FD(ipv4_map_fd);
+ *ret_ipv6_map_fd = TAKE_FD(ipv6_map_fd);
+ *ret_has_any = false;
+ return 0;
+}
+
+static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
+ int r;
+
+ assert(u);
+ assert(fd_ingress);
+ assert(fd_egress);
+
+ if (enabled) {
+ if (*fd_ingress < 0) {
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_ingress = r;
+ }
+
+ if (*fd_egress < 0) {
+
+ r = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+ if (r < 0)
+ return r;
+
+ *fd_egress = r;
+ }
+
+ } else {
+ *fd_ingress = safe_close(*fd_ingress);
+ *fd_egress = safe_close(*fd_egress);
+
+ zero(u->ip_accounting_extra);
+ }
+
+ return 0;
+}
+
+int bpf_firewall_compile(Unit *u) {
+ const char *ingress_name = NULL, *egress_name = NULL;
+ bool ip_allow_any = false, ip_deny_any = false;
+ CGroupContext *cc;
+ int r, supported;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF firewalling not supported, proceeding without.");
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE)
+ /* If BPF_F_ALLOW_MULTI is not supported we don't support any BPF magic on inner nodes (i.e. on slice
+ * units), since that would mean leaf nodes couldn't do any BPF anymore at all. Under the assumption
+ * that BPF is more interesting on leaf nodes we hence avoid it on inner nodes in that case. This is
+ * consistent with old systemd behaviour from before v238, where BPF wasn't supported in inner nodes at
+ * all, either. */
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF_F_ALLOW_MULTI is not supported, not doing BPF firewall on slice units.");
+
+ /* If the BPF_F_ALLOW_MULTI flag is supported, the program name is also supported (both were added
+ * in kernel v4.15). */
+ if (supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI) {
+ ingress_name = "sd_fw_ingress";
+ egress_name = "sd_fw_egress";
+ }
+
+ /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs themselves,
+ * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
+ * configuration, but we don't flush out the accounting unnecessarily */
+
+ u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
+ u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ if (u->type != UNIT_SLICE) {
+ /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
+ * nodes will incorporate all IP access rules set on all their parent nodes. This has the benefit that
+ * they can optionally cancel out system-wide rules. Since inner nodes can't contain processes this
+ * means that all configured IP access rules *will* take effect on processes, even though we never
+ * compile them for inner nodes. */
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd, &ip_allow_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF allow maps failed: %m");
+
+ r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd, &ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF deny maps failed: %m");
+ }
+
+ r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF accounting maps failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, ingress_name, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Compilation of ingress BPF program failed: %m");
+
+ r = bpf_firewall_compile_bpf(u, egress_name, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Compilation of egress BPF program failed: %m");
+
+ return 0;
+}
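+
+/* Note that compiling only builds the programs and maps in memory; nothing is attached to the
+ * cgroup until bpf_firewall_install() runs, typically during cgroup realization, with any custom
+ * filters loaded in between via bpf_firewall_load_custom(). */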
+
+static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
+ set_clear(*set);
+
+ STRV_FOREACH(bpf_fs_path, filter_paths) {
+ _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
+ int r;
+
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &prog);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Allocation of SKB BPF program failed: %m");
+
+ r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Loading of ingress BPF program %s failed: %m", *bpf_fs_path);
+
+ r = set_ensure_consume(set, &bpf_program_hash_ops, TAKE_PTR(prog));
+ if (r < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+int bpf_firewall_load_custom(Unit *u) {
+ CGroupContext *cc;
+ int r, supported;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return 0;
+
+ if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
+ return 0;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
+
+ r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+ if (r < 0)
+ return r;
+ r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) {
+ BPFProgram *prog;
+ int r;
+
+ assert(u);
+
+ set_clear(*set_installed);
+ r = set_ensure_allocated(set_installed, &bpf_program_hash_ops);
+ if (r < 0)
+ return log_oom();
+
+ SET_FOREACH_MOVE(prog, *set_installed, *set) {
+ r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Attaching custom egress BPF program to cgroup %s failed: %m", path);
+ }
+ return 0;
+}
+
+int bpf_firewall_install(Unit *u) {
+ _cleanup_(bpf_program_freep) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
+ _cleanup_free_ char *path = NULL;
+ CGroupContext *cc;
+ int r, supported;
+ uint32_t flags;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return -EINVAL;
+ if (!u->cgroup_path)
+ return -EINVAL;
+ if (!u->cgroup_realized)
+ return -EINVAL;
+
+ supported = bpf_firewall_supported();
+ if (supported < 0)
+ return supported;
+ if (supported == BPF_FIREWALL_UNSUPPORTED)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF firewalling not supported, proceeding without.");
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && u->type == UNIT_SLICE)
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF_F_ALLOW_MULTI not supported, not doing BPF firewall on slice units.");
+ if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
+ (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-firewall: Failed to determine cgroup path: %m");
+
+ flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0;
+
+ if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) {
+ /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only
+ * after attaching the new programs, so that there's no time window where neither program is
+ * attached. (There will be a window where both are attached, but that's OK, since this is a
+ * security feature where we rather want to lock down too much than too little.) */
+ ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed);
+ ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed);
+ } else {
+ /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
+ * detach them) right before attaching the new program, to minimize the time window when we
+ * don't account for IP traffic. */
+ u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
+ u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
+ }
+
+ if (u->ip_bpf_egress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+ if (r < 0)
+ return log_unit_error_errno(u, r,
+ "bpf-firewall: Attaching egress BPF program to cgroup %s failed: %m", path);
+
+ /* Remember that this BPF program is installed now. */
+ u->ip_bpf_egress_installed = TAKE_PTR(u->ip_bpf_egress);
+ }
+
+ if (u->ip_bpf_ingress) {
+ r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+ if (r < 0)
+ return log_unit_error_errno(u, r,
+ "bpf-firewall: Attaching ingress BPF program to cgroup %s failed: %m", path);
+
+ u->ip_bpf_ingress_installed = TAKE_PTR(u->ip_bpf_ingress);
+ }
+
+ /* And now, definitely get rid of the old programs, and detach them */
+ ip_bpf_egress_uninstall = bpf_program_free(ip_bpf_egress_uninstall);
+ ip_bpf_ingress_uninstall = bpf_program_free(ip_bpf_ingress_uninstall);
+
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+ if (r < 0)
+ return r;
+
+ r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+ uint64_t key, packets;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ if (ret_packets) {
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_lookup_element(map_fd, &key, &packets);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_bytes) {
+ key = MAP_KEY_BYTES;
+ r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+ if (r < 0)
+ return r;
+ }
+
+ if (ret_packets)
+ *ret_packets = packets;
+
+ return 0;
+}
+
+int bpf_firewall_reset_accounting(int map_fd) {
+ uint64_t key, value = 0;
+ int r;
+
+ if (map_fd < 0)
+ return -EBADF;
+
+ key = MAP_KEY_PACKETS;
+ r = bpf_map_update_element(map_fd, &key, &value);
+ if (r < 0)
+ return r;
+
+ key = MAP_KEY_BYTES;
+ return bpf_map_update_element(map_fd, &key, &value);
+}
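+
+/* A minimal usage sketch for the two helpers above, assuming a unit u with IP accounting enabled:
+ *
+ * uint64_t bytes = 0, packets = 0;
+ * (void) bpf_firewall_read_accounting(u->ip_accounting_ingress_map_fd, &bytes, &packets);
+ * (void) bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+ */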
+
+static int bpf_firewall_unsupported_reason = 0;
+
+int bpf_firewall_supported(void) {
+ const struct bpf_insn trivial[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN()
+ };
+
+ _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
+ static int supported = -1;
+ union bpf_attr attr;
+ int r;
+
+ /* Checks whether BPF firewalling is supported. For this, we check the following things:
+ *
+ * - whether the unified hierarchy is being used
+ * - the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_SKB programs, which we require
+ * - the BPF implementation in the kernel supports the BPF_PROG_DETACH call, which we require
+ */
+ if (supported >= 0)
+ return supported;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0)
+ return log_error_errno(r, "bpf-firewall: Can't determine whether the unified hierarchy is used: %m");
+ if (r == 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(SYNTHETIC_ERRNO(EUCLEAN),
+ "bpf-firewall: Not running with unified cgroup hierarchy, BPF firewalling is not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* prog_name is NULL, since program names are only supported starting with kernel v4.15. */
+ r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, NULL, &program);
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "bpf-firewall: Can't allocate CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "bpf-firewall: Can't add trivial instructions to CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ r = bpf_program_load_kernel(program, NULL, 0);
+ if (r < 0) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(r, "bpf-firewall: Can't load kernel CGROUP SKB BPF program, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* Unfortunately the kernel allows us to create BPF_PROG_TYPE_CGROUP_SKB programs even when CONFIG_CGROUP_BPF
+ * is turned off at kernel compilation time. This sucks of course: why does it allow us to create a cgroup BPF
+ * program if we can't do a thing with it later?
+ *
+ * We detect this case by issuing the BPF_PROG_DETACH bpf() call with invalid file descriptors: if
+ * CONFIG_CGROUP_BPF is turned off, then the call will fail early with EINVAL. If it is turned on,
+ * the parameters are validated, and the call will then fail with EBADF instead. */
+
+ // FIXME: Clang doesn't 0-pad with structured initialization, causing
+ // the kernel to reject the bpf_attr as invalid. See:
+ // https://github.com/torvalds/linux/blob/v5.9/kernel/bpf/syscall.c#L65
+ // Ideally it should behave like GCC, so that we can remove these workarounds.
+ zero(attr);
+ attr.attach_type = BPF_CGROUP_INET_EGRESS;
+ attr.target_fd = -1;
+ attr.attach_bpf_fd = -1;
+
+ if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0) {
+ if (errno != EBADF) {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(errno, "bpf-firewall: Didn't get EBADF from BPF_PROG_DETACH, BPF firewalling is not supported: %m");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* YAY! */
+ } else {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(SYNTHETIC_ERRNO(EBADE),
+ "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_DETACH call? "
+ "Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+
+ /* So now we know that the BPF program is generally available, let's see if BPF_F_ALLOW_MULTI is also supported
+ * (which was added in kernel 4.15). We use a similar logic as before, but this time we use the BPF_PROG_ATTACH
+ * bpf() call and the BPF_F_ALLOW_MULTI flags value. Since the flags are checked early in the system call we'll
+ * get EINVAL if it's not supported, and EBADF as before if it is available.
+ * Use probe result as the indicator that program name is also supported since they both were
+ * added in kernel 4.15. */
+
+ zero(attr);
+ attr.attach_type = BPF_CGROUP_INET_EGRESS;
+ attr.target_fd = -1;
+ attr.attach_bpf_fd = -1;
+ attr.attach_flags = BPF_F_ALLOW_MULTI;
+
+ if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0) {
+ if (errno == EBADF) {
+ log_debug_errno(errno, "bpf-firewall: Got EBADF when using BPF_F_ALLOW_MULTI, which indicates it is supported. Yay!");
+ return supported = BPF_FIREWALL_SUPPORTED_WITH_MULTI;
+ }
+
+ if (errno == EINVAL)
+ log_debug_errno(errno, "bpf-firewall: Got EINVAL error when using BPF_F_ALLOW_MULTI, which indicates it's not supported.");
+ else
+ log_debug_errno(errno, "bpf-firewall: Got unexpected error when using BPF_F_ALLOW_MULTI, assuming it's not supported: %m");
+
+ return supported = BPF_FIREWALL_SUPPORTED;
+ } else {
+ bpf_firewall_unsupported_reason =
+ log_debug_errno(SYNTHETIC_ERRNO(EBADE),
+ "bpf-firewall: Wut? Kernel accepted our invalid BPF_PROG_ATTACH+BPF_F_ALLOW_MULTI call? "
+ "Something is weird, assuming BPF firewalling is broken and hence not supported.");
+ return supported = BPF_FIREWALL_UNSUPPORTED;
+ }
+}
+
+void emit_bpf_firewall_warning(Unit *u) {
+ static bool warned = false;
+
+ assert(u);
+ assert(u->manager);
+
+ if (warned || MANAGER_IS_TEST_RUN(u->manager))
+ return;
+
+ bool quiet = ERRNO_IS_PRIVILEGE(bpf_firewall_unsupported_reason) && detect_container() > 0;
+
+ log_unit_full_errno(u, quiet ? LOG_DEBUG : LOG_WARNING, bpf_firewall_unsupported_reason,
+ "unit configures an IP firewall, but %s.\n"
+ "(This warning is only shown for the first unit using IP firewalling.)",
+ getuid() != 0 ? "not running as root" :
+ "the local system does not support BPF/cgroup firewalling");
+ warned = true;
+}
+
+void bpf_firewall_close(Unit *u) {
+ assert(u);
+
+ u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd);
+ u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
+ u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
+ u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
+ u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
+
+ u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
+ u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);
+ u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed);
+ u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed);
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
new file mode 100644
index 0000000..58b401f
--- /dev/null
+++ b/src/core/bpf-firewall.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <inttypes.h>
+
+#include "unit.h"
+
+enum {
+ BPF_FIREWALL_UNSUPPORTED = 0,
+ BPF_FIREWALL_SUPPORTED = 1,
+ BPF_FIREWALL_SUPPORTED_WITH_MULTI = 2,
+};
+
+int bpf_firewall_supported(void);
+
+int bpf_firewall_compile(Unit *u);
+int bpf_firewall_install(Unit *u);
+int bpf_firewall_load_custom(Unit *u);
+
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
+int bpf_firewall_reset_accounting(int map_fd);
+
+void emit_bpf_firewall_warning(Unit *u);
+
+void bpf_firewall_close(Unit *u);
diff --git a/src/core/bpf-foreign.c b/src/core/bpf-foreign.c
new file mode 100644
index 0000000..cff2f61
--- /dev/null
+++ b/src/core/bpf-foreign.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bpf-foreign.h"
+#include "bpf-program.h"
+#include "cgroup.h"
+#include "memory-util.h"
+#include "missing_magic.h"
+#include "mountpoint-util.h"
+#include "set.h"
+#include "stat-util.h"
+
+typedef struct BPFForeignKey BPFForeignKey;
+struct BPFForeignKey {
+ uint32_t prog_id;
+ uint32_t attach_type;
+};
+
+static int bpf_foreign_key_new(uint32_t prog_id,
+ enum bpf_attach_type attach_type,
+ BPFForeignKey **ret) {
+ _cleanup_free_ BPFForeignKey *p = NULL;
+
+ assert(ret);
+
+ p = new(BPFForeignKey, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (BPFForeignKey) {
+ .prog_id = prog_id,
+ .attach_type = attach_type,
+ };
+
+ *ret = TAKE_PTR(p);
+
+ return 0;
+}
+
+static int bpf_foreign_key_compare_func(const BPFForeignKey *a, const BPFForeignKey *b) {
+ int r = CMP(a->prog_id, b->prog_id);
+ if (r != 0)
+ return r;
+
+ return CMP(a->attach_type, b->attach_type);
+}
+
+static void bpf_foreign_key_hash_func(const BPFForeignKey *p, struct siphash *h) {
+ siphash24_compress(&p->prog_id, sizeof(p->prog_id), h);
+ siphash24_compress(&p->attach_type, sizeof(p->attach_type), h);
+}
+
+DEFINE_PRIVATE_HASH_OPS_FULL(bpf_foreign_by_key_hash_ops,
+ BPFForeignKey, bpf_foreign_key_hash_func, bpf_foreign_key_compare_func, free,
+ BPFProgram, bpf_program_free);
+
+static int attach_programs(Unit *u, const char *path, Hashmap* foreign_by_key, uint32_t attach_flags) {
+ const BPFForeignKey *key;
+ BPFProgram *prog;
+ int r, ret = 0;
+
+ assert(u);
+
+ HASHMAP_FOREACH_KEY(prog, key, foreign_by_key) {
+ r = bpf_program_cgroup_attach(prog, key->attach_type, path, attach_flags);
+ if (r < 0) {
+ log_unit_error_errno(u, r, "bpf-foreign: Attaching foreign BPF program to cgroup %s failed: %m", path);
+ if (ret >= 0)
+ ret = r;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Prepare a foreign BPF program for installation:
+ * - Load the program from the BPF filesystem into the kernel;
+ * - Store the program fd, keyed by program ID and attach type, in the unit.
+ */
+static int bpf_foreign_prepare(
+ Unit *u,
+ enum bpf_attach_type attach_type,
+ const char *bpffs_path) {
+ _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
+ _cleanup_free_ BPFForeignKey *key = NULL;
+ uint32_t prog_id;
+ int r;
+
+ assert(u);
+ assert(bpffs_path);
+
+ r = path_is_fs_type(bpffs_path, BPF_FS_MAGIC);
+ if (r == -ENOENT) {
+ log_unit_warning_errno(u, r, "bpf-foreign: foreign program %s does not exist, skipping.", bpffs_path);
+ return 0;
+ }
+ if (r < 0)
+ return log_unit_error_errno(u, r,
+ "bpf-foreign: Failed to determine filesystem type of %s: %m", bpffs_path);
+ if (r == 0)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "bpf-foreign: Path in BPF filesystem is expected.");
+
+ r = bpf_program_new_from_bpffs_path(bpffs_path, &prog);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-foreign: Failed to create foreign BPF program: %m");
+
+ r = bpf_program_get_id_by_fd(prog->kernel_fd, &prog_id);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-foreign: Failed to get BPF program id from fd: %m");
+
+ r = bpf_foreign_key_new(prog_id, attach_type, &key);
+ if (r < 0)
+ return log_unit_error_errno(u, r,
+ "bpf-foreign: Failed to create foreign BPF program key from path '%s': %m", bpffs_path);
+
+ r = hashmap_ensure_put(&u->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
+ if (r == -EEXIST) {
+ log_unit_warning_errno(u, r, "bpf-foreign: Foreign BPF program already exists, ignoring: %m");
+ return 0;
+ }
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-foreign: Failed to put foreign BPF program into map: %m");
+
+ TAKE_PTR(key);
+ TAKE_PTR(prog);
+
+ return 0;
+}
+
+int bpf_foreign_install(Unit *u) {
+ _cleanup_free_ char *cgroup_path = NULL;
+ CGroupContext *cc;
+ int r, ret = 0;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return 0;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-foreign: Failed to get cgroup path: %m");
+
+ LIST_FOREACH(programs, p, cc->bpf_foreign_programs) {
+ r = bpf_foreign_prepare(u, p->attach_type, p->bpffs_path);
+ if (r < 0 && ret >= 0)
+ ret = r;
+ }
+
+ r = attach_programs(u, cgroup_path, u->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
+ return ret < 0 ? ret : r;
+}
diff --git a/src/core/bpf-foreign.h b/src/core/bpf-foreign.h
new file mode 100644
index 0000000..e387b1b
--- /dev/null
+++ b/src/core/bpf-foreign.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#pragma once
+
+#include "unit.h"
+
+static inline int bpf_foreign_supported(void) {
+ return cg_all_unified();
+}
+
+/*
+ * Attach cgroup-bpf programs foreign to systemd, i.e. loaded to the kernel by an entity
+ * external to systemd.
+ */
+int bpf_foreign_install(Unit *u);
diff --git a/src/core/bpf-lsm.c b/src/core/bpf-lsm.c
new file mode 100644
index 0000000..a3726d9
--- /dev/null
+++ b/src/core/bpf-lsm.c
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-lsm.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "filesystems.h"
+#include "log.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+#if BPF_FRAMEWORK
+/* libbpf, clang and llc compile time dependencies are satisfied */
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf-util.h"
+#include "bpf/restrict_fs/restrict-fs-skel.h"
+
+#define CGROUP_HASH_SIZE_MAX 2048
+
+static struct restrict_fs_bpf *restrict_fs_bpf_free(struct restrict_fs_bpf *obj) {
+ /* restrict_fs_bpf__destroy handles object == NULL case */
+ (void) restrict_fs_bpf__destroy(obj);
+
+ return NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fs_bpf *, restrict_fs_bpf_free);
+
+static bool bpf_can_link_lsm_program(struct bpf_program *prog) {
+ _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
+
+ assert(prog);
+
+ link = sym_bpf_program__attach_lsm(prog);
+
+ /* If bpf_program__attach_lsm() fails, the returned value carries a libbpf error code instead of a
+ * memory pointer. That is the case when the helper is called on architectures where the BPF
+ * trampoline (hence the BPF_LSM_MAC attach type) is not supported. */
+ return sym_libbpf_get_error(link) == 0;
+}
+
+static int prepare_restrict_fs_bpf(struct restrict_fs_bpf **ret_obj) {
+ _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+ _cleanup_close_ int inner_map_fd = -1;
+ int r;
+
+ assert(ret_obj);
+
+ obj = restrict_fs_bpf__open();
+ if (!obj)
+ return log_error_errno(errno, "bpf-lsm: Failed to open BPF object: %m");
+
+ /* TODO Maybe choose a number based on runtime information? */
+ r = sym_bpf_map__set_max_entries(obj->maps.cgroup_hash, CGROUP_HASH_SIZE_MAX);
+ assert(r <= 0);
+ if (r < 0)
+ return log_error_errno(r, "bpf-lsm: Failed to resize BPF map '%s': %m",
+ sym_bpf_map__name(obj->maps.cgroup_hash));
+
+ /* Dummy map to satisfy the verifier */
+ inner_map_fd = compat_bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t), sizeof(uint32_t), 128U, NULL);
+ if (inner_map_fd < 0)
+ return log_error_errno(errno, "bpf-lsm: Failed to create BPF map: %m");
+
+ r = sym_bpf_map__set_inner_map_fd(obj->maps.cgroup_hash, inner_map_fd);
+ assert(r <= 0);
+ if (r < 0)
+ return log_error_errno(r, "bpf-lsm: Failed to set inner map fd: %m");
+
+ r = restrict_fs_bpf__load(obj);
+ assert(r <= 0);
+ if (r < 0)
+ return log_error_errno(r, "bpf-lsm: Failed to load BPF object: %m");
+
+ *ret_obj = TAKE_PTR(obj);
+
+ return 0;
+}
+
+static int mac_bpf_use(void) {
+ _cleanup_free_ char *lsm_list = NULL;
+ static int cached_use = -1;
+ int r;
+
+ if (cached_use >= 0)
+ return cached_use;
+
+ cached_use = 0;
+
+ r = read_one_line_file("/sys/kernel/security/lsm", &lsm_list);
+ if (r < 0) {
+ if (r != -ENOENT)
+ log_notice_errno(r, "bpf-lsm: Failed to read /sys/kernel/security/lsm, assuming bpf is unavailable: %m");
+ return 0;
+ }
+
+ for (const char *p = lsm_list;;) {
+ _cleanup_free_ char *word = NULL;
+
+ r = extract_first_word(&p, &word, ",", 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_notice_errno(r, "bpf-lsm: Failed to parse /sys/kernel/security/lsm, assuming bpf is unavailable: %m");
+ return 0;
+ }
+
+ if (streq(word, "bpf"))
+ return cached_use = 1;
+ }
+}
+
+bool lsm_bpf_supported(bool initialize) {
+ _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+ static int supported = -1;
+ int r;
+
+ if (supported >= 0)
+ return supported;
+ if (!initialize)
+ return false;
+
+ if (!cgroup_bpf_supported())
+ return (supported = false);
+
+ r = mac_bpf_use();
+ if (r < 0) {
+ log_warning_errno(r, "bpf-lsm: Can't determine whether the BPF LSM module is used: %m");
+ return (supported = false);
+ }
+
+ if (r == 0) {
+ log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-lsm: BPF LSM hook not enabled in the kernel, BPF LSM not supported");
+ return (supported = false);
+ }
+
+ r = prepare_restrict_fs_bpf(&obj);
+ if (r < 0)
+ return (supported = false);
+
+ if (!bpf_can_link_lsm_program(obj->progs.restrict_filesystems)) {
+ log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-lsm: Failed to link program; assuming BPF LSM is not available");
+ return (supported = false);
+ }
+
+ return (supported = true);
+}
+
+int lsm_bpf_setup(Manager *m) {
+ _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+ _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
+ int r;
+
+ assert(m);
+
+ r = prepare_restrict_fs_bpf(&obj);
+ if (r < 0)
+ return r;
+
+ link = sym_bpf_program__attach_lsm(obj->progs.restrict_filesystems);
+ r = sym_libbpf_get_error(link);
+ if (r != 0)
+ return log_error_errno(r, "bpf-lsm: Failed to link '%s' LSM BPF program: %m",
+ sym_bpf_program__name(obj->progs.restrict_filesystems));
+
+ log_info("bpf-lsm: LSM BPF program attached");
+
+ obj->links.restrict_filesystems = TAKE_PTR(link);
+ m->restrict_fs = TAKE_PTR(obj);
+
+ return 0;
+}
+
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list) {
+ uint32_t dummy_value = 1, zero = 0;
+ const char *fs;
+ const statfs_f_type_t *magic;
+ int r;
+
+ assert(filesystems);
+ assert(u);
+
+ if (!u->manager->restrict_fs)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "bpf-lsm: BPF LSM object is not installed, has setup failed?");
+
+ int inner_map_fd = compat_bpf_map_create(
+ BPF_MAP_TYPE_HASH,
+ NULL,
+ sizeof(uint32_t),
+ sizeof(uint32_t),
+ 128U, /* Should be enough for all filesystem types */
+ NULL);
+ if (inner_map_fd < 0)
+ return log_unit_error_errno(u, errno, "bpf-lsm: Failed to create inner BPF map: %m");
+
+ int outer_map_fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
+ if (outer_map_fd < 0)
+ return log_unit_error_errno(u, errno, "bpf-lsm: Failed to get BPF map fd: %m");
+
+ if (sym_bpf_map_update_elem(outer_map_fd, &u->cgroup_id, &inner_map_fd, BPF_ANY) != 0)
+ return log_unit_error_errno(u, errno, "bpf-lsm: Error populating BPF map: %m");
+
+ uint32_t allow = allow_list;
+
+ /* Use key 0 to store whether this is an allow list or a deny list */
+ if (sym_bpf_map_update_elem(inner_map_fd, &zero, &allow, BPF_ANY) != 0)
+ return log_unit_error_errno(u, errno, "bpf-lsm: Error initializing map: %m");
+
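+ /* All other keys are statfs(2) magic numbers. The stored value is a dummy: the BPF
+ * program only checks for key presence. */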
+ SET_FOREACH(fs, filesystems) {
+ r = fs_type_from_string(fs, &magic);
+ if (r < 0) {
+ log_unit_warning(u, "bpf-lsm: Invalid filesystem name '%s', ignoring.", fs);
+ continue;
+ }
+
+ log_unit_debug(u, "bpf-lsm: Restricting filesystem access to '%s'", fs);
+
+ for (int i = 0; i < FILESYSTEM_MAGIC_MAX; i++) {
+ if (magic[i] == 0)
+ break;
+
+ if (sym_bpf_map_update_elem(inner_map_fd, &magic[i], &dummy_value, BPF_ANY) != 0) {
+ r = log_unit_error_errno(u, errno, "bpf-lsm: Failed to update BPF map: %m");
+
+ if (sym_bpf_map_delete_elem(outer_map_fd, &u->cgroup_id) != 0)
+ log_unit_debug_errno(u, errno, "bpf-lsm: Failed to delete cgroup entry from BPF map: %m");
+
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int lsm_bpf_cleanup(const Unit *u) {
+ assert(u);
+ assert(u->manager);
+
+ /* If we never successfully detected support, there is nothing to clean up. */
+ if (!lsm_bpf_supported(/* initialize = */ false))
+ return 0;
+
+ if (!u->manager->restrict_fs)
+ return 0;
+
+ if (u->cgroup_id == 0)
+ return 0;
+
+ int fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
+ if (fd < 0)
+ return log_unit_error_errno(u, errno, "bpf-lsm: Failed to get BPF map fd: %m");
+
+ if (sym_bpf_map_delete_elem(fd, &u->cgroup_id) != 0 && errno != ENOENT)
+ return log_unit_debug_errno(u, errno, "bpf-lsm: Failed to delete cgroup entry from LSM BPF map: %m");
+
+ return 0;
+}
+
+int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
+ assert(unit);
+ assert(unit->manager);
+
+ if (!unit->manager->restrict_fs)
+ return -ENOMEDIUM;
+
+ return sym_bpf_map__fd(unit->manager->restrict_fs->maps.cgroup_hash);
+}
+
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
+ restrict_fs_bpf__destroy(prog);
+}
+#else /* ! BPF_FRAMEWORK */
+bool lsm_bpf_supported(bool initialize) {
+ return false;
+}
+
+int lsm_bpf_setup(Manager *m) {
+ return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to set up LSM BPF: %m");
+}
+
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list) {
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to restrict filesystems using LSM BPF: %m");
+}
+
+int lsm_bpf_cleanup(const Unit *u) {
+ return 0;
+}
+
+int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
+ return -ENOMEDIUM;
+}
+
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
+ return;
+}
+#endif
+
+int lsm_bpf_parse_filesystem(
+ const char *name,
+ Set **filesystems,
+ FilesystemParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line) {
+ int r;
+
+ assert(name);
+ assert(filesystems);
+
+ if (name[0] == '@') {
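+ /* Filesystem group, e.g. "@basic-api"; expand it recursively below. */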
+ const FilesystemSet *set;
+ const char *i;
+
+ set = filesystem_set_find(name);
+ if (!set) {
+ log_syntax(unit, flags & FILESYSTEM_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+ "bpf-lsm: Unknown filesystem group, ignoring: %s", name);
+ return 0;
+ }
+
+ NULSTR_FOREACH(i, set->value) {
+ /* Call ourselves again, for the group to parse. Note that we downgrade logging here
+ * (i.e. take away the FILESYSTEM_PARSE_LOG flag) since any issues in the group table
+ * are our own problem, not a problem in user configuration data and we shouldn't
+ * pretend otherwise by complaining about them. */
+ r = lsm_bpf_parse_filesystem(i, filesystems, flags &~ FILESYSTEM_PARSE_LOG, unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+ } else {
+ /* Add the name to the set if the entry's polarity matches the list's polarity;
+ * otherwise remove it, i.e. if we previously wanted to forbid access to a
+ * filesystem and now want to allow it, drop it from the list again. */
+ if (!(flags & FILESYSTEM_PARSE_INVERT) == !!(flags & FILESYSTEM_PARSE_ALLOW_LIST)) {
+ r = set_put_strdup(filesystems, name);
+ if (r == -ENOMEM)
+ return flags & FILESYSTEM_PARSE_LOG ? log_oom() : -ENOMEM;
+ if (r < 0 && r != -EEXIST) /* When already in set, ignore */
+ return r;
+ } else
+ free(set_remove(*filesystems, name));
+ }
+
+ return 0;
+}
diff --git a/src/core/bpf-lsm.h b/src/core/bpf-lsm.h
new file mode 100644
index 0000000..dff5812
--- /dev/null
+++ b/src/core/bpf-lsm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+
+typedef enum FilesystemParseFlags {
+ FILESYSTEM_PARSE_INVERT = 1 << 0,
+ FILESYSTEM_PARSE_ALLOW_LIST = 1 << 1,
+ FILESYSTEM_PARSE_LOG = 1 << 2,
+} FilesystemParseFlags;
+
+typedef struct Unit Unit;
+typedef struct Manager Manager;
+
+typedef struct restrict_fs_bpf restrict_fs_bpf;
+
+bool lsm_bpf_supported(bool initialize);
+int lsm_bpf_setup(Manager *m);
+int lsm_bpf_unit_restrict_filesystems(Unit *u, const Set *filesystems, bool allow_list);
+int lsm_bpf_cleanup(const Unit *u);
+int lsm_bpf_map_restrict_fs_fd(Unit *u);
+void lsm_bpf_destroy(struct restrict_fs_bpf *prog);
+int lsm_bpf_parse_filesystem(const char *name,
+ Set **filesystems,
+ FilesystemParseFlags flags,
+ const char *unit,
+ const char *filename,
+ unsigned line);
diff --git a/src/core/bpf-socket-bind.c b/src/core/bpf-socket-bind.c
new file mode 100644
index 0000000..660ffdb
--- /dev/null
+++ b/src/core/bpf-socket-bind.c
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#if BPF_FRAMEWORK
+#include <bpf/bpf.h>
+#endif
+
+#include "fd-util.h"
+#include "bpf-socket-bind.h"
+
+#if BPF_FRAMEWORK
+/* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf-util.h"
+#include "bpf/socket_bind/socket-bind-api.bpf.h"
+#include "bpf/socket_bind/socket-bind-skel.h"
+
+static struct socket_bind_bpf *socket_bind_bpf_free(struct socket_bind_bpf *obj) {
+ /* socket_bind_bpf__destroy handles object == NULL case */
+ (void) socket_bind_bpf__destroy(obj);
+
+ return NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct socket_bind_bpf *, socket_bind_bpf_free);
+
+static int update_rules_map(
+ int map_fd,
+ CGroupSocketBindItem *head) {
+
+ uint32_t i = 0;
+
+ assert(map_fd >= 0);
+
+ LIST_FOREACH(socket_bind_items, item, head) {
+ struct socket_bind_rule val = {
+ .address_family = (uint32_t) item->address_family,
+ .protocol = item->ip_protocol,
+ .nr_ports = item->nr_ports,
+ .port_min = item->port_min,
+ };
+
+ uint32_t key = i++;
+
+ if (sym_bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) != 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int prepare_socket_bind_bpf(
+ Unit *u,
+ CGroupSocketBindItem *allow,
+ CGroupSocketBindItem *deny,
+ struct socket_bind_bpf **ret_obj) {
+
+ _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+ size_t allow_count = 0, deny_count = 0;
+ int allow_map_fd, deny_map_fd, r;
+
+ assert(ret_obj);
+
+ LIST_FOREACH(socket_bind_items, item, allow)
+ allow_count++;
+
+ LIST_FOREACH(socket_bind_items, item, deny)
+ deny_count++;
+
+ if (allow_count > SOCKET_BIND_MAX_RULES)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, SYNTHETIC_ERRNO(EINVAL),
+ "bpf-socket-bind: Maximum number of socket bind rules (%i) exceeded", SOCKET_BIND_MAX_RULES);
+
+ if (deny_count > SOCKET_BIND_MAX_RULES)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, SYNTHETIC_ERRNO(EINVAL),
+ "bpf-socket-bind: Maximum number of socket bind rules (%i) exceeded", SOCKET_BIND_MAX_RULES);
+
+ obj = socket_bind_bpf__open();
+ if (!obj)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, errno, "bpf-socket-bind: Failed to open BPF object: %m");
+
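+ /* BPF maps must have at least one entry, hence MAX(count, 1u) even when a list is empty. */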
+ if (sym_bpf_map__set_max_entries(obj->maps.sd_bind_allow, MAX(allow_count, 1u)) != 0)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, errno,
+ "bpf-socket-bind: Failed to resize BPF map '%s': %m", sym_bpf_map__name(obj->maps.sd_bind_allow));
+
+ if (sym_bpf_map__set_max_entries(obj->maps.sd_bind_deny, MAX(deny_count, 1u)) != 0)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, errno,
+ "bpf-socket-bind: Failed to resize BPF map '%s': %m", sym_bpf_map__name(obj->maps.sd_bind_deny));
+
+ if (socket_bind_bpf__load(obj) != 0)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, errno,
+ "bpf-socket-bind: Failed to load BPF object: %m");
+
+ allow_map_fd = sym_bpf_map__fd(obj->maps.sd_bind_allow);
+ assert(allow_map_fd >= 0);
+
+ r = update_rules_map(allow_map_fd, allow);
+ if (r < 0)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, r,
+ "bpf-socket-bind: Failed to put socket bind allow rules into BPF map '%s'",
+ sym_bpf_map__name(obj->maps.sd_bind_allow));
+
+ deny_map_fd = sym_bpf_map__fd(obj->maps.sd_bind_deny);
+ assert(deny_map_fd >= 0);
+
+ r = update_rules_map(deny_map_fd, deny);
+ if (r < 0)
+ return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, r,
+ "bpf-socket-bind: Failed to put socket bind deny rules into BPF map '%s'",
+ sym_bpf_map__name(obj->maps.sd_bind_deny));
+
+ *ret_obj = TAKE_PTR(obj);
+ return 0;
+}
+
+int bpf_socket_bind_supported(void) {
+ _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+ int r;
+
+ if (!cgroup_bpf_supported())
+ return false;
+
+ if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*opts=*/NULL)) {
+ log_debug("bpf-socket-bind: BPF program type cgroup_sock_addr is not supported");
+ return false;
+ }
+
+ r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj);
+ if (r < 0) {
+ log_debug_errno(r, "bpf-socket-bind: socket bind filtering is not supported: %m");
+ return false;
+ }
+
+ return bpf_can_link_program(obj->progs.sd_bind4);
+}
+
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
+ int r;
+
+ assert(u);
+
+ if (!u->initial_socket_bind_link_fds) {
+ u->initial_socket_bind_link_fds = fdset_new();
+ if (!u->initial_socket_bind_link_fds)
+ return log_oom();
+ }
+
+ r = fdset_put(u->initial_socket_bind_link_fds, fd);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to put BPF fd %d to initial fdset", fd);
+
+ return 0;
+}
+
+static int socket_bind_install_impl(Unit *u) {
+ _cleanup_(bpf_link_freep) struct bpf_link *ipv4 = NULL, *ipv6 = NULL;
+ _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+ _cleanup_free_ char *cgroup_path = NULL;
+ _cleanup_close_ int cgroup_fd = -1;
+ CGroupContext *cc;
+ int r;
+
+ assert(u);
+
+ cc = unit_get_cgroup_context(u);
+ if (!cc)
+ return 0;
+
+ r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to get cgroup path: %m");
+
+ if (!cc->socket_bind_allow && !cc->socket_bind_deny)
+ return 0;
+
+ r = prepare_socket_bind_bpf(u, cc->socket_bind_allow, cc->socket_bind_deny, &obj);
+ if (r < 0)
+ return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to load BPF object: %m");
+
+ cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC, 0);
+ if (cgroup_fd < 0)
+ return log_unit_error_errno(u, errno, "bpf-socket-bind: Failed to open cgroup %s for reading: %m", cgroup_path);
+
+ ipv4 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind4, cgroup_fd);
+ r = sym_libbpf_get_error(ipv4);
+ if (r != 0)
+ return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to link '%s' cgroup-bpf program: %m",
+ sym_bpf_program__name(obj->progs.sd_bind4));
+
+ ipv6 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind6, cgroup_fd);
+ r = sym_libbpf_get_error(ipv6);
+ if (r != 0)
+ return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to link '%s' cgroup-bpf program: %m",
+ sym_bpf_program__name(obj->progs.sd_bind6));
+
+ u->ipv4_socket_bind_link = TAKE_PTR(ipv4);
+ u->ipv6_socket_bind_link = TAKE_PTR(ipv6);
+
+ return 0;
+}
+
+int bpf_socket_bind_install(Unit *u) {
+ int r;
+
+ assert(u);
+
+ r = socket_bind_install_impl(u);
+ if (r == -ENOMEM)
+ return r;
+
+ fdset_close(u->initial_socket_bind_link_fds);
+ return r;
+}
+
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+ int r;
+
+ assert(u);
+
+ r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
+ if (r < 0)
+ return r;
+
+ return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
+}
+
+#else /* ! BPF_FRAMEWORK */
+int bpf_socket_bind_supported(void) {
+ return false;
+}
+
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
+ return 0;
+}
+
+int bpf_socket_bind_install(Unit *u) {
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "bpf-socket-bind: Failed to install; BPF framework is not supported");
+}
+
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+ return 0;
+}
+#endif
diff --git a/src/core/bpf-socket-bind.h b/src/core/bpf-socket-bind.h
new file mode 100644
index 0000000..7d426df
--- /dev/null
+++ b/src/core/bpf-socket-bind.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "fdset.h"
+#include "unit.h"
+
+int bpf_socket_bind_supported(void);
+
+/* Add a BPF link fd created before daemon-reload or daemon-reexec. The fds will be closed at the
+ * end of bpf_socket_bind_install(). */
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd);
+
+int bpf_socket_bind_install(Unit *u);
+
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/bpf-util.c b/src/core/bpf-util.c
new file mode 100644
index 0000000..84170da
--- /dev/null
+++ b/src/core/bpf-util.c
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "bpf-dlopen.h"
+#include "bpf-util.h"
+#include "cgroup-util.h"
+#include "log.h"
+
+bool cgroup_bpf_supported(void) {
+ static int supported = -1;
+ int r;
+
+ if (supported >= 0)
+ return supported;
+
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ if (r < 0) {
+ log_warning_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+ return (supported = false);
+ }
+
+ if (r == 0) {
+ log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Not running with unified cgroup hierarchy, disabling cgroup BPF features.");
+ return (supported = false);
+ }
+
+ r = dlopen_bpf();
+ if (r < 0) {
+ log_full_errno(in_initrd() ? LOG_DEBUG : LOG_INFO,
+ r, "Failed to open libbpf, cgroup BPF features disabled: %m");
+ return (supported = false);
+ }
+
+ return (supported = true);
+}
diff --git a/src/core/bpf-util.h b/src/core/bpf-util.h
new file mode 100644
index 0000000..a6c55cd
--- /dev/null
+++ b/src/core/bpf-util.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <stdbool.h>
+
+bool cgroup_bpf_supported(void);
diff --git a/src/core/bpf/meson.build b/src/core/bpf/meson.build
new file mode 100644
index 0000000..f654016
--- /dev/null
+++ b/src/core/bpf/meson.build
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('BPF_FRAMEWORK') != 1
+ subdir_done()
+endif
+
+bpf_clang_flags = [
+ '-std=gnu11',
+ '-Wno-compare-distinct-pointer-types',
+ '-O2',
+ '-target',
+ 'bpf',
+ '-g',
+ '-c',
+]
+
+bpf_gcc_flags = [
+ '-std=gnu11',
+ '-O2',
+ '-mkernel=5.2',
+ '-mcpu=v3',
+ '-mco-re',
+ '-gbtf',
+ '-c',
+]
+
+# Generate defines that are appropriate to tell the compiler what architecture
+# we're compiling for. By default we just map meson's cpu_family to __<cpu_family>__.
+# This dictionary contains the exceptions where this doesn't work.
+#
+# C.f. https://mesonbuild.com/Reference-tables.html#cpu-families
+# and src/basic/missing_syscall_def.h.
+cpu_arch_defines = {
+ 'ppc' : ['-D__powerpc__'],
+ 'ppc64' : ['-D__powerpc64__', '-D_CALL_ELF=2'],
+ 'riscv32' : ['-D__riscv', '-D__riscv_xlen=32'],
+ 'riscv64' : ['-D__riscv', '-D__riscv_xlen=64'],
+ 'x86' : ['-D__i386__'],
+
+ # For arm, assume hardware fp is available.
+ 'arm' : ['-D__arm__', '-D__ARM_PCS_VFP'],
+}
+
+bpf_arch_flags = cpu_arch_defines.get(host_machine.cpu_family(),
+ ['-D__@0@__'.format(host_machine.cpu_family())])
+if bpf_compiler == 'gcc'
+ bpf_arch_flags += ['-m' + host_machine.endian() + '-endian']
+endif
+
+libbpf_include_dir = libbpf.get_variable(pkgconfig : 'includedir')
+
+bpf_o_unstripped_cmd = []
+if bpf_compiler == 'clang'
+ bpf_o_unstripped_cmd += [
+ clang,
+ bpf_clang_flags,
+ bpf_arch_flags,
+ ]
+elif bpf_compiler == 'gcc'
+ bpf_o_unstripped_cmd += [
+ bpf_gcc,
+ bpf_gcc_flags,
+ bpf_arch_flags,
+ ]
+endif
+
+bpf_o_unstripped_cmd += ['-I.']
+
+if not meson.is_cross_build() and bpf_compiler == 'clang'
+ target_triplet_cmd = run_command('gcc', '-dumpmachine', check: false)
+ if target_triplet_cmd.returncode() == 0
+ target_triplet = target_triplet_cmd.stdout().strip()
+ bpf_o_unstripped_cmd += [
+ '-isystem',
+ '/usr/include/@0@'.format(target_triplet)
+ ]
+ endif
+endif
+
+bpf_o_unstripped_cmd += [
+ '-idirafter',
+ libbpf_include_dir,
+ '@INPUT@',
+ '-o',
+ '@OUTPUT@'
+]
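+
+# For illustration only: on a native x86_64 clang build the assembled command is
+# roughly "clang -std=gnu11 -Wno-compare-distinct-pointer-types -O2 -target bpf
+# -g -c -D__x86_64__ -I. -isystem /usr/include/x86_64-linux-gnu -idirafter
+# <libbpf includedir> input.bpf.c -o output.bpf.unstripped.o".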
+
+if bpftool_strip
+ bpf_o_cmd = [
+ bpftool,
+ 'gen',
+ 'object',
+ '@OUTPUT@',
+ '@INPUT@'
+ ]
+elif bpf_compiler == 'clang'
+ bpf_o_cmd = [
+ llvm_strip,
+ '-g',
+ '@INPUT@',
+ '-o',
+ '@OUTPUT@'
+ ]
+endif
+
+skel_h_cmd = [
+ bpftool,
+ 'gen',
+ 'skeleton',
+ '@INPUT@'
+]
diff --git a/src/core/bpf/restrict_fs/meson.build b/src/core/bpf/restrict_fs/meson.build
new file mode 100644
index 0000000..69cde02
--- /dev/null
+++ b/src/core/bpf/restrict_fs/meson.build
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('BPF_FRAMEWORK') != 1
+ subdir_done()
+endif
+
+restrict_fs_bpf_o_unstripped = custom_target(
+ 'restrict-fs.bpf.unstripped.o',
+ input : 'restrict-fs.bpf.c',
+ output : 'restrict-fs.bpf.unstripped.o',
+ command : bpf_o_unstripped_cmd)
+
+restrict_fs_bpf_o = custom_target(
+ 'restrict-fs.bpf.o',
+ input : restrict_fs_bpf_o_unstripped,
+ output : 'restrict-fs.bpf.o',
+ command : bpf_o_cmd)
+
+restrict_fs_skel_h = custom_target(
+ 'restrict-fs.skel.h',
+ input : restrict_fs_bpf_o,
+ output : 'restrict-fs.skel.h',
+ command : skel_h_cmd,
+ capture : true)
diff --git a/src/core/bpf/restrict_fs/restrict-fs-skel.h b/src/core/bpf/restrict_fs/restrict-fs-skel.h
new file mode 100644
index 0000000..412cf62
--- /dev/null
+++ b/src/core/bpf/restrict_fs/restrict-fs-skel.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+/* libbpf is used via dlopen(), so rename symbols */
+#define bpf_object__open_skeleton sym_bpf_object__open_skeleton
+#define bpf_object__load_skeleton sym_bpf_object__load_skeleton
+#define bpf_object__destroy_skeleton sym_bpf_object__destroy_skeleton
+
+#include "bpf/restrict_fs/restrict-fs.skel.h"
diff --git a/src/core/bpf/restrict_fs/restrict-fs.bpf.c b/src/core/bpf/restrict_fs/restrict-fs.bpf.c
new file mode 100644
index 0000000..eb5ed3e
--- /dev/null
+++ b/src/core/bpf/restrict_fs/restrict-fs.bpf.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct super_block {
+ unsigned long int s_magic;
+} __attribute__((preserve_access_index));
+
+struct inode {
+ struct super_block *i_sb;
+} __attribute__((preserve_access_index));
+
+struct file {
+ struct inode *f_inode;
+} __attribute__((preserve_access_index));
+
+/*
+ * max_entries is set from user space with the bpf_map__set_max_entries helper.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __type(key, uint64_t); /* cgroup ID */
+ __type(value, uint32_t); /* fs magic set */
+} cgroup_hash SEC(".maps");
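+
+/* Each inner map is populated from user space (see lsm_bpf_unit_restrict_filesystems()):
+ * key 0 holds the allow/deny flag, the remaining keys are filesystem magic numbers. */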
+
+SEC("lsm/file_open")
+int BPF_PROG(restrict_filesystems, struct file *file, int ret)
+{
+ unsigned long raw_magic_number;
+ uint64_t cgroup_id;
+ uint32_t *value, *magic_map, magic_number, zero = 0, *is_allow;
+
+ /* ret is the return value from the previous BPF program or 0 if it's
+ * the first hook */
+ if (ret != 0)
+ return ret;
+
+ BPF_CORE_READ_INTO(&raw_magic_number, file, f_inode, i_sb, s_magic);
+ /* super_block.s_magic is unsigned long, but magic_map keys are
+ * uint32_t. Passing s_magic as-is would fail on 64-bit big-endian
+ * systems, where the low 32 bits of the value are not the first four
+ * bytes. So cast it. */
+ magic_number = (uint32_t)raw_magic_number;
+
+ cgroup_id = bpf_get_current_cgroup_id();
+
+ magic_map = bpf_map_lookup_elem(&cgroup_hash, &cgroup_id);
+ if (!magic_map)
+ return 0;
+
+ is_allow = bpf_map_lookup_elem(magic_map, &zero);
+ if (!is_allow)
+ /* Malformed map: it doesn't say whether it's an allow list
+ * or a deny list. Allow access. */
+ return 0;
+
+ if (*is_allow) {
+ /* Allow-list: Allow access only if magic_number present in inner map */
+ if (!bpf_map_lookup_elem(magic_map, &magic_number))
+ return -EPERM;
+ } else {
+ /* Deny-list: Allow access only if magic_number is not present in inner map */
+ if (bpf_map_lookup_elem(magic_map, &magic_number))
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+static const char _license[] SEC("license") = "GPL";
diff --git a/src/core/bpf/restrict_ifaces/meson.build b/src/core/bpf/restrict_ifaces/meson.build
new file mode 100644
index 0000000..5f36178
--- /dev/null
+++ b/src/core/bpf/restrict_ifaces/meson.build
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('BPF_FRAMEWORK') != 1
+ subdir_done()
+endif
+
+restrict_ifaces_bpf_o_unstripped = custom_target(
+ 'restrict-ifaces.bpf.unstripped.o',
+ input : 'restrict-ifaces.bpf.c',
+ output : 'restrict-ifaces.bpf.unstripped.o',
+ command : bpf_o_unstripped_cmd)
+
+restrict_ifaces_bpf_o = custom_target(
+ 'restrict-ifaces.bpf.o',
+ input : restrict_ifaces_bpf_o_unstripped,
+ output : 'restrict-ifaces.bpf.o',
+ command : bpf_o_cmd)
+
+restrict_ifaces_skel_h = custom_target(
+ 'restrict-ifaces.skel.h',
+ input : restrict_ifaces_bpf_o,
+ output : 'restrict-ifaces.skel.h',
+ command : skel_h_cmd,
+ capture : true)
diff --git a/src/core/bpf/restrict_ifaces/restrict-ifaces-skel.h b/src/core/bpf/restrict_ifaces/restrict-ifaces-skel.h
new file mode 100644
index 0000000..f937490
--- /dev/null
+++ b/src/core/bpf/restrict_ifaces/restrict-ifaces-skel.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+/* libbpf is used via dlopen(), so rename symbols */
+#define bpf_object__open_skeleton sym_bpf_object__open_skeleton
+#define bpf_object__load_skeleton sym_bpf_object__load_skeleton
+#define bpf_object__destroy_skeleton sym_bpf_object__destroy_skeleton
+
+#include "bpf/restrict_ifaces/restrict-ifaces.skel.h"
diff --git a/src/core/bpf/restrict_ifaces/restrict-ifaces.bpf.c b/src/core/bpf/restrict_ifaces/restrict-ifaces.bpf.c
new file mode 100644
index 0000000..32cde5c
--- /dev/null
+++ b/src/core/bpf/restrict_ifaces/restrict-ifaces.bpf.c
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* <linux/bpf.h> must precede <bpf/bpf_helpers.h>, because the latter relies
+ * on the integer types used in the BPF helper signatures without defining
+ * them itself.
+ */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
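+/* Set from user space before the object is loaded, via the skeleton's rodata
+ * section; to the verifier it is a constant. */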
+const volatile __u8 is_allow_list = 0;
+
+/* Map containing the network interface indexes.
+ * The interpretation of the map depends on the value of is_allow_list.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u8);
+} sd_restrictif SEC(".maps");
+
+#define DROP 0
+#define PASS 1
+
+static __always_inline int restrict_network_interfaces_impl(const struct __sk_buff *sk) {
+ __u32 ifindex;
+ __u8 *lookup_result;
+
+ ifindex = sk->ifindex;
+ lookup_result = bpf_map_lookup_elem(&sd_restrictif, &ifindex);
+ if (is_allow_list) {
+ /* allow-list: let the packet pass if iface in the list */
+ if (lookup_result)
+ return PASS;
+ } else {
+ /* deny-list: let the packet pass if iface *not* in the list */
+ if (!lookup_result)
+ return PASS;
+ }
+
+ return DROP;
+}
+
+SEC("cgroup_skb/egress")
+int sd_restrictif_e(const struct __sk_buff *sk) {
+ return restrict_network_interfaces_impl(sk);
+}
+
+SEC("cgroup_skb/ingress")
+int sd_restrictif_i(const struct __sk_buff *sk) {
+ return restrict_network_interfaces_impl(sk);
+}
+
+static const char _license[] SEC("license") = "LGPL-2.1-or-later";
diff --git a/src/core/bpf/socket_bind/meson.build b/src/core/bpf/socket_bind/meson.build
new file mode 100644
index 0000000..05a2b9d
--- /dev/null
+++ b/src/core/bpf/socket_bind/meson.build
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+if conf.get('BPF_FRAMEWORK') != 1
+ subdir_done()
+endif
+
+socket_bind_bpf_o_unstripped = custom_target(
+ 'socket-bind.bpf.unstripped.o',
+ input : 'socket-bind.bpf.c',
+ output : 'socket-bind.bpf.unstripped.o',
+ command : bpf_o_unstripped_cmd)
+
+socket_bind_bpf_o = custom_target(
+ 'socket-bind.bpf.o',
+ input : socket_bind_bpf_o_unstripped,
+ output : 'socket-bind.bpf.o',
+ command : bpf_o_cmd)
+
+socket_bind_skel_h = custom_target(
+ 'socket-bind.skel.h',
+ input : socket_bind_bpf_o,
+ output : 'socket-bind.skel.h',
+ command : skel_h_cmd,
+ capture : true)
diff --git a/src/core/bpf/socket_bind/socket-bind-api.bpf.h b/src/core/bpf/socket_bind/socket-bind-api.bpf.h
new file mode 100644
index 0000000..277b9bb
--- /dev/null
+++ b/src/core/bpf/socket_bind/socket-bind-api.bpf.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+#include <linux/types.h>
+
+/*
+ * A bind rule is matched against the socket fields accessible to the
+ * cgroup/bind{4,6} hook through the bpf_sock_addr struct.
+ * 'address_family' is expected to be one of AF_UNSPEC, AF_INET or AF_INET6.
+ * Matching by family is bypassed for rules with AF_UNSPEC set, which makes the
+ * rest of the rule applicable to both IPv4 and IPv6 addresses.
+ * If matching by family is either successful or bypassed, the rule and the
+ * socket are matched by IP protocol. If 'protocol' is 0, protocol matching is
+ * bypassed.
+ * The 'nr_ports' and 'port_min' fields specify the set of ports to match the
+ * user port against.
+ * If 'nr_ports' is 0, matching by port is bypassed, making the rule applicable
+ * to all possible ports, i.e. the whole [1, 65535] range. Thus a rule with
+ * 'address_family', 'protocol' and 'nr_ports' equal to AF_UNSPEC, 0 and 0
+ * respectively forms the 'allow any' or 'deny any' case.
+ * For positive 'nr_ports', a user port lying in the half-open range from
+ * 'port_min' to 'port_min' + 'nr_ports' is considered a match; 'nr_ports'
+ * equal to 1 forms a rule for a single port.
+ * Ports are in host byte order.
+ *
+ * Examples (fields given in struct order: address_family, protocol, nr_ports,
+ * port_min):
+ * AF_UNSPEC, 0, 1, 7777: match IPv4 and IPv6 addresses with user port 7777;
+ *
+ * AF_INET, 0, 1023, 1: match IPv4 addresses with a user port in the [1, 1023]
+ * range inclusive;
+ *
+ * AF_INET6, 0, 0, 0: match all IPv6 addresses;
+ *
+ * AF_UNSPEC, 0, 0, 0: match all IPv4 and IPv6 addresses;
+ *
+ * AF_INET6, IPPROTO_TCP, 0, 0: match IPv6/TCP addresses.
+ */
+
+struct socket_bind_rule {
+ __u32 address_family;
+ __u32 protocol;
+ __u16 nr_ports;
+ __u16 port_min;
+};
+
+#define SOCKET_BIND_MAX_RULES 128
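+
+/* Illustrative only, not part of the ABI: a rule matching TCP binds to ports
+ * 8000-8009 on either address family would be initialized as
+ *
+ *   struct socket_bind_rule r = {
+ *           .address_family = AF_UNSPEC,
+ *           .protocol = IPPROTO_TCP,
+ *           .nr_ports = 10,
+ *           .port_min = 8000,
+ *   };
+ */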
diff --git a/src/core/bpf/socket_bind/socket-bind-skel.h b/src/core/bpf/socket_bind/socket-bind-skel.h
new file mode 100644
index 0000000..e0d1626
--- /dev/null
+++ b/src/core/bpf/socket_bind/socket-bind-skel.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+/* libbpf is used via dlopen(), so rename symbols */
+#define bpf_object__open_skeleton sym_bpf_object__open_skeleton
+#define bpf_object__load_skeleton sym_bpf_object__load_skeleton
+#define bpf_object__destroy_skeleton sym_bpf_object__destroy_skeleton
+
+#include "bpf/socket_bind/socket-bind.skel.h"
diff --git a/src/core/bpf/socket_bind/socket-bind.bpf.c b/src/core/bpf/socket_bind/socket-bind.bpf.c
new file mode 100644
index 0000000..b7972a8
--- /dev/null
+++ b/src/core/bpf/socket_bind/socket-bind.bpf.c
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+/* The SPDX header above is actually correct in claiming this was
+ * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that
+ * compatible with GPL we will claim this to be GPL however, which should be
+ * fine given that LGPL-2.1-or-later downgrades to GPL if needed.
+ */
+
+#include "socket-bind-api.bpf.h"
+/* <linux/types.h> must precede <bpf/bpf_helpers.h>, because
+ * <bpf/bpf_helpers.h> by design does not depend on the type headers itself.
+ */
+#include <linux/types.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+
+/*
+ * max_entries is set from user space with the bpf_map__set_max_entries helper.
+ */
+struct socket_bind_map_t {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct socket_bind_rule);
+};
+
+enum socket_bind_action {
+ SOCKET_BIND_DENY = 0,
+ SOCKET_BIND_ALLOW = 1,
+};
+
+struct socket_bind_map_t sd_bind_allow SEC(".maps");
+struct socket_bind_map_t sd_bind_deny SEC(".maps");
+
+static __always_inline bool match_af(
+ __u8 address_family, const struct socket_bind_rule *r) {
+ return r->address_family == AF_UNSPEC || address_family == r->address_family;
+}
+
+static __always_inline bool match_protocol(
+ __u32 protocol, const struct socket_bind_rule *r) {
+ return r->protocol == 0 || r->protocol == protocol;
+}
+
+static __always_inline bool match_user_port(
+ __u16 port, const struct socket_bind_rule *r) {
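+ /* Half-open range: e.g. nr_ports=10, port_min=8000 matches ports 8000..8009. */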
+ return r->nr_ports == 0 ||
+ (port >= r->port_min && port < r->port_min + (__u32) r->nr_ports);
+}
+
+static __always_inline bool match(
+ __u8 address_family,
+ __u32 protocol,
+ __u16 port,
+ const struct socket_bind_rule *r) {
+ return match_af(address_family, r) &&
+ match_protocol(protocol, r) &&
+ match_user_port(port, r);
+}
+
+static __always_inline bool match_rules(
+ struct bpf_sock_addr *ctx,
+ struct socket_bind_map_t *rules) {
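+ /* The "volatile" keeps the compiler from narrowing the load of the context field;
+ * some verifier versions reject loads of bpf_sock_addr fields that are not 32 bits wide. */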
+ volatile __u32 user_port = ctx->user_port;
+ __u16 port = (__u16)bpf_ntohs(user_port);
+
+ for (__u32 i = 0; i < SOCKET_BIND_MAX_RULES; ++i) {
+ const __u32 key = i;
+ const struct socket_bind_rule *rule = bpf_map_lookup_elem(rules, &key);
+
+ /* Lookup returns NULL if iterator is advanced past the last
+ * element put in the map. */
+ if (!rule)
+ break;
+
+ if (match(ctx->user_family, ctx->protocol, port, rule))
+ return true;
+ }
+
+ return false;
+}
+
+static __always_inline int bind_socket(struct bpf_sock_addr *ctx) {
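+ /* Allow rules take precedence over deny rules; a bind matching neither list is allowed. */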
+ if (match_rules(ctx, &sd_bind_allow))
+ return SOCKET_BIND_ALLOW;
+
+ if (match_rules(ctx, &sd_bind_deny))
+ return SOCKET_BIND_DENY;
+
+ return SOCKET_BIND_ALLOW;
+}
+
+SEC("cgroup/bind4")
+int sd_bind4(struct bpf_sock_addr *ctx) {
+ if (ctx->user_family != AF_INET || ctx->family != AF_INET)
+ return SOCKET_BIND_ALLOW;
+
+ return bind_socket(ctx);
+}
+
+SEC("cgroup/bind6")
+int sd_bind6(struct bpf_sock_addr *ctx) {
+ if (ctx->user_family != AF_INET6 || ctx->family != AF_INET6)
+ return SOCKET_BIND_ALLOW;
+
+ return bind_socket(ctx);
+}
+
+static const char _license[] SEC("license") = "GPL";