From efeb864cb547a2cbf96dc0053a8bdb4d9190b364 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Wed, 12 Jun 2024 05:50:45 +0200
Subject: Merging upstream version 256.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 src/core/automount.c                  |   75 +-
 src/core/bpf-devices.c                |   82 +-
 src/core/bpf-firewall.c               |  160 ++--
 src/core/bpf-foreign.c                |   21 +-
 src/core/bpf-lsm.c                    |  320 -------
 src/core/bpf-lsm.h                    |   28 -
 src/core/bpf-restrict-fs.c            |  324 +++++++
 src/core/bpf-restrict-fs.h            |   23 +
 src/core/bpf-restrict-ifaces.c        |  223 +++++
 src/core/bpf-restrict-ifaces.h        |   16 +
 src/core/bpf-socket-bind.c            |   51 +-
 src/core/bpf-socket-bind.h            |    2 +-
 src/core/bpf-util.c                   |    3 +-
 src/core/cgroup.c                     | 1646 ++++++++++++++++++++++++++-------
 src/core/cgroup.h                     |  132 ++-
 src/core/core-varlink.c               |  105 ++-
 src/core/core-varlink.h               |    4 -
 src/core/crash-handler.c              |    8 +-
 src/core/dbus-cgroup.c                |   21 +-
 src/core/dbus-execute.c               |  117 +--
 src/core/dbus-execute.h               |    1 +
 src/core/dbus-job.c                   |   25 +-
 src/core/dbus-manager.c               |  407 +++++---
 src/core/dbus-mount.c                 |   31 +-
 src/core/dbus-scope.c                 |   24 +-
 src/core/dbus-service.c               |    2 -
 src/core/dbus-socket.c                |    4 +
 src/core/dbus-unit.c                  |  158 ++--
 src/core/dbus-util.c                  |    7 +-
 src/core/dbus-util.h                  |    3 +-
 src/core/dbus.c                       |   92 +-
 src/core/device.c                     |   75 +-
 src/core/dynamic-user.c               |   49 +-
 src/core/emergency-action.c           |   32 +-
 src/core/emergency-action.h           |    6 +-
 src/core/exec-credential.c            |  256 ++---
 src/core/exec-credential.h            |    4 +-
 src/core/exec-invoke.c                |  649 +++++++------
 src/core/execute-serialize.c          |  131 +--
 src/core/execute.c                    |  239 +++--
 src/core/execute.h                    |  175 ++--
 src/core/executor.c                   |    5 +-
 src/core/fuzz-execute-serialize.c     |    2 +-
 src/core/generator-setup.c            |   12 +-
 src/core/import-creds.c               |   17 +-
 src/core/job.c                        |   42 +-
 src/core/job.h                        |    1 +
 src/core/kmod-setup.c                 |   48 +-
 src/core/load-fragment-gperf.gperf.in |   15 +-
 src/core/load-fragment.c              |  353 +++----
 src/core/load-fragment.h              |    4 +-
 src/core/main.c                       |  246 ++++-
 src/core/main.h                       |   14 +-
 src/core/manager-dump.c               |    2 +-
 src/core/manager-serialize.c          |   97 +-
 src/core/manager.c                    |  671 ++++++++------
 src/core/manager.h                    |   63 +-
 src/core/meson.build                  |    7 +-
 src/core/mount.c                      |  353 ++++---
 src/core/mount.h                      |    1 +
 src/core/namespace.c                  |  333 ++++---
 src/core/path.c                       |   81 +-
 src/core/restrict-ifaces.c            |  200 ----
 src/core/restrict-ifaces.h            |   16 -
 src/core/scope.c                      |   95 +-
 src/core/scope.h                      |    1 +
 src/core/selinux-access.c             |    5 +-
 src/core/service.c                    |  787 ++++++++--------
 src/core/service.h                    |    4 +
 src/core/show-status.c                |    4 +-
 src/core/slice.c                      |  147 ++-
 src/core/slice.h                      |    2 +
 src/core/socket.c                     |  382 ++++----
 src/core/socket.h                     |    4 +-
 src/core/swap.c                       |  257 +++--
 src/core/swap.h                       |    1 +
 src/core/system.conf.in               |    3 +-
 src/core/taint.c                      |   85 ++
 src/core/taint.h                      |    4 +
 src/core/target.c                     |   57 +-
 src/core/timer.c                      |   89 +-
 src/core/transaction.c                |    8 +-
 src/core/unit-printf.c                |   59 +-
 src/core/unit-serialize.c             |  279 +-----
 src/core/unit.c                       | 1264 +++++++++++++------------
 src/core/unit.h                       |  166 ++--
 86 files changed, 6932 insertions(+), 5085 deletions(-)
 delete mode 100644 src/core/bpf-lsm.c
 delete mode 100644 src/core/bpf-lsm.h
 create mode 100644 src/core/bpf-restrict-fs.c
 create mode 100644 src/core/bpf-restrict-fs.h
 create mode 100644 src/core/bpf-restrict-ifaces.c
 create mode 100644 src/core/bpf-restrict-ifaces.h
 delete mode 100644 src/core/restrict-ifaces.c
 delete mode 100644 src/core/restrict-ifaces.h
 create mode 100644 src/core/taint.c
 create mode 100644 src/core/taint.h

(limited to 'src/core')

diff --git a/src/core/automount.c b/src/core/automount.c
index 14bf7e6..6cb9d52 100644
--- a/src/core/automount.c
+++ b/src/core/automount.c
@@ -38,10 +38,10 @@
 #include "unit.h"
 
 static const UnitActiveState state_translation_table[_AUTOMOUNT_STATE_MAX] = {
-        [AUTOMOUNT_DEAD] = UNIT_INACTIVE,
+        [AUTOMOUNT_DEAD]    = UNIT_INACTIVE,
         [AUTOMOUNT_WAITING] = UNIT_ACTIVE,
         [AUTOMOUNT_RUNNING] = UNIT_ACTIVE,
-        [AUTOMOUNT_FAILED] = UNIT_FAILED
+        [AUTOMOUNT_FAILED]  = UNIT_FAILED,
 };
 
 static int open_dev_autofs(Manager *m);
@@ -51,10 +51,8 @@ static void automount_stop_expire(Automount *a);
 static int automount_send_ready(Automount *a, Set *tokens, int status);
 
 static void automount_init(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
 
-        assert(a);
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         a->pipe_fd = -EBADF;
@@ -88,9 +86,7 @@ static void unmount_autofs(Automount *a) {
 }
 
 static void automount_done(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
-
-        assert(a);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
 
         unmount_autofs(a);
 
@@ -126,7 +122,7 @@ static int automount_add_mount_dependencies(Automount *a) {
         if (r < 0)
                 return r;
 
-        return unit_require_mounts_for(UNIT(a), parent, UNIT_DEPENDENCY_IMPLICIT);
+        return unit_add_mounts_for(UNIT(a), parent, UNIT_DEPENDENCY_IMPLICIT, UNIT_MOUNT_REQUIRES);
 }
 
 static int automount_add_default_dependencies(Automount *a) {
@@ -227,10 +223,9 @@ static int automount_add_extras(Automount *a) {
 }
 
 static int automount_load(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         /* Load a .automount file */
@@ -250,6 +245,7 @@ static int automount_load(Unit *u) {
 
 static void automount_set_state(Automount *a, AutomountState state) {
         AutomountState old_state;
+
         assert(a);
 
         if (a->state != state)
@@ -271,10 +267,9 @@ static void automount_set_state(Automount *a, AutomountState state) {
 }
 
 static int automount_coldplug(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(a);
         assert(a->state == AUTOMOUNT_DEAD);
 
         if (a->deserialized_state == a->state)
@@ -310,9 +305,7 @@ static int automount_coldplug(Unit *u) {
 }
 
 static void automount_dump(Unit *u, FILE *f, const char *prefix) {
-        Automount *a = AUTOMOUNT(u);
-
-        assert(a);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
 
         fprintf(f,
                 "%sAutomount State: %s\n"
@@ -478,30 +471,22 @@ static int automount_send_ready(Automount *a, Set *tokens, int status) {
         r = 0;
 
         /* Autofs thankfully does not hand out 0 as a token */
-        while ((token = PTR_TO_UINT(set_steal_first(tokens)))) {
-                int k;
-
+        while ((token = PTR_TO_UINT(set_steal_first(tokens))))
                 /* Autofs fun fact:
                  *
-                 * if you pass a positive status code here, kernels
-                 * prior to 4.12 will freeze! Yay! */
-
-                k = autofs_send_ready(UNIT(a)->manager->dev_autofs_fd,
-                                      ioctl_fd,
-                                      token,
-                                      status);
-                if (k < 0)
-                        r = k;
-        }
+                 * if you pass a positive status code here, kernels prior to 4.12 will freeze! Yay! */
+                RET_GATHER(r, autofs_send_ready(UNIT(a)->manager->dev_autofs_fd,
+                                                ioctl_fd,
+                                                token,
+                                                status));
 
         return r;
 }
 
 static void automount_trigger_notify(Unit *u, Unit *other) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(a);
         assert(other);
 
         /* Filter out invocations with bogus state */
@@ -697,11 +682,10 @@ static int asynchronous_expire(int dev_autofs_fd, int ioctl_fd) {
 }
 
 static int automount_dispatch_expire(sd_event_source *source, usec_t usec, void *userdata) {
+        Automount *a = ASSERT_PTR(AUTOMOUNT(userdata));
         _cleanup_close_ int ioctl_fd = -EBADF;
-        Automount *a = AUTOMOUNT(userdata);
         int r;
 
-        assert(a);
         assert(source == a->expire_event_source);
 
         ioctl_fd = open_ioctl_fd(UNIT(a)->manager->dev_autofs_fd, a->where, a->dev_id);
@@ -815,13 +799,12 @@ fail:
 }
 
 static int automount_start(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(a);
         assert(IN_SET(a->state, AUTOMOUNT_DEAD, AUTOMOUNT_FAILED));
 
-        if (path_is_mount_point(a->where, NULL, 0) > 0)
+        if (path_is_mount_point(a->where) > 0)
                 return log_unit_error_errno(u, SYNTHETIC_ERRNO(EEXIST), "Path %s is already a mount point, refusing start.", a->where);
 
         r = unit_test_trigger_loaded(u);
@@ -838,9 +821,8 @@ static int automount_start(Unit *u) {
 }
 
 static int automount_stop(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
 
-        assert(a);
         assert(IN_SET(a->state, AUTOMOUNT_WAITING, AUTOMOUNT_RUNNING));
 
         automount_enter_dead(a, AUTOMOUNT_SUCCESS);
@@ -848,11 +830,10 @@ static int automount_stop(Unit *u) {
 }
 
 static int automount_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         void *p;
         int r;
 
-        assert(a);
         assert(f);
         assert(fds);
 
@@ -873,10 +854,9 @@ static int automount_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int automount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(a);
         assert(fds);
 
         if (streq(key, "state")) {
@@ -958,13 +938,12 @@ static bool automount_may_gc(Unit *u) {
 }
 
 static int automount_dispatch_io(sd_event_source *s, int fd, uint32_t events, void *userdata) {
+        Automount *a = ASSERT_PTR(AUTOMOUNT(userdata));
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
         union autofs_v5_packet_union packet;
-        Automount *a = AUTOMOUNT(userdata);
         Unit *trigger;
         int r;
 
-        assert(a);
         assert(fd == a->pipe_fd);
 
         if (events & (EPOLLHUP|EPOLLERR)) {
@@ -1048,9 +1027,7 @@ static void automount_shutdown(Manager *m) {
 }
 
 static void automount_reset_failed(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
-
-        assert(a);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
 
         if (a->state == AUTOMOUNT_FAILED)
                 automount_set_state(a, AUTOMOUNT_DEAD);
@@ -1068,11 +1045,9 @@ static bool automount_supported(void) {
 }
 
 static int automount_can_start(Unit *u) {
-        Automount *a = AUTOMOUNT(u);
+        Automount *a = ASSERT_PTR(AUTOMOUNT(u));
         int r;
 
-        assert(a);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 automount_enter_dead(a, AUTOMOUNT_FAILURE_START_LIMIT_HIT);
diff --git a/src/core/bpf-devices.c b/src/core/bpf-devices.c
index 06d2146..8484dbc 100644
--- a/src/core/bpf-devices.c
+++ b/src/core/bpf-devices.c
@@ -24,15 +24,15 @@ assert_cc((unsigned) BPF_DEVCG_ACC_WRITE == (unsigned) CGROUP_DEVICE_WRITE);
 static int bpf_prog_allow_list_device(
                 BPFProgram *prog,
                 char type,
-                int major,
-                int minor,
+                unsigned major,
+                unsigned minor,
                 CGroupDevicePermissions p) {
 
         int r;
 
         assert(prog);
 
-        log_trace("%s: %c %d:%d %s", __func__, type, major, minor, cgroup_device_permissions_to_string(p));
+        log_trace("%s: %c %u:%u %s", __func__, type, major, minor, cgroup_device_permissions_to_string(p));
 
         if (p <= 0 || p >= _CGROUP_DEVICE_PERMISSIONS_MAX)
                 return -EINVAL;
@@ -56,22 +56,22 @@ static int bpf_prog_allow_list_device(
         else
                 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
         if (r < 0)
-                log_error_errno(r, "Extending device control BPF program failed: %m");
+                return log_error_errno(r, "Extending device control BPF program failed: %m");
 
-        return r;
+        return 1; /* return 1 → we did something */
 }
 
 static int bpf_prog_allow_list_major(
                 BPFProgram *prog,
                 char type,
-                int major,
+                unsigned major,
                 CGroupDevicePermissions p) {
 
         int r;
 
         assert(prog);
 
-        log_trace("%s: %c %d:* %s", __func__, type, major, cgroup_device_permissions_to_string(p));
+        log_trace("%s: %c %u:* %s", __func__, type, major, cgroup_device_permissions_to_string(p));
 
         if (p <= 0 || p >= _CGROUP_DEVICE_PERMISSIONS_MAX)
                 return -EINVAL;
@@ -94,9 +94,9 @@ static int bpf_prog_allow_list_major(
         else
                 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
         if (r < 0)
-                log_error_errno(r, "Extending device control BPF program failed: %m");
+                return log_error_errno(r, "Extending device control BPF program failed: %m");
 
-        return r;
+        return 1; /* return 1 → we did something */
 }
 
 static int bpf_prog_allow_list_class(
@@ -130,9 +130,9 @@ static int bpf_prog_allow_list_class(
         else
                 r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
         if (r < 0)
-                log_error_errno(r, "Extending device control BPF program failed: %m");
+                return log_error_errno(r, "Extending device control BPF program failed: %m");
 
-        return r;
+        return 1; /* return 1 → we did something */
 }
 
 int bpf_devices_cgroup_init(
@@ -165,8 +165,10 @@ int bpf_devices_cgroup_init(
 
         assert(ret);
 
-        if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list)
+        if (policy == CGROUP_DEVICE_POLICY_AUTO && !allow_list) {
+                *ret = NULL;
                 return 0;
+        }
 
         r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &prog);
         if (r < 0)
@@ -179,8 +181,7 @@ int bpf_devices_cgroup_init(
         }
 
         *ret = TAKE_PTR(prog);
-
-        return 0;
+        return 1;
 }
 
 int bpf_devices_apply_policy(
@@ -307,8 +308,8 @@ static int allow_list_device_pattern(
                 BPFProgram *prog,
                 const char *path,
                 char type,
-                const unsigned *maj,
-                const unsigned *min,
+                unsigned major,
+                unsigned minor,
                 CGroupDevicePermissions p) {
 
         assert(IN_SET(type, 'b', 'c'));
@@ -317,10 +318,10 @@ static int allow_list_device_pattern(
                 if (!prog)
                         return 0;
 
-                if (maj && min)
-                        return bpf_prog_allow_list_device(prog, type, *maj, *min, p);
-                else if (maj)
-                        return bpf_prog_allow_list_major(prog, type, *maj, p);
+                if (major != UINT_MAX && minor != UINT_MAX)
+                        return bpf_prog_allow_list_device(prog, type, major, minor, p);
+                else if (major != UINT_MAX)
+                        return bpf_prog_allow_list_major(prog, type, major, p);
                 else
                         return bpf_prog_allow_list_class(prog, type, p);
 
@@ -328,10 +329,10 @@ static int allow_list_device_pattern(
                 char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
                 int r;
 
-                if (maj && min)
-                        xsprintf(buf, "%c %u:%u %s", type, *maj, *min, cgroup_device_permissions_to_string(p));
-                else if (maj)
-                        xsprintf(buf, "%c %u:* %s", type, *maj, cgroup_device_permissions_to_string(p));
+                if (major != UINT_MAX && minor != UINT_MAX)
+                        xsprintf(buf, "%c %u:%u %s", type, major, minor, cgroup_device_permissions_to_string(p));
+                else if (major != UINT_MAX)
+                        xsprintf(buf, "%c %u:* %s", type, major, cgroup_device_permissions_to_string(p));
                 else
                         xsprintf(buf, "%c *:* %s", type, cgroup_device_permissions_to_string(p));
 
@@ -371,8 +372,14 @@ int bpf_devices_allow_list_device(
                         return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
 
                 struct stat st;
-                if (stat(node, &st) < 0)
+                if (stat(node, &st) < 0) {
+                        if (errno == ENOENT) {
+                                log_debug_errno(errno, "Device '%s' does not exist, skipping.", node);
+                                return 0; /* returning 0 means → skipped */
+                        }
+
                         return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+                }
 
                 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode))
                         return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);
@@ -381,8 +388,7 @@ int bpf_devices_allow_list_device(
                 rdev = (dev_t) st.st_rdev;
         }
 
-        unsigned maj = major(rdev), min = minor(rdev);
-        return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', &maj, &min, p);
+        return allow_list_device_pattern(prog, path, S_ISCHR(mode) ? 'c' : 'b', major(rdev), minor(rdev), p);
 }
 
 int bpf_devices_allow_list_major(
@@ -392,7 +398,7 @@ int bpf_devices_allow_list_major(
                 char type,
                 CGroupDevicePermissions permissions) {
 
-        unsigned maj;
+        unsigned major;
         int r;
 
         assert(path);
@@ -401,12 +407,12 @@ int bpf_devices_allow_list_major(
 
         if (streq(name, "*"))
                 /* If the name is a wildcard, then apply this list to all devices of this type */
-                return allow_list_device_pattern(prog, path, type, NULL, NULL, permissions);
+                return allow_list_device_pattern(prog, path, type, /* major= */ UINT_MAX, /* minor= */ UINT_MAX, permissions);
 
-        if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
+        if (safe_atou(name, &major) >= 0 && DEVICE_MAJOR_VALID(major))
                 /* The name is numeric and suitable as major. In that case, let's take its major, and create
                  * the entry directly. */
-                return allow_list_device_pattern(prog, path, type, &maj, NULL, permissions);
+                return allow_list_device_pattern(prog, path, type, major, /* minor= */ UINT_MAX, permissions);
 
         _cleanup_fclose_ FILE *f = NULL;
         bool good = false, any = false;
@@ -450,10 +456,10 @@ int bpf_devices_allow_list_major(
                         continue;
                 *w = 0;
 
-                r = safe_atou(p, &maj);
+                r = safe_atou(p, &major);
                 if (r < 0)
                         continue;
-                if (maj <= 0)
+                if (major <= 0)
                         continue;
 
                 w++;
@@ -462,15 +468,15 @@ int bpf_devices_allow_list_major(
                 if (fnmatch(name, w, 0) != 0)
                         continue;
 
-                any = true;
-                (void) allow_list_device_pattern(prog, path, type, &maj, NULL, permissions);
+                if (allow_list_device_pattern(prog, path, type, major, /* minor= */ UINT_MAX, permissions) > 0)
+                        any = true;
         }
 
         if (!any)
                 return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
                                        "Device allow list pattern \"%s\" did not match anything.", name);
 
-        return 0;
+        return any;
 }
 
 int bpf_devices_allow_list_static(
@@ -492,13 +498,13 @@ int bpf_devices_allow_list_static(
 
         NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
                 k = bpf_devices_allow_list_device(prog, path, node, cgroup_device_permissions_from_string(acc));
-                if (r >= 0 && k < 0)
+                if ((r >= 0 && k < 0) || (r >= 0 && k > 0))
                         r = k;
         }
 
         /* PTS (/dev/pts) devices may not be duplicated, but accessed */
         k = bpf_devices_allow_list_major(prog, path, "pts", 'c', CGROUP_DEVICE_READ|CGROUP_DEVICE_WRITE);
-        if (r >= 0 && k < 0)
+        if ((r >= 0 && k < 0) || (r >= 0 && k > 0))
                 r = k;
 
         return r;
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index 66773e1..185ed7d 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -1,12 +1,13 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+/* Make sure the net/if.h header is included before any linux/ one */
+#include <net/if.h>
 #include <arpa/inet.h>
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <linux/bpf_insn.h>
 #include <net/ethernet.h>
-#include <net/if.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <stddef.h>
@@ -196,19 +197,26 @@ static int bpf_firewall_compile_bpf(
         _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
         int accounting_map_fd, r;
         bool access_enabled;
+        CGroupRuntime *crt;
 
         assert(u);
         assert(ret);
 
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt) {
+                *ret = NULL;
+                return 0;
+        }
+
         accounting_map_fd = is_ingress ?
-                u->ip_accounting_ingress_map_fd :
-                u->ip_accounting_egress_map_fd;
+                crt->ip_accounting_ingress_map_fd :
+                crt->ip_accounting_egress_map_fd;
 
         access_enabled =
-                u->ipv4_allow_map_fd >= 0 ||
-                u->ipv6_allow_map_fd >= 0 ||
-                u->ipv4_deny_map_fd >= 0 ||
-                u->ipv6_deny_map_fd >= 0 ||
+                crt->ipv4_allow_map_fd >= 0 ||
+                crt->ipv6_allow_map_fd >= 0 ||
+                crt->ipv4_deny_map_fd >= 0 ||
+                crt->ipv6_deny_map_fd >= 0 ||
                 ip_allow_any ||
                 ip_deny_any;
 
@@ -234,26 +242,26 @@ static int bpf_firewall_compile_bpf(
                  * - Otherwise, access will be granted
                  */
 
-                if (u->ipv4_deny_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+                if (crt->ipv4_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv6_deny_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+                if (crt->ipv6_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv4_allow_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+                if (crt->ipv4_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv6_allow_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+                if (crt->ipv6_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
                         if (r < 0)
                                 return r;
                 }
@@ -495,37 +503,36 @@ static int bpf_firewall_prepare_access_maps(
         return 0;
 }
 
-static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
+static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, CGroupRuntime *crt) {
         int r;
 
         assert(u);
-        assert(fd_ingress);
-        assert(fd_egress);
+        assert(crt);
 
         if (enabled) {
-                if (*fd_ingress < 0) {
+                if (crt->ip_accounting_ingress_map_fd < 0) {
                         char *name = strjoina("I_", u->id);
                         r = bpf_map_new(name, BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                         if (r < 0)
                                 return r;
 
-                        *fd_ingress = r;
+                        crt->ip_accounting_ingress_map_fd = r;
                 }
 
-                if (*fd_egress < 0) {
+                if (crt->ip_accounting_egress_map_fd < 0) {
                         char *name = strjoina("E_", u->id);
                         r = bpf_map_new(name, BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                         if (r < 0)
                                 return r;
 
-                        *fd_egress = r;
+                        crt->ip_accounting_egress_map_fd = r;
                 }
 
         } else {
-                *fd_ingress = safe_close(*fd_ingress);
-                *fd_egress = safe_close(*fd_egress);
+                crt->ip_accounting_ingress_map_fd = safe_close(crt->ip_accounting_ingress_map_fd);
+                crt->ip_accounting_egress_map_fd = safe_close(crt->ip_accounting_egress_map_fd);
 
-                zero(u->ip_accounting_extra);
+                zero(crt->ip_accounting_extra);
         }
 
         return 0;
@@ -535,6 +542,7 @@ int bpf_firewall_compile(Unit *u) {
         const char *ingress_name = NULL, *egress_name = NULL;
         bool ip_allow_any = false, ip_deny_any = false;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
 
         assert(u);
@@ -543,6 +551,10 @@ int bpf_firewall_compile(Unit *u) {
         if (!cc)
                 return -EINVAL;
 
+        crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         supported = bpf_firewall_supported();
         if (supported < 0)
                 return supported;
@@ -569,14 +581,14 @@ int bpf_firewall_compile(Unit *u) {
          * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
          * configuration, but we don't flush out the accounting unnecessarily */
 
-        u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
-        u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
+        crt->ip_bpf_ingress = bpf_program_free(crt->ip_bpf_ingress);
+        crt->ip_bpf_egress = bpf_program_free(crt->ip_bpf_egress);
 
-        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
-        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+        crt->ipv4_allow_map_fd = safe_close(crt->ipv4_allow_map_fd);
+        crt->ipv4_deny_map_fd = safe_close(crt->ipv4_deny_map_fd);
 
-        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
-        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+        crt->ipv6_allow_map_fd = safe_close(crt->ipv6_allow_map_fd);
+        crt->ipv6_deny_map_fd = safe_close(crt->ipv6_deny_map_fd);
 
         if (u->type != UNIT_SLICE) {
                 /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
@@ -585,24 +597,24 @@ int bpf_firewall_compile(Unit *u) {
                  * means that all configure IP access rules *will* take effect on processes, even though we never
                  * compile them for inner nodes. */
 
-                r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd, &ip_allow_any);
+                r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &crt->ipv4_allow_map_fd, &crt->ipv6_allow_map_fd, &ip_allow_any);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF allow maps failed: %m");
 
-                r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd, &ip_deny_any);
+                r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &crt->ipv4_deny_map_fd, &crt->ipv6_deny_map_fd, &ip_deny_any);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF deny maps failed: %m");
         }
 
-        r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+        r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, crt);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF accounting maps failed: %m");
 
-        r = bpf_firewall_compile_bpf(u, ingress_name, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any);
+        r = bpf_firewall_compile_bpf(u, ingress_name, true, &crt->ip_bpf_ingress, ip_allow_any, ip_deny_any);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Compilation of ingress BPF program failed: %m");
 
-        r = bpf_firewall_compile_bpf(u, egress_name, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any);
+        r = bpf_firewall_compile_bpf(u, egress_name, false, &crt->ip_bpf_egress, ip_allow_any, ip_deny_any);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Compilation of egress BPF program failed: %m");
 
@@ -634,6 +646,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
 
 int bpf_firewall_load_custom(Unit *u) {
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
 
         assert(u);
@@ -641,6 +654,9 @@ int bpf_firewall_load_custom(Unit *u) {
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return 0;
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
 
         if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
                 return 0;
@@ -653,10 +669,10 @@ int bpf_firewall_load_custom(Unit *u) {
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
 
-        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &crt->ip_bpf_custom_ingress);
         if (r < 0)
                 return r;
-        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &crt->ip_bpf_custom_egress);
         if (r < 0)
                 return r;
 
@@ -686,6 +702,7 @@ int bpf_firewall_install(Unit *u) {
         _cleanup_(bpf_program_freep) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
         _cleanup_free_ char *path = NULL;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
         uint32_t flags;
 
@@ -694,9 +711,12 @@ int bpf_firewall_install(Unit *u) {
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return -EINVAL;
-        if (!u->cgroup_path)
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL;
+        if (!crt->cgroup_path)
                 return -EINVAL;
-        if (!u->cgroup_realized)
+        if (!crt->cgroup_realized)
                 return -EINVAL;
 
         supported = bpf_firewall_supported();
@@ -709,11 +729,11 @@ int bpf_firewall_install(Unit *u) {
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, not doing BPF firewall on slice units.");
         if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
-            (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+            (!set_isempty(crt->ip_bpf_custom_ingress) || !set_isempty(crt->ip_bpf_custom_egress)))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Failed to determine cgroup path: %m");
 
@@ -724,44 +744,44 @@ int bpf_firewall_install(Unit *u) {
                  * after attaching the new programs, so that there's no time window where neither program is
                  * attached. (There will be a program where both are attached, but that's OK, since this is a
                  * security feature where we rather want to lock down too much than too little */
-                ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed);
-                ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed);
+                ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed);
+                ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed);
         } else {
                 /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
                  * detach them) right before attaching the new program, to minimize the time window when we
                  * don't account for IP traffic. */
-                u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
-                u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
+                crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed);
+                crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed);
         }
 
-        if (u->ip_bpf_egress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+        if (crt->ip_bpf_egress) {
+                r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
                 if (r < 0)
                         return log_unit_error_errno(u, r,
                                 "bpf-firewall: Attaching egress BPF program to cgroup %s failed: %m", path);
 
                 /* Remember that this BPF program is installed now. */
-                u->ip_bpf_egress_installed = TAKE_PTR(u->ip_bpf_egress);
+                crt->ip_bpf_egress_installed = TAKE_PTR(crt->ip_bpf_egress);
         }
 
-        if (u->ip_bpf_ingress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+        if (crt->ip_bpf_ingress) {
+                r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
                 if (r < 0)
                         return log_unit_error_errno(u, r,
                                 "bpf-firewall: Attaching ingress BPF program to cgroup %s failed: %m", path);
 
-                u->ip_bpf_ingress_installed = TAKE_PTR(u->ip_bpf_ingress);
+                crt->ip_bpf_ingress_installed = TAKE_PTR(crt->ip_bpf_ingress);
         }
 
         /* And now, definitely get rid of the old programs, and detach them */
         ip_bpf_egress_uninstall = bpf_program_free(ip_bpf_egress_uninstall);
         ip_bpf_ingress_uninstall = bpf_program_free(ip_bpf_ingress_uninstall);
 
-        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &crt->ip_bpf_custom_egress, &crt->ip_bpf_custom_egress_installed);
         if (r < 0)
                 return r;
 
-        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &crt->ip_bpf_custom_ingress, &crt->ip_bpf_custom_ingress_installed);
         if (r < 0)
                 return r;
 
@@ -954,21 +974,25 @@ void emit_bpf_firewall_warning(Unit *u) {
 void bpf_firewall_close(Unit *u) {
         assert(u);
 
-        u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd);
-        u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+        /* Release every firewall fd/program/set kept in the cgroup runtime, storing each helper's result back */
+        crt->ip_accounting_ingress_map_fd = safe_close(crt->ip_accounting_ingress_map_fd);
+        crt->ip_accounting_egress_map_fd = safe_close(crt->ip_accounting_egress_map_fd);
 
-        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
-        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
-        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
-        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+        crt->ipv4_allow_map_fd = safe_close(crt->ipv4_allow_map_fd);
+        crt->ipv6_allow_map_fd = safe_close(crt->ipv6_allow_map_fd);
+        crt->ipv4_deny_map_fd = safe_close(crt->ipv4_deny_map_fd);
+        crt->ipv6_deny_map_fd = safe_close(crt->ipv6_deny_map_fd);
 
-        u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
-        u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
-        u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
-        u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
+        crt->ip_bpf_ingress = bpf_program_free(crt->ip_bpf_ingress);
+        crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed);
+        crt->ip_bpf_egress = bpf_program_free(crt->ip_bpf_egress);
+        crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed);
 
-        u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
-        u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);
-        u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed);
-        u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed);
+        crt->ip_bpf_custom_ingress = set_free(crt->ip_bpf_custom_ingress);
+        crt->ip_bpf_custom_egress = set_free(crt->ip_bpf_custom_egress);
+        crt->ip_bpf_custom_ingress_installed = set_free(crt->ip_bpf_custom_ingress_installed);
+        crt->ip_bpf_custom_egress_installed = set_free(crt->ip_bpf_custom_egress_installed);
 }
diff --git a/src/core/bpf-foreign.c b/src/core/bpf-foreign.c
index cff2f61..851cc42 100644
--- a/src/core/bpf-foreign.c
+++ b/src/core/bpf-foreign.c
@@ -45,8 +45,8 @@ static int bpf_foreign_key_compare_func(const BPFForeignKey *a, const BPFForeign
 }
 
 static void bpf_foreign_key_hash_func(const BPFForeignKey *p, struct siphash *h) {
-        siphash24_compress(&p->prog_id, sizeof(p->prog_id), h);
-        siphash24_compress(&p->attach_type, sizeof(p->attach_type), h);
+        siphash24_compress_typesafe(p->prog_id, h); /* the hashed key is the pair (prog_id, attach_type) */
+        siphash24_compress_typesafe(p->attach_type, h);
 }
 
 DEFINE_PRIVATE_HASH_OPS_FULL(bpf_foreign_by_key_hash_ops,
@@ -81,6 +81,7 @@ static int bpf_foreign_prepare(
                 Unit *u,
                 enum bpf_attach_type attach_type,
                 const char *bpffs_path) {
+
         _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
         _cleanup_free_ BPFForeignKey *key = NULL;
         uint32_t prog_id;
@@ -101,6 +102,11 @@ static int bpf_foreign_prepare(
                 return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
                                 "bpf-foreign: Path in BPF filesystem is expected.");
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+                                            "Failed to get control group runtime object.");
+
         r = bpf_program_new_from_bpffs_path(bpffs_path, &prog);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-foreign: Failed to create foreign BPF program: %m");
@@ -114,7 +120,7 @@ static int bpf_foreign_prepare(
                 return log_unit_error_errno(u, r,
                                 "bpf-foreign: Failed to create foreign BPF program key from path '%s': %m", bpffs_path);
 
-        r = hashmap_ensure_put(&u->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
+        r = hashmap_ensure_put(&crt->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
         if (r == -EEXIST) {
                 log_unit_warning_errno(u, r, "bpf-foreign: Foreign BPF program already exists, ignoring: %m");
                 return 0;
@@ -131,6 +137,7 @@ static int bpf_foreign_prepare(
 int bpf_foreign_install(Unit *u) {
         _cleanup_free_ char *cgroup_path = NULL;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, ret = 0;
 
         assert(u);
@@ -139,7 +146,11 @@ int bpf_foreign_install(Unit *u) {
         if (!cc)
                 return 0;
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-foreign: Failed to get cgroup path: %m");
 
@@ -149,6 +160,6 @@ int bpf_foreign_install(Unit *u) {
                         ret = r;
         }
 
-        r = attach_programs(u, cgroup_path, u->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
+        r = attach_programs(u, cgroup_path, crt->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
         return ret < 0 ? ret : r;
 }
diff --git a/src/core/bpf-lsm.c b/src/core/bpf-lsm.c
deleted file mode 100644
index 216fc34..0000000
--- a/src/core/bpf-lsm.c
+++ /dev/null
@@ -1,320 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/types.h>
-#include <sys/resource.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "alloc-util.h"
-#include "bpf-lsm.h"
-#include "cgroup-util.h"
-#include "fd-util.h"
-#include "fileio.h"
-#include "filesystems.h"
-#include "log.h"
-#include "lsm-util.h"
-#include "manager.h"
-#include "mkdir.h"
-#include "nulstr-util.h"
-#include "stat-util.h"
-#include "strv.h"
-
-#if BPF_FRAMEWORK
-/* libbpf, clang and llc compile time dependencies are satisfied */
-#include "bpf-dlopen.h"
-#include "bpf-link.h"
-#include "bpf-util.h"
-#include "bpf/restrict_fs/restrict-fs-skel.h"
-
-#define CGROUP_HASH_SIZE_MAX 2048
-
-static struct restrict_fs_bpf *restrict_fs_bpf_free(struct restrict_fs_bpf *obj) {
-        /* restrict_fs_bpf__destroy handles object == NULL case */
-        (void) restrict_fs_bpf__destroy(obj);
-
-        return NULL;
-}
-
-DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fs_bpf *, restrict_fs_bpf_free);
-
-static bool bpf_can_link_lsm_program(struct bpf_program *prog) {
-        _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
-
-        assert(prog);
-
-        link = sym_bpf_program__attach_lsm(prog);
-
-        /* If bpf_program__attach_lsm fails the resulting value stores libbpf error code instead of memory
-         * pointer. That is the case when the helper is called on architectures where BPF trampoline (hence
-         * BPF_LSM_MAC attach type) is not supported. */
-        return sym_libbpf_get_error(link) == 0;
-}
-
-static int prepare_restrict_fs_bpf(struct restrict_fs_bpf **ret_obj) {
-        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
-        _cleanup_close_ int inner_map_fd = -EBADF;
-        int r;
-
-        assert(ret_obj);
-
-        obj = restrict_fs_bpf__open();
-        if (!obj)
-                return log_error_errno(errno, "bpf-lsm: Failed to open BPF object: %m");
-
-        /* TODO Maybe choose a number based on runtime information? */
-        r = sym_bpf_map__set_max_entries(obj->maps.cgroup_hash, CGROUP_HASH_SIZE_MAX);
-        assert(r <= 0);
-        if (r < 0)
-                return log_error_errno(r, "bpf-lsm: Failed to resize BPF map '%s': %m",
-                                       sym_bpf_map__name(obj->maps.cgroup_hash));
-
-        /* Dummy map to satisfy the verifier */
-        inner_map_fd = compat_bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t), sizeof(uint32_t), 128U, NULL);
-        if (inner_map_fd < 0)
-                return log_error_errno(errno, "bpf-lsm: Failed to create BPF map: %m");
-
-        r = sym_bpf_map__set_inner_map_fd(obj->maps.cgroup_hash, inner_map_fd);
-        assert(r <= 0);
-        if (r < 0)
-                return log_error_errno(r, "bpf-lsm: Failed to set inner map fd: %m");
-
-        r = restrict_fs_bpf__load(obj);
-        assert(r <= 0);
-        if (r < 0)
-                return log_error_errno(r, "bpf-lsm: Failed to load BPF object: %m");
-
-        *ret_obj = TAKE_PTR(obj);
-
-        return 0;
-}
-
-bool lsm_bpf_supported(bool initialize) {
-        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
-        static int supported = -1;
-        int r;
-
-        if (supported >= 0)
-                return supported;
-        if (!initialize)
-                return false;
-
-        if (!cgroup_bpf_supported())
-                return (supported = false);
-
-        r = lsm_supported("bpf");
-        if (r < 0) {
-                log_warning_errno(r, "bpf-lsm: Can't determine whether the BPF LSM module is used: %m");
-                return (supported = false);
-        }
-        if (r == 0) {
-                log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                               "bpf-lsm: BPF LSM hook not enabled in the kernel, BPF LSM not supported");
-                return (supported = false);
-        }
-
-        r = prepare_restrict_fs_bpf(&obj);
-        if (r < 0)
-                return (supported = false);
-
-        if (!bpf_can_link_lsm_program(obj->progs.restrict_filesystems)) {
-                log_warning_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                  "bpf-lsm: Failed to link program; assuming BPF LSM is not available");
-                return (supported = false);
-        }
-
-        return (supported = true);
-}
-
-int lsm_bpf_setup(Manager *m) {
-        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
-        _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
-        int r;
-
-        assert(m);
-
-        r = prepare_restrict_fs_bpf(&obj);
-        if (r < 0)
-                return r;
-
-        link = sym_bpf_program__attach_lsm(obj->progs.restrict_filesystems);
-        r = sym_libbpf_get_error(link);
-        if (r != 0)
-                return log_error_errno(r, "bpf-lsm: Failed to link '%s' LSM BPF program: %m",
-                                       sym_bpf_program__name(obj->progs.restrict_filesystems));
-
-        log_info("bpf-lsm: LSM BPF program attached");
-
-        obj->links.restrict_filesystems = TAKE_PTR(link);
-        m->restrict_fs = TAKE_PTR(obj);
-
-        return 0;
-}
-
-int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list) {
-        uint32_t dummy_value = 1, zero = 0;
-        const char *fs;
-        const statfs_f_type_t *magic;
-        int r;
-
-        assert(filesystems);
-        assert(outer_map_fd >= 0);
-
-        int inner_map_fd = compat_bpf_map_create(
-                        BPF_MAP_TYPE_HASH,
-                        NULL,
-                        sizeof(uint32_t),
-                        sizeof(uint32_t),
-                        128U, /* Should be enough for all filesystem types */
-                        NULL);
-        if (inner_map_fd < 0)
-                return log_error_errno(errno, "bpf-lsm: Failed to create inner BPF map: %m");
-
-        if (sym_bpf_map_update_elem(outer_map_fd, &cgroup_id, &inner_map_fd, BPF_ANY) != 0)
-                return log_error_errno(errno, "bpf-lsm: Error populating BPF map: %m");
-
-        uint32_t allow = allow_list;
-
-        /* Use key 0 to store whether this is an allow list or a deny list */
-        if (sym_bpf_map_update_elem(inner_map_fd, &zero, &allow, BPF_ANY) != 0)
-                return log_error_errno(errno, "bpf-lsm: Error initializing map: %m");
-
-        SET_FOREACH(fs, filesystems) {
-                r = fs_type_from_string(fs, &magic);
-                if (r < 0) {
-                        log_warning("bpf-lsm: Invalid filesystem name '%s', ignoring.", fs);
-                        continue;
-                }
-
-                log_debug("bpf-lsm: Restricting filesystem access to '%s'", fs);
-
-                for (int i = 0; i < FILESYSTEM_MAGIC_MAX; i++) {
-                        if (magic[i] == 0)
-                                break;
-
-                        if (sym_bpf_map_update_elem(inner_map_fd, &magic[i], &dummy_value, BPF_ANY) != 0) {
-                                r = log_error_errno(errno, "bpf-lsm: Failed to update BPF map: %m");
-
-                                if (sym_bpf_map_delete_elem(outer_map_fd, &cgroup_id) != 0)
-                                        log_debug_errno(errno, "bpf-lsm: Failed to delete cgroup entry from BPF map: %m");
-
-                                return r;
-                        }
-                }
-        }
-
-        return 0;
-}
-
-int lsm_bpf_cleanup(const Unit *u) {
-        assert(u);
-        assert(u->manager);
-
-        /* If we never successfully detected support, there is nothing to clean up. */
-        if (!lsm_bpf_supported(/* initialize = */ false))
-                return 0;
-
-        if (!u->manager->restrict_fs)
-                return 0;
-
-        if (u->cgroup_id == 0)
-                return 0;
-
-        int fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
-        if (fd < 0)
-                return log_unit_error_errno(u, errno, "bpf-lsm: Failed to get BPF map fd: %m");
-
-        if (sym_bpf_map_delete_elem(fd, &u->cgroup_id) != 0 && errno != ENOENT)
-                return log_unit_debug_errno(u, errno, "bpf-lsm: Failed to delete cgroup entry from LSM BPF map: %m");
-
-        return 0;
-}
-
-int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
-        assert(unit);
-        assert(unit->manager);
-
-        if (!unit->manager->restrict_fs)
-                return -ENOMEDIUM;
-
-        return sym_bpf_map__fd(unit->manager->restrict_fs->maps.cgroup_hash);
-}
-
-void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
-        restrict_fs_bpf__destroy(prog);
-}
-#else /* ! BPF_FRAMEWORK */
-bool lsm_bpf_supported(bool initialize) {
-        return false;
-}
-
-int lsm_bpf_setup(Manager *m) {
-        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to set up LSM BPF: %m");
-}
-
-int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, const bool allow_list) {
-        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-lsm: Failed to restrict filesystems using LSM BPF: %m");
-}
-
-int lsm_bpf_cleanup(const Unit *u) {
-        return 0;
-}
-
-int lsm_bpf_map_restrict_fs_fd(Unit *unit) {
-        return -ENOMEDIUM;
-}
-
-void lsm_bpf_destroy(struct restrict_fs_bpf *prog) {
-        return;
-}
-#endif
-
-int lsm_bpf_parse_filesystem(
-                const char *name,
-                Set **filesystems,
-                FilesystemParseFlags flags,
-                const char *unit,
-                const char *filename,
-                unsigned line) {
-        int r;
-
-        assert(name);
-        assert(filesystems);
-
-        if (name[0] == '@') {
-                const FilesystemSet *set;
-
-                set = filesystem_set_find(name);
-                if (!set) {
-                        log_syntax(unit, flags & FILESYSTEM_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
-                                   "bpf-lsm: Unknown filesystem group, ignoring: %s", name);
-                        return 0;
-                }
-
-                NULSTR_FOREACH(i, set->value) {
-                        /* Call ourselves again, for the group to parse. Note that we downgrade logging here
-                         * (i.e. take away the FILESYSTEM_PARSE_LOG flag) since any issues in the group table
-                         * are our own problem, not a problem in user configuration data and we shouldn't
-                         * pretend otherwise by complaining about them. */
-                        r = lsm_bpf_parse_filesystem(i, filesystems, flags &~ FILESYSTEM_PARSE_LOG, unit, filename, line);
-                        if (r < 0)
-                                return r;
-                }
-        } else {
-                /* If we previously wanted to forbid access to a filesystem and now
-                 * we want to allow it, then remove it from the list. */
-                if (!(flags & FILESYSTEM_PARSE_INVERT) == !!(flags & FILESYSTEM_PARSE_ALLOW_LIST)) {
-                        r = set_put_strdup(filesystems, name);
-                        if (r == -ENOMEM)
-                                return flags & FILESYSTEM_PARSE_LOG ? log_oom() : -ENOMEM;
-                        if (r < 0 && r != -EEXIST)  /* When already in set, ignore */
-                                return r;
-                } else
-                        free(set_remove(*filesystems, name));
-        }
-
-        return 0;
-}
diff --git a/src/core/bpf-lsm.h b/src/core/bpf-lsm.h
deleted file mode 100644
index a6eda19..0000000
--- a/src/core/bpf-lsm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-#pragma once
-
-#include "hashmap.h"
-
-typedef enum FilesystemParseFlags {
-        FILESYSTEM_PARSE_INVERT     = 1 << 0,
-        FILESYSTEM_PARSE_ALLOW_LIST = 1 << 1,
-        FILESYSTEM_PARSE_LOG        = 1 << 2,
-} FilesystemParseFlags;
-
-typedef struct Unit Unit;
-typedef struct Manager Manager;
-
-typedef struct restrict_fs_bpf restrict_fs_bpf;
-
-bool lsm_bpf_supported(bool initialize);
-int lsm_bpf_setup(Manager *m);
-int lsm_bpf_restrict_filesystems(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list);
-int lsm_bpf_cleanup(const Unit *u);
-int lsm_bpf_map_restrict_fs_fd(Unit *u);
-void lsm_bpf_destroy(struct restrict_fs_bpf *prog);
-int lsm_bpf_parse_filesystem(const char *name,
-                             Set **filesystems,
-                             FilesystemParseFlags flags,
-                             const char *unit,
-                             const char *filename,
-                             unsigned line);
diff --git a/src/core/bpf-restrict-fs.c b/src/core/bpf-restrict-fs.c
new file mode 100644
index 0000000..d36bfb5
--- /dev/null
+++ b/src/core/bpf-restrict-fs.c
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-restrict-fs.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "filesystems.h"
+#include "log.h"
+#include "lsm-util.h"
+#include "manager.h"
+#include "mkdir.h"
+#include "nulstr-util.h"
+#include "stat-util.h"
+#include "strv.h"
+
+#if BPF_FRAMEWORK
+/* libbpf, clang and llc compile time dependencies are satisfied */
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf-util.h"
+#include "bpf/restrict_fs/restrict-fs-skel.h"
+
+#define CGROUP_HASH_SIZE_MAX 2048
+
+static struct restrict_fs_bpf *restrict_fs_bpf_free(struct restrict_fs_bpf *obj) {
+        /* Destroys the skeleton and always returns NULL; restrict_fs_bpf__destroy handles object == NULL case */
+        (void) restrict_fs_bpf__destroy(obj);
+
+        return NULL;
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_fs_bpf *, restrict_fs_bpf_free);
+
+static bool bpf_can_link_lsm_program(struct bpf_program *prog) {
+        _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
+        /* Probe only: returns true if prog can be attached as an LSM program; the link is not kept. */
+        assert(prog);
+
+        link = sym_bpf_program__attach_lsm(prog);
+
+        /* If bpf_program__attach_lsm fails the resulting value stores libbpf error code instead of memory
+         * pointer. That is the case when the helper is called on architectures where BPF trampoline (hence
+         * BPF_LSM_MAC attach type) is not supported. */
+        return bpf_get_error_translated(link) == 0;
+}
+
+static int prepare_restrict_fs_bpf(struct restrict_fs_bpf **ret_obj) {
+        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+        _cleanup_close_ int inner_map_fd = -EBADF;
+        int r;
+        /* Opens, configures and loads the skeleton; on success passes it to *ret_obj and returns 0. */
+        assert(ret_obj);
+
+        obj = restrict_fs_bpf__open();
+        if (!obj)
+                return log_error_errno(errno, "bpf-restrict-fs: Failed to open BPF object: %m");
+
+        /* TODO Maybe choose a number based on runtime information? */
+        r = sym_bpf_map__set_max_entries(obj->maps.cgroup_hash, CGROUP_HASH_SIZE_MAX);
+        assert(r <= 0);
+        if (r < 0)
+                return log_error_errno(r, "bpf-restrict-fs: Failed to resize BPF map '%s': %m",
+                                       sym_bpf_map__name(obj->maps.cgroup_hash));
+
+        /* Dummy map to satisfy the verifier; its fd is _cleanup_close_, so it does not outlive this function */
+        inner_map_fd = compat_bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t), sizeof(uint32_t), 128U, NULL);
+        if (inner_map_fd < 0)
+                return log_error_errno(errno, "bpf-restrict-fs: Failed to create BPF map: %m");
+
+        r = sym_bpf_map__set_inner_map_fd(obj->maps.cgroup_hash, inner_map_fd);
+        assert(r <= 0);
+        if (r < 0)
+                return log_error_errno(r, "bpf-restrict-fs: Failed to set inner map fd: %m");
+
+        r = restrict_fs_bpf__load(obj);
+        assert(r <= 0);
+        if (r < 0)
+                return log_error_errno(r, "bpf-restrict-fs: Failed to load BPF object: %m");
+
+        *ret_obj = TAKE_PTR(obj);
+
+        return 0;
+}
+
+bool bpf_restrict_fs_supported(bool initialize) {
+        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+        static int supported = -1;
+        int r;
+        /* Tri-state cache: -1 = not probed yet. Without 'initialize' an unprobed state reports false. */
+        if (supported >= 0)
+                return supported;
+        if (!initialize)
+                return false;
+
+        if (!cgroup_bpf_supported())
+                return (supported = false);
+
+        r = lsm_supported("bpf");
+        if (r < 0) {
+                log_warning_errno(r, "bpf-restrict-fs: Can't determine whether the BPF LSM module is used: %m");
+                return (supported = false);
+        }
+        if (r == 0) {
+                log_info("bpf-restrict-fs: BPF LSM hook not enabled in the kernel, BPF LSM not supported.");
+                return (supported = false);
+        }
+
+        r = prepare_restrict_fs_bpf(&obj);
+        if (r < 0)
+                return (supported = false);
+
+        if (!bpf_can_link_lsm_program(obj->progs.restrict_filesystems)) {
+                log_warning("bpf-restrict-fs: Failed to link program; assuming BPF LSM is not available.");
+                return (supported = false);
+        }
+
+        return (supported = true);
+}
+
+int bpf_restrict_fs_setup(Manager *m) {
+        _cleanup_(restrict_fs_bpf_freep) struct restrict_fs_bpf *obj = NULL;
+        _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
+        int r;
+        /* Loads and attaches the LSM program; on success the skeleton is stored in m->restrict_fs. */
+        assert(m);
+
+        r = prepare_restrict_fs_bpf(&obj);
+        if (r < 0)
+                return r;
+
+        link = sym_bpf_program__attach_lsm(obj->progs.restrict_filesystems);
+        r = bpf_get_error_translated(link);
+        if (r != 0)
+                return log_error_errno(r, "bpf-restrict-fs: Failed to link '%s' LSM BPF program: %m",
+                                       sym_bpf_program__name(obj->progs.restrict_filesystems));
+
+        log_info("bpf-restrict-fs: LSM BPF program attached");
+        /* Ownership moves out of the cleanup-managed locals: link into the skeleton, skeleton into the manager */
+        obj->links.restrict_filesystems = TAKE_PTR(link);
+        m->restrict_fs = TAKE_PTR(obj);
+
+        return 0;
+}
+
+int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list) {
+        uint32_t dummy_value = 1, zero = 0;
+        const char *fs;
+        const statfs_f_type_t *magic;
+        int r;
+        /* Creates an inner map for cgroup_id (key 0: allow/deny flag, others: fs magics) in the outer map. */
+        assert(filesystems);
+        assert(outer_map_fd >= 0);
+        /* Close our fd on every return path; presumably the outer map keeps its own reference once inserted. */
+        _cleanup_close_ int inner_map_fd = compat_bpf_map_create(
+                        BPF_MAP_TYPE_HASH,
+                        NULL,
+                        sizeof(uint32_t),
+                        sizeof(uint32_t),
+                        128U, /* Should be enough for all filesystem types */
+                        NULL);
+        if (inner_map_fd < 0)
+                return log_error_errno(errno, "bpf-restrict-fs: Failed to create inner BPF map: %m");
+
+        if (sym_bpf_map_update_elem(outer_map_fd, &cgroup_id, &inner_map_fd, BPF_ANY) != 0)
+                return log_error_errno(errno, "bpf-restrict-fs: Error populating BPF map: %m");
+
+        uint32_t allow = allow_list;
+
+        /* Use key 0 to store whether this is an allow list or a deny list */
+        if (sym_bpf_map_update_elem(inner_map_fd, &zero, &allow, BPF_ANY) != 0)
+                return log_error_errno(errno, "bpf-restrict-fs: Error initializing map: %m");
+
+        SET_FOREACH(fs, filesystems) {
+                r = fs_type_from_string(fs, &magic);
+                if (r < 0) {
+                        log_warning("bpf-restrict-fs: Invalid filesystem name '%s', ignoring.", fs);
+                        continue;
+                }
+
+                log_debug("bpf-restrict-fs: Restricting filesystem access to '%s'", fs);
+
+                for (int i = 0; i < FILESYSTEM_MAGIC_MAX; i++) {
+                        if (magic[i] == 0)
+                                break;
+
+                        if (sym_bpf_map_update_elem(inner_map_fd, &magic[i], &dummy_value, BPF_ANY) != 0) {
+                                r = log_error_errno(errno, "bpf-restrict-fs: Failed to update BPF map: %m");
+
+                                if (sym_bpf_map_delete_elem(outer_map_fd, &cgroup_id) != 0)
+                                        log_debug_errno(errno, "bpf-restrict-fs: Failed to delete cgroup entry from BPF map: %m");
+
+                                return r;
+                        }
+                }
+        }
+
+        return 0;
+}
+
+int bpf_restrict_fs_cleanup(Unit *u) {
+        CGroupRuntime *crt;
+        /* Removes this unit's cgroup id from the manager's cgroup_hash map; returns 0 if there is nothing to do. */
+        assert(u);
+        assert(u->manager);
+
+        /* If we never successfully detected support, there is nothing to clean up. */
+        if (!bpf_restrict_fs_supported(/* initialize = */ false))
+                return 0;
+
+        if (!u->manager->restrict_fs) /* no restrict-fs object was stored on the manager */
+                return 0;
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        if (crt->cgroup_id == 0) /* presumably means no cgroup id was recorded for this unit — confirm */
+                return 0;
+
+        int fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
+        if (fd < 0)
+                return log_unit_error_errno(u, errno, "bpf-restrict-fs: Failed to get BPF map fd: %m");
+
+        /* A missing key (ENOENT) is tolerated; any other failure is logged at debug level and returned. */
+        if (sym_bpf_map_delete_elem(fd, &crt->cgroup_id) != 0 && errno != ENOENT)
+                return log_unit_debug_errno(u, errno, "bpf-restrict-fs: Failed to delete cgroup entry from LSM BPF map: %m");
+
+        return 0;
+}
+
+int bpf_restrict_fs_map_fd(Unit *unit) {
+        assert(unit);
+        assert(unit->manager);
+        /* Returns the fd of the manager's cgroup_hash map, or -ENOMEDIUM when no restrict-fs object is set. */
+        if (!unit->manager->restrict_fs)
+                return -ENOMEDIUM;
+
+        return sym_bpf_map__fd(unit->manager->restrict_fs->maps.cgroup_hash);
+}
+
+void bpf_restrict_fs_destroy(struct restrict_fs_bpf *prog) {
+        restrict_fs_bpf__destroy(prog); /* plain forwarder; NULL handling is whatever the callee does */
+}
+#else /* ! BPF_FRAMEWORK */
+bool bpf_restrict_fs_supported(bool initialize) {
+        return false; /* compiled without BPF_FRAMEWORK */
+}
+
+int bpf_restrict_fs_setup(Manager *m) {
+        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-restrict-fs: BPF framework is not supported.");
+}
+
+int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, const bool allow_list) { /* NOTE(review): the BPF variant declares plain "bool" */
+        return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-restrict-fs: BPF framework is not supported.");
+}
+
+int bpf_restrict_fs_cleanup(Unit *u) {
+        return 0; /* nothing can have been installed, so there is nothing to undo */
+}
+
+int bpf_restrict_fs_map_fd(Unit *unit) {
+        return -ENOMEDIUM; /* same code the BPF build returns when no object is set */
+}
+
+void bpf_restrict_fs_destroy(struct restrict_fs_bpf *prog) {
+        return; /* no-op */
+}
+#endif
+
+int bpf_restrict_fs_parse_filesystem(
+                const char *name,
+                Set **filesystems,
+                FilesystemParseFlags flags,
+                const char *unit,
+                const char *filename,
+                unsigned line) {
+        int r;
+        /* Applies one filesystem name or "@group" to *filesystems. Returns 0, or a negative error from the set. */
+        assert(name);
+        assert(filesystems);
+
+        if (name[0] == '@') {
+                const FilesystemSet *set;
+
+                set = filesystem_set_find(name);
+                if (!set) {
+                        log_syntax(unit, flags & FILESYSTEM_PARSE_LOG ? LOG_WARNING : LOG_DEBUG, filename, line, 0,
+                                   "bpf-restrict-fs: Unknown filesystem group, ignoring: %s", name);
+                        return 0; /* an unknown group is logged but is not an error */
+                }
+
+                NULSTR_FOREACH(i, set->value) {
+                        /* Call ourselves again, for the group to parse. Note that we downgrade logging here
+                         * (i.e. take away the FILESYSTEM_PARSE_LOG flag) since any issues in the group table
+                         * are our own problem, not a problem in user configuration data and we shouldn't
+                         * pretend otherwise by complaining about them. */
+                        r = bpf_restrict_fs_parse_filesystem(i, filesystems, flags &~ FILESYSTEM_PARSE_LOG, unit, filename, line);
+                        if (r < 0)
+                                return r;
+                }
+        } else {
+                /* The name is added when exactly one of INVERT and ALLOW_LIST is set. Otherwise an earlier
+                 * entry is being reverted (forbidden before, allowed now), so remove it from the set. */
+                if (!(flags & FILESYSTEM_PARSE_INVERT) == !!(flags & FILESYSTEM_PARSE_ALLOW_LIST)) {
+                        r = set_put_strdup(filesystems, name);
+                        if (r == -ENOMEM)
+                                return flags & FILESYSTEM_PARSE_LOG ? log_oom() : -ENOMEM;
+                        if (r < 0 && r != -EEXIST)  /* When already in set, ignore */
+                                return r;
+                } else
+                        free(set_remove(*filesystems, name));
+        }
+
+        return 0;
+}
diff --git a/src/core/bpf-restrict-fs.h b/src/core/bpf-restrict-fs.h
new file mode 100644
index 0000000..8da12de
--- /dev/null
+++ b/src/core/bpf-restrict-fs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "hashmap.h"
+
+typedef enum FilesystemParseFlags {
+        FILESYSTEM_PARSE_INVERT     = 1 << 0, /* with ALLOW_LIST: a name is added if exactly one is set, else removed */
+        FILESYSTEM_PARSE_ALLOW_LIST = 1 << 1, /* see FILESYSTEM_PARSE_INVERT */
+        FILESYSTEM_PARSE_LOG        = 1 << 2, /* log at LOG_WARNING (and log OOM) instead of LOG_DEBUG */
+} FilesystemParseFlags;
+
+typedef struct Unit Unit;
+typedef struct Manager Manager;
+
+typedef struct restrict_fs_bpf restrict_fs_bpf;
+
+bool bpf_restrict_fs_supported(bool initialize);
+int bpf_restrict_fs_setup(Manager *m);
+int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list);
+int bpf_restrict_fs_cleanup(Unit *u);
+int bpf_restrict_fs_map_fd(Unit *u);
+void bpf_restrict_fs_destroy(struct restrict_fs_bpf *prog);
+int bpf_restrict_fs_parse_filesystem(const char *name, Set **filesystems, FilesystemParseFlags flags, const char *unit, const char *filename, unsigned line);
diff --git a/src/core/bpf-restrict-ifaces.c b/src/core/bpf-restrict-ifaces.c
new file mode 100644
index 0000000..64d8d1a
--- /dev/null
+++ b/src/core/bpf-restrict-ifaces.c
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include "fd-util.h"
+#include "bpf-restrict-ifaces.h"
+#include "netlink-util.h"
+
+#if BPF_FRAMEWORK
+/* libbpf, clang and llc compile time dependencies are satisfied */
+
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf-util.h"
+#include "bpf/restrict_ifaces/restrict-ifaces-skel.h"
+
+static struct restrict_ifaces_bpf *restrict_ifaces_bpf_free(struct restrict_ifaces_bpf *obj) {
+        restrict_ifaces_bpf__destroy(obj);
+        return NULL; /* always NULL, whatever was passed in */
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_ifaces_bpf *, restrict_ifaces_bpf_free); /* restrict_ifaces_bpf_freep(), used with _cleanup_() below */
+
+static int prepare_restrict_ifaces_bpf(
+                Unit* u,
+                bool is_allow_list,
+                const Set *restrict_network_interfaces,
+                struct restrict_ifaces_bpf **ret_object) {
+
+        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
+        _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
+        char *iface;
+        int r, map_fd;
+        /* Opens, sizes, loads and fills the BPF object. u may be NULL (support probe), which lowers log levels. */
+        assert(ret_object);
+
+        obj = restrict_ifaces_bpf__open();
+        if (!obj)
+                return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, errno, "restrict-interfaces: Failed to open BPF object: %m");
+        /* Size the map to the interface count, but never below one entry (the set may be empty or NULL). */
+        r = sym_bpf_map__set_max_entries(obj->maps.sd_restrictif, MAX(set_size(restrict_network_interfaces), 1u));
+        if (r != 0)
+                return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, r,
+                                "restrict-interfaces: Failed to resize BPF map '%s': %m",
+                                sym_bpf_map__name(obj->maps.sd_restrictif));
+
+        obj->rodata->is_allow_list = is_allow_list;
+
+        r = restrict_ifaces_bpf__load(obj);
+        if (r != 0)
+                return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, r, "restrict-interfaces: Failed to load BPF object: %m");
+        /* NOTE(review): map_fd is not checked for < 0 before it is used in the loop below. */
+        map_fd = sym_bpf_map__fd(obj->maps.sd_restrictif);
+
+        SET_FOREACH(iface, restrict_network_interfaces) {
+                uint8_t dummy = 0;
+                int ifindex;
+
+                ifindex = rtnl_resolve_interface(&rtnl, iface);
+                if (ifindex < 0) {
+                        log_unit_warning_errno(u, ifindex,
+                                               "restrict-interfaces: Couldn't find index of network interface '%s', ignoring: %m",
+                                               iface);
+                        continue; /* unresolvable names are skipped, not fatal */
+                }
+
+                if (sym_bpf_map_update_elem(map_fd, &ifindex, &dummy, BPF_ANY))
+                        return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, errno,
+                                                   "restrict-interfaces: Failed to update BPF map '%s' fd: %m",
+                                                   sym_bpf_map__name(obj->maps.sd_restrictif));
+        }
+
+        *ret_object = TAKE_PTR(obj);
+        return 0;
+}
+
+int bpf_restrict_ifaces_supported(void) {
+        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
+        static int supported = -1;
+        int r;
+        /* Probes once and caches the answer in the static above (-1 = not probed yet). */
+        if (supported >= 0)
+                return supported;
+
+        if (!cgroup_bpf_supported())
+                return (supported = false);
+
+        if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SKB, /*opts=*/NULL)) {
+                log_debug("restrict-interfaces: BPF program type cgroup_skb is not supported");
+                return (supported = false);
+        }
+        /* Trial run: build and load the object with no unit and no interfaces. */
+        r = prepare_restrict_ifaces_bpf(NULL, true, NULL, &obj);
+        if (r < 0) {
+                log_debug_errno(r, "restrict-interfaces: Failed to load BPF object: %m");
+                return (supported = false);
+        }
+
+        return (supported = bpf_can_link_program(obj->progs.sd_restrictif_i));
+}
+
+static int restrict_ifaces_install_impl(Unit *u) {
+        _cleanup_(bpf_link_freep) struct bpf_link *egress_link = NULL, *ingress_link = NULL;
+        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
+        _cleanup_free_ char *cgroup_path = NULL;
+        _cleanup_close_ int cgroup_fd = -EBADF;
+        CGroupContext *cc;
+        CGroupRuntime *crt;
+        int r;
+        /* Prepares the BPF object for the unit's interface set and attaches both programs to the unit's cgroup. */
+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return 0;
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
+        if (r < 0)
+                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to get cgroup path: %m");
+
+        if (!cc->restrict_network_interfaces) /* nothing configured for this unit */
+                return 0;
+
+        r = prepare_restrict_ifaces_bpf(u,
+                cc->restrict_network_interfaces_is_allow_list,
+                cc->restrict_network_interfaces,
+                &obj);
+        if (r < 0)
+                return r;
+
+        cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY, 0);
+        if (cgroup_fd < 0)
+                return -errno; /* NOTE(review): the only failure path in this function that does not log */
+
+        ingress_link = sym_bpf_program__attach_cgroup(obj->progs.sd_restrictif_i, cgroup_fd);
+        r = bpf_get_error_translated(ingress_link);
+        if (r != 0)
+                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to create ingress cgroup link: %m");
+
+        egress_link = sym_bpf_program__attach_cgroup(obj->progs.sd_restrictif_e, cgroup_fd);
+        r = bpf_get_error_translated(egress_link);
+        if (r != 0)
+                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to create egress cgroup link: %m");
+        /* Only the two links are stored on the runtime object; obj is still subject to its _cleanup_ on return. */
+        crt->restrict_ifaces_ingress_bpf_link = TAKE_PTR(ingress_link);
+        crt->restrict_ifaces_egress_bpf_link = TAKE_PTR(egress_link);
+
+        return 0;
+}
+
+int bpf_restrict_ifaces_install(Unit *u) {
+        CGroupRuntime *crt;
+        int r;
+
+        assert(u);
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+        /* Whatever the outcome, fdset_close() is applied to the initial link fd set right after the install. */
+        r = restrict_ifaces_install_impl(u);
+        fdset_close(crt->initial_restrict_ifaces_link_fds);
+        return r;
+}
+
+int bpf_restrict_ifaces_serialize(Unit *u, FILE *f, FDSet *fds) {
+        CGroupRuntime *crt;
+        int r;
+
+        assert(u);
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+        /* Both links are written under the same "restrict-ifaces-bpf-fd" key, ingress first. */
+        r = bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", crt->restrict_ifaces_ingress_bpf_link);
+        if (r < 0)
+                return r;
+
+        return bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", crt->restrict_ifaces_egress_bpf_link);
+}
+
+int bpf_restrict_ifaces_add_initial_link_fd(Unit *u, int fd) {
+        int r;
+
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL; /* NOTE(review): unlogged, unlike the other error paths in this function */
+        /* The fd set is created lazily, the first time an fd is added. */
+        if (!crt->initial_restrict_ifaces_link_fds) {
+                crt->initial_restrict_ifaces_link_fds = fdset_new();
+                if (!crt->initial_restrict_ifaces_link_fds)
+                        return log_oom();
+        }
+
+        r = fdset_put(crt->initial_restrict_ifaces_link_fds, fd);
+        if (r < 0)
+                return log_unit_error_errno(u, r,
+                        "restrict-interfaces: Failed to put restrict-ifaces-bpf-fd %d to restored fdset: %m", fd);
+
+        return 0;
+}
+
+#else /* ! BPF_FRAMEWORK */
+int bpf_restrict_ifaces_supported(void) {
+        return 0; /* i.e. "not supported" when built without BPF_FRAMEWORK */
+}
+
+int bpf_restrict_ifaces_install(Unit *u) {
+        return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
+                        "restrict-interfaces: Failed to install; BPF programs built from source code are not supported: %m");
+}
+
+int bpf_restrict_ifaces_serialize(Unit *u, FILE *f, FDSet *fds) {
+        return 0; /* nothing to write out */
+}
+
+int bpf_restrict_ifaces_add_initial_link_fd(Unit *u, int fd) {
+        return 0; /* the fd is neither stored nor closed here */
+}
+#endif
diff --git a/src/core/bpf-restrict-ifaces.h b/src/core/bpf-restrict-ifaces.h
new file mode 100644
index 0000000..28f7427
--- /dev/null
+++ b/src/core/bpf-restrict-ifaces.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include "fdset.h"
+#include "unit.h"
+
+typedef struct Unit Unit;
+
+int bpf_restrict_ifaces_supported(void);
+int bpf_restrict_ifaces_install(Unit *u);
+
+int bpf_restrict_ifaces_serialize(Unit *u, FILE *f, FDSet *fds);
+
+/* Add a BPF link fd that was created before daemon-reload or daemon-reexec.
+ * These fds are closed at the end of bpf_restrict_ifaces_install(). */
+int bpf_restrict_ifaces_add_initial_link_fd(Unit *u, int fd);
diff --git a/src/core/bpf-socket-bind.c b/src/core/bpf-socket-bind.c
index 88ab487..2a1a027 100644
--- a/src/core/bpf-socket-bind.c
+++ b/src/core/bpf-socket-bind.c
@@ -148,13 +148,18 @@ int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
 
         assert(u);
 
-        if (!u->initial_socket_bind_link_fds) {
-                u->initial_socket_bind_link_fds = fdset_new();
-                if (!u->initial_socket_bind_link_fds)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+                                            "Failed to get control group runtime object.");
+
+        if (!crt->initial_socket_bind_link_fds) {
+                crt->initial_socket_bind_link_fds = fdset_new();
+                if (!crt->initial_socket_bind_link_fds)
                         return log_oom();
         }
 
-        r = fdset_put(u->initial_socket_bind_link_fds, fd);
+        r = fdset_put(crt->initial_socket_bind_link_fds, fd);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to put BPF fd %d to initial fdset", fd);
 
@@ -167,6 +172,7 @@ static int socket_bind_install_impl(Unit *u) {
         _cleanup_free_ char *cgroup_path = NULL;
         _cleanup_close_ int cgroup_fd = -EBADF;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
@@ -175,7 +181,11 @@ static int socket_bind_install_impl(Unit *u) {
         if (!cc)
                 return 0;
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to get cgroup path: %m");
 
@@ -191,46 +201,53 @@ static int socket_bind_install_impl(Unit *u) {
                 return log_unit_error_errno(u, errno, "bpf-socket-bind: Failed to open cgroup %s for reading: %m", cgroup_path);
 
         ipv4 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind4, cgroup_fd);
-        r = sym_libbpf_get_error(ipv4);
+        r = bpf_get_error_translated(ipv4);
         if (r != 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to link '%s' cgroup-bpf program: %m",
                                             sym_bpf_program__name(obj->progs.sd_bind4));
 
         ipv6 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind6, cgroup_fd);
-        r = sym_libbpf_get_error(ipv6);
+        r = bpf_get_error_translated(ipv6);
         if (r != 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to link '%s' cgroup-bpf program: %m",
                                             sym_bpf_program__name(obj->progs.sd_bind6));
 
-        u->ipv4_socket_bind_link = TAKE_PTR(ipv4);
-        u->ipv6_socket_bind_link = TAKE_PTR(ipv6);
+        crt->ipv4_socket_bind_link = TAKE_PTR(ipv4);
+        crt->ipv6_socket_bind_link = TAKE_PTR(ipv6);
 
         return 0;
 }
 
 int bpf_socket_bind_install(Unit *u) {
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        r = socket_bind_install_impl(u);
-        if (r == -ENOMEM)
-                return r;
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
 
-        fdset_close(u->initial_socket_bind_link_fds);
+        r = socket_bind_install_impl(u);
+        fdset_close(crt->initial_socket_bind_link_fds);
         return r;
 }
 
-int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_socket_bind_serialize(Unit *u, FILE *f, FDSet *fds) {
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", crt->ipv4_socket_bind_link);
         if (r < 0)
                 return r;
 
-        return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
+        return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", crt->ipv6_socket_bind_link);
 }
 
 #else /* ! BPF_FRAMEWORK */
@@ -247,7 +264,7 @@ int bpf_socket_bind_install(Unit *u) {
                                     "bpf-socket-bind: Failed to install; BPF framework is not supported");
 }
 
-int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_socket_bind_serialize(Unit *u, FILE *f, FDSet *fds) {
         return 0;
 }
 #endif
diff --git a/src/core/bpf-socket-bind.h b/src/core/bpf-socket-bind.h
index 7d426df..28b25f6 100644
--- a/src/core/bpf-socket-bind.h
+++ b/src/core/bpf-socket-bind.h
@@ -12,4 +12,4 @@ int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd);
 
 int bpf_socket_bind_install(Unit *u);
 
-int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
+int bpf_socket_bind_serialize(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/bpf-util.c b/src/core/bpf-util.c
index 6fe229e..b337ba9 100644
--- a/src/core/bpf-util.c
+++ b/src/core/bpf-util.c
@@ -20,8 +20,7 @@ bool cgroup_bpf_supported(void) {
         }
 
         if (r == 0) {
-                log_info_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                               "Not running with unified cgroup hierarchy, disabling cgroup BPF features.");
+                log_info("Not running with unified cgroup hierarchy, disabling cgroup BPF features.");
                 return (supported = false);
         }
 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 61ac4df..34fd2a2 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -10,6 +10,7 @@
 #include "bpf-devices.h"
 #include "bpf-firewall.h"
 #include "bpf-foreign.h"
+#include "bpf-restrict-ifaces.h"
 #include "bpf-socket-bind.h"
 #include "btrfs-util.h"
 #include "bus-error.h"
@@ -32,7 +33,8 @@
 #include "percent-util.h"
 #include "process-util.h"
 #include "procfs-util.h"
-#include "restrict-ifaces.h"
+#include "set.h"
+#include "serialize.h"
 #include "special.h"
 #include "stdio-util.h"
 #include "string-table.h"
@@ -115,10 +117,16 @@ bool unit_has_host_root_cgroup(Unit *u) {
 static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
         int r;
 
-        r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
+        r = cg_set_attribute(controller, crt->cgroup_path, attribute, value);
         if (r < 0)
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
-                                    strna(attribute), empty_to_root(u->cgroup_path), (int) strcspn(value, NEWLINE), value);
+                                    strna(attribute), empty_to_root(crt->cgroup_path), (int) strcspn(value, NEWLINE), value);
 
         return r;
 }
@@ -172,6 +180,8 @@ void cgroup_context_init(CGroupContext *c) {
 
                 .memory_limit = CGROUP_LIMIT_MAX,
 
+                .memory_zswap_writeback = true,
+
                 .io_weight = CGROUP_WEIGHT_INVALID,
                 .startup_io_weight = CGROUP_WEIGHT_INVALID,
 
@@ -189,6 +199,319 @@ void cgroup_context_init(CGroupContext *c) {
         };
 }
 
+int cgroup_context_add_io_device_weight_dup(CGroupContext *c, const CGroupIODeviceWeight *w) {
+        _cleanup_free_ CGroupIODeviceWeight *n = NULL;
+        /* Prepends a copy of w (path string duplicated) to c->io_device_weights; -ENOMEM if allocation fails. */
+        assert(c);
+        assert(w);
+
+        n = new(CGroupIODeviceWeight, 1);
+        if (!n)
+                return -ENOMEM;
+
+        *n = (CGroupIODeviceWeight) {
+                .path = strdup(w->path),
+                .weight = w->weight,
+        };
+        if (!n->path)
+                return -ENOMEM; /* n is still owned by the _cleanup_free_ variable at this point */
+
+        LIST_PREPEND(device_weights, c->io_device_weights, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_io_device_limit_dup(CGroupContext *c, const CGroupIODeviceLimit *l) {
+        _cleanup_free_ CGroupIODeviceLimit *n = NULL;
+        /* Prepends a copy of l (path string duplicated) to c->io_device_limits; -ENOMEM if allocation fails. */
+        assert(c);
+        assert(l);
+
+        n = new0(CGroupIODeviceLimit, 1);
+        if (!n)
+                return -ENOMEM;
+
+        n->path = strdup(l->path);
+        if (!n->path)
+                return -ENOMEM; /* n is still owned by the _cleanup_free_ variable at this point */
+
+        for (CGroupIOLimitType type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++) /* copy every limit slot */
+                n->limits[type] = l->limits[type];
+
+        LIST_PREPEND(device_limits, c->io_device_limits, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_io_device_latency_dup(CGroupContext *c, const CGroupIODeviceLatency *l) {
+        _cleanup_free_ CGroupIODeviceLatency *n = NULL;
+        /* Prepends a copy of l (path string duplicated) to c->io_device_latencies; -ENOMEM if allocation fails. */
+        assert(c);
+        assert(l);
+
+        n = new(CGroupIODeviceLatency, 1);
+        if (!n)
+                return -ENOMEM;
+
+        *n = (CGroupIODeviceLatency) {
+                .path = strdup(l->path),
+                .target_usec = l->target_usec,
+        };
+        if (!n->path)
+                return -ENOMEM; /* n is still owned by the _cleanup_free_ variable at this point */
+
+        LIST_PREPEND(device_latencies, c->io_device_latencies, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_block_io_device_weight_dup(CGroupContext *c, const CGroupBlockIODeviceWeight *w) {
+        _cleanup_free_ CGroupBlockIODeviceWeight *n = NULL;
+        /* Prepends a copy of w (path string duplicated) to c->blockio_device_weights; -ENOMEM if allocation fails. */
+        assert(c);
+        assert(w);
+
+        n = new(CGroupBlockIODeviceWeight, 1);
+        if (!n)
+                return -ENOMEM;
+
+        *n = (CGroupBlockIODeviceWeight) {
+                .path = strdup(w->path),
+                .weight = w->weight,
+        };
+        if (!n->path)
+                return -ENOMEM; /* n is still owned by the _cleanup_free_ variable at this point */
+
+        LIST_PREPEND(device_weights, c->blockio_device_weights, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_block_io_device_bandwidth_dup(CGroupContext *c, const CGroupBlockIODeviceBandwidth *b) {
+        _cleanup_free_ CGroupBlockIODeviceBandwidth *n = NULL;
+        /* Prepends a copy of b to c->blockio_device_bandwidths; returns -ENOMEM if any allocation fails. */
+        assert(c);
+        assert(b);
+
+        n = new(CGroupBlockIODeviceBandwidth, 1);
+        if (!n)
+                return -ENOMEM;
+
+        /* Duplicate the device path too, like the sibling *_dup() helpers; it was previously left NULL. */
+        *n = (CGroupBlockIODeviceBandwidth) { .path = strdup(b->path), .rbps = b->rbps, .wbps = b->wbps };
+        if (!n->path)
+                return -ENOMEM;
+
+        LIST_PREPEND(device_bandwidths, c->blockio_device_bandwidths, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_device_allow_dup(CGroupContext *c, const CGroupDeviceAllow *a) {
+        _cleanup_free_ CGroupDeviceAllow *n = NULL;
+        /* Prepends a copy of a (path string duplicated) to c->device_allow; -ENOMEM if allocation fails. */
+        assert(c);
+        assert(a);
+
+        n = new(CGroupDeviceAllow, 1);
+        if (!n)
+                return -ENOMEM;
+
+        *n = (CGroupDeviceAllow) {
+                .path = strdup(a->path),
+                .permissions = a->permissions,
+        };
+        if (!n->path)
+                return -ENOMEM; /* n is still owned by the _cleanup_free_ variable at this point */
+
+        LIST_PREPEND(device_allow, c->device_allow, TAKE_PTR(n));
+        return 0;
+}
+
+static int cgroup_context_add_socket_bind_item_dup(CGroupContext *c, const CGroupSocketBindItem *i, CGroupSocketBindItem **h) {
+        _cleanup_free_ CGroupSocketBindItem *n = NULL;
+        /* Prepends a copy of i to the list whose head pointer is stored at *h; returns -ENOMEM on OOM. */
+        assert(c);
+        assert(i);
+
+        n = new(CGroupSocketBindItem, 1);
+        if (!n)
+                return -ENOMEM;
+
+        *n = (CGroupSocketBindItem) {
+                .address_family = i->address_family,
+                .ip_protocol    = i->ip_protocol,
+                .nr_ports       = i->nr_ports,
+                .port_min       = i->port_min,
+        };
+        /* The head is taken by address so the prepend lands in the context's list, not in a by-value copy. */
+        LIST_PREPEND(socket_bind_items, *h, TAKE_PTR(n));
+        return 0;
+}
+
+int cgroup_context_add_socket_bind_item_allow_dup(CGroupContext *c, const CGroupSocketBindItem *i) {
+        return cgroup_context_add_socket_bind_item_dup(c, i, &c->socket_bind_allow);
+}
+
+int cgroup_context_add_socket_bind_item_deny_dup(CGroupContext *c, const CGroupSocketBindItem *i) {
+        return cgroup_context_add_socket_bind_item_dup(c, i, &c->socket_bind_deny);
+}
+
+int cgroup_context_copy(CGroupContext *dst, const CGroupContext *src) {
+        struct in_addr_prefix *i;
+        char *iface;
+        int r;
+        /* Copies the settings below from src into dst. Stops at the first error, leaving dst partly filled. */
+        assert(src);
+        assert(dst);
+
+        dst->cpu_accounting = src->cpu_accounting;
+        dst->io_accounting = src->io_accounting;
+        dst->blockio_accounting = src->blockio_accounting;
+        dst->memory_accounting = src->memory_accounting;
+        dst->tasks_accounting = src->tasks_accounting;
+        dst->ip_accounting = src->ip_accounting;
+
+        dst->memory_oom_group = src->memory_oom_group;
+
+        dst->cpu_weight = src->cpu_weight;
+        dst->startup_cpu_weight = src->startup_cpu_weight;
+        dst->cpu_quota_per_sec_usec = src->cpu_quota_per_sec_usec;
+        dst->cpu_quota_period_usec = src->cpu_quota_period_usec;
+        /* NOTE(review): by-value assignment; if these four CPU set members own heap memory, src and dst now share it — confirm. */
+        dst->cpuset_cpus = src->cpuset_cpus;
+        dst->startup_cpuset_cpus = src->startup_cpuset_cpus;
+        dst->cpuset_mems = src->cpuset_mems;
+        dst->startup_cpuset_mems = src->startup_cpuset_mems;
+
+        dst->io_weight = src->io_weight;
+        dst->startup_io_weight = src->startup_io_weight;
+        /* The *_dup() helpers prepend, so each source list is walked from its tail to keep the original order. */
+        LIST_FOREACH_BACKWARDS(device_weights, w, LIST_FIND_TAIL(device_weights, src->io_device_weights)) {
+                r = cgroup_context_add_io_device_weight_dup(dst, w);
+                if (r < 0)
+                        return r;
+        }
+
+        LIST_FOREACH_BACKWARDS(device_limits, l, LIST_FIND_TAIL(device_limits, src->io_device_limits)) {
+                r = cgroup_context_add_io_device_limit_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        LIST_FOREACH_BACKWARDS(device_latencies, l, LIST_FIND_TAIL(device_latencies, src->io_device_latencies)) {
+                r = cgroup_context_add_io_device_latency_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        dst->default_memory_min = src->default_memory_min;
+        dst->default_memory_low = src->default_memory_low;
+        dst->default_startup_memory_low = src->default_startup_memory_low;
+        dst->memory_min = src->memory_min;
+        dst->memory_low = src->memory_low;
+        dst->startup_memory_low = src->startup_memory_low;
+        dst->memory_high = src->memory_high;
+        dst->startup_memory_high = src->startup_memory_high;
+        dst->memory_max = src->memory_max;
+        dst->startup_memory_max = src->startup_memory_max;
+        dst->memory_swap_max = src->memory_swap_max;
+        dst->startup_memory_swap_max = src->startup_memory_swap_max;
+        dst->memory_zswap_max = src->memory_zswap_max;
+        dst->startup_memory_zswap_max = src->startup_memory_zswap_max;
+
+        dst->default_memory_min_set = src->default_memory_min_set;
+        dst->default_memory_low_set = src->default_memory_low_set;
+        dst->default_startup_memory_low_set = src->default_startup_memory_low_set;
+        dst->memory_min_set = src->memory_min_set;
+        dst->memory_low_set = src->memory_low_set;
+        dst->startup_memory_low_set = src->startup_memory_low_set;
+        dst->startup_memory_high_set = src->startup_memory_high_set;
+        dst->startup_memory_max_set = src->startup_memory_max_set;
+        dst->startup_memory_swap_max_set = src->startup_memory_swap_max_set;
+        dst->startup_memory_zswap_max_set = src->startup_memory_zswap_max_set;
+        dst->memory_zswap_writeback = src->memory_zswap_writeback;
+
+        SET_FOREACH(i, src->ip_address_allow) {
+                r = in_addr_prefix_add(&dst->ip_address_allow, i);
+                if (r < 0)
+                        return r;
+        }
+
+        SET_FOREACH(i, src->ip_address_deny) {
+                r = in_addr_prefix_add(&dst->ip_address_deny, i);
+                if (r < 0)
+                        return r;
+        }
+
+        dst->ip_address_allow_reduced = src->ip_address_allow_reduced;
+        dst->ip_address_deny_reduced = src->ip_address_deny_reduced;
+
+        if (!strv_isempty(src->ip_filters_ingress)) { /* an empty source vector leaves dst's untouched */
+                dst->ip_filters_ingress = strv_copy(src->ip_filters_ingress);
+                if (!dst->ip_filters_ingress)
+                        return -ENOMEM;
+        }
+
+        if (!strv_isempty(src->ip_filters_egress)) {
+                dst->ip_filters_egress = strv_copy(src->ip_filters_egress);
+                if (!dst->ip_filters_egress)
+                        return -ENOMEM;
+        }
+
+        LIST_FOREACH_BACKWARDS(programs, l, LIST_FIND_TAIL(programs, src->bpf_foreign_programs)) {
+                r = cgroup_context_add_bpf_foreign_program_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        SET_FOREACH(iface, src->restrict_network_interfaces) {
+                r = set_put_strdup(&dst->restrict_network_interfaces, iface);
+                if (r < 0)
+                        return r;
+        }
+        dst->restrict_network_interfaces_is_allow_list = src->restrict_network_interfaces_is_allow_list;
+
+        dst->cpu_shares = src->cpu_shares;
+        dst->startup_cpu_shares = src->startup_cpu_shares;
+
+        dst->blockio_weight = src->blockio_weight;
+        dst->startup_blockio_weight = src->startup_blockio_weight;
+
+        LIST_FOREACH_BACKWARDS(device_weights, l, LIST_FIND_TAIL(device_weights, src->blockio_device_weights)) {
+                r = cgroup_context_add_block_io_device_weight_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        LIST_FOREACH_BACKWARDS(device_bandwidths, l, LIST_FIND_TAIL(device_bandwidths, src->blockio_device_bandwidths)) {
+                r = cgroup_context_add_block_io_device_bandwidth_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        dst->memory_limit = src->memory_limit;
+
+        dst->device_policy = src->device_policy;
+        LIST_FOREACH_BACKWARDS(device_allow, l, LIST_FIND_TAIL(device_allow, src->device_allow)) {
+                r = cgroup_context_add_device_allow_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        LIST_FOREACH_BACKWARDS(socket_bind_items, l, LIST_FIND_TAIL(socket_bind_items, src->socket_bind_allow)) {
+                r = cgroup_context_add_socket_bind_item_allow_dup(dst, l);
+                if (r < 0)
+                        return r;
+
+        }
+
+        LIST_FOREACH_BACKWARDS(socket_bind_items, l, LIST_FIND_TAIL(socket_bind_items, src->socket_bind_deny)) {
+                r = cgroup_context_add_socket_bind_item_deny_dup(dst, l);
+                if (r < 0)
+                        return r;
+        }
+
+        dst->tasks_max = src->tasks_max;
+
+        return 0;
+}
+
 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
         assert(c);
         assert(a);
@@ -306,10 +629,11 @@ void cgroup_context_done(CGroupContext *c) {
 static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
         assert(u);
 
-        if (!u->cgroup_realized)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -EOWNERDEAD;
 
-        return cg_get_attribute_as_uint64("memory", u->cgroup_path, file, ret);
+        return cg_get_attribute_as_uint64("memory", crt->cgroup_path, file, ret);
 }
 
 static int unit_compare_memory_limit(Unit *u, const char *property_name, uint64_t *ret_unit_value, uint64_t *ret_kernel_value) {
@@ -425,11 +749,12 @@ static int unit_compare_memory_limit(Unit *u, const char *property_name, uint64_
 
 #define FORMAT_CGROUP_DIFF_MAX 128
 
-static char *format_cgroup_memory_limit_comparison(char *buf, size_t l, Unit *u, const char *property_name) {
+static char *format_cgroup_memory_limit_comparison(Unit *u, const char *property_name, char *buf, size_t l) {
         uint64_t kval, sval;
         int r;
 
         assert(u);
+        assert(property_name);
         assert(buf);
         assert(l > 0);
 
@@ -499,18 +824,9 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
         _cleanup_free_ char *disable_controllers_str = NULL, *delegate_controllers_str = NULL, *cpuset_cpus = NULL, *cpuset_mems = NULL, *startup_cpuset_cpus = NULL, *startup_cpuset_mems = NULL;
         CGroupContext *c;
         struct in_addr_prefix *iaai;
-
-        char cda[FORMAT_CGROUP_DIFF_MAX];
-        char cdb[FORMAT_CGROUP_DIFF_MAX];
-        char cdc[FORMAT_CGROUP_DIFF_MAX];
-        char cdd[FORMAT_CGROUP_DIFF_MAX];
-        char cde[FORMAT_CGROUP_DIFF_MAX];
-        char cdf[FORMAT_CGROUP_DIFF_MAX];
-        char cdg[FORMAT_CGROUP_DIFF_MAX];
-        char cdh[FORMAT_CGROUP_DIFF_MAX];
-        char cdi[FORMAT_CGROUP_DIFF_MAX];
-        char cdj[FORMAT_CGROUP_DIFF_MAX];
-        char cdk[FORMAT_CGROUP_DIFF_MAX];
+        char cda[FORMAT_CGROUP_DIFF_MAX], cdb[FORMAT_CGROUP_DIFF_MAX], cdc[FORMAT_CGROUP_DIFF_MAX], cdd[FORMAT_CGROUP_DIFF_MAX],
+                cde[FORMAT_CGROUP_DIFF_MAX], cdf[FORMAT_CGROUP_DIFF_MAX], cdg[FORMAT_CGROUP_DIFF_MAX], cdh[FORMAT_CGROUP_DIFF_MAX],
+                cdi[FORMAT_CGROUP_DIFF_MAX], cdj[FORMAT_CGROUP_DIFF_MAX], cdk[FORMAT_CGROUP_DIFF_MAX];
 
         assert(u);
         assert(f);
@@ -564,6 +880,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 "%sStartupMemorySwapMax: %" PRIu64 "%s\n"
                 "%sMemoryZSwapMax: %" PRIu64 "%s\n"
                 "%sStartupMemoryZSwapMax: %" PRIu64 "%s\n"
+                "%sMemoryZSwapWriteback: %s\n"
                 "%sMemoryLimit: %" PRIu64 "\n"
                 "%sTasksMax: %" PRIu64 "\n"
                 "%sDevicePolicy: %s\n"
@@ -597,17 +914,18 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 prefix, c->startup_blockio_weight,
                 prefix, c->default_memory_min,
                 prefix, c->default_memory_low,
-                prefix, c->memory_min, format_cgroup_memory_limit_comparison(cda, sizeof(cda), u, "MemoryMin"),
-                prefix, c->memory_low, format_cgroup_memory_limit_comparison(cdb, sizeof(cdb), u, "MemoryLow"),
-                prefix, c->startup_memory_low, format_cgroup_memory_limit_comparison(cdc, sizeof(cdc), u, "StartupMemoryLow"),
-                prefix, c->memory_high, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryHigh"),
-                prefix, c->startup_memory_high, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "StartupMemoryHigh"),
-                prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdf, sizeof(cdf), u, "MemoryMax"),
-                prefix, c->startup_memory_max, format_cgroup_memory_limit_comparison(cdg, sizeof(cdg), u, "StartupMemoryMax"),
-                prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cdh, sizeof(cdh), u, "MemorySwapMax"),
-                prefix, c->startup_memory_swap_max, format_cgroup_memory_limit_comparison(cdi, sizeof(cdi), u, "StartupMemorySwapMax"),
-                prefix, c->memory_zswap_max, format_cgroup_memory_limit_comparison(cdj, sizeof(cdj), u, "MemoryZSwapMax"),
-                prefix, c->startup_memory_zswap_max, format_cgroup_memory_limit_comparison(cdk, sizeof(cdk), u, "StartupMemoryZSwapMax"),
+                prefix, c->memory_min, format_cgroup_memory_limit_comparison(u, "MemoryMin", cda, sizeof(cda)),
+                prefix, c->memory_low, format_cgroup_memory_limit_comparison(u, "MemoryLow", cdb, sizeof(cdb)),
+                prefix, c->startup_memory_low, format_cgroup_memory_limit_comparison(u, "StartupMemoryLow", cdc, sizeof(cdc)),
+                prefix, c->memory_high, format_cgroup_memory_limit_comparison(u, "MemoryHigh", cdd, sizeof(cdd)),
+                prefix, c->startup_memory_high, format_cgroup_memory_limit_comparison(u, "StartupMemoryHigh", cde, sizeof(cde)),
+                prefix, c->memory_max, format_cgroup_memory_limit_comparison(u, "MemoryMax", cdf, sizeof(cdf)),
+                prefix, c->startup_memory_max, format_cgroup_memory_limit_comparison(u, "StartupMemoryMax", cdg, sizeof(cdg)),
+                prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(u, "MemorySwapMax", cdh, sizeof(cdh)),
+                prefix, c->startup_memory_swap_max, format_cgroup_memory_limit_comparison(u, "StartupMemorySwapMax", cdi, sizeof(cdi)),
+                prefix, c->memory_zswap_max, format_cgroup_memory_limit_comparison(u, "MemoryZSwapMax", cdj, sizeof(cdj)),
+                prefix, c->startup_memory_zswap_max, format_cgroup_memory_limit_comparison(u, "StartupMemoryZSwapMax", cdk, sizeof(cdk)),
+                prefix, yes_no(c->memory_zswap_writeback),
                 prefix, c->memory_limit,
                 prefix, cgroup_tasks_max_resolve(&c->tasks_max),
                 prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -811,7 +1129,7 @@ int cgroup_context_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_typ
         assert(bpffs_path);
 
         if (!path_is_normalized(bpffs_path) || !path_is_absolute(bpffs_path))
-                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not normalized: %m");
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Path is not normalized.");
 
         d = strdup(bpffs_path);
         if (!d)
@@ -867,12 +1185,13 @@ static void unit_set_xattr_graceful(Unit *u, const char *name, const void *data,
         assert(u);
         assert(name);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
-        r = cg_set_xattr(u->cgroup_path, name, data, size, 0);
+        r = cg_set_xattr(crt->cgroup_path, name, data, size, 0);
         if (r < 0)
-                log_unit_debug_errno(u, r, "Failed to set '%s' xattr on control group %s, ignoring: %m", name, empty_to_root(u->cgroup_path));
+                log_unit_debug_errno(u, r, "Failed to set '%s' xattr on control group %s, ignoring: %m", name, empty_to_root(crt->cgroup_path));
 }
 
 static void unit_remove_xattr_graceful(Unit *u, const char *name) {
@@ -881,12 +1200,13 @@ static void unit_remove_xattr_graceful(Unit *u, const char *name) {
         assert(u);
         assert(name);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
-        r = cg_remove_xattr(u->cgroup_path, name);
+        r = cg_remove_xattr(crt->cgroup_path, name);
         if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r))
-                log_unit_debug_errno(u, r, "Failed to remove '%s' xattr flag on control group %s, ignoring: %m", name, empty_to_root(u->cgroup_path));
+                log_unit_debug_errno(u, r, "Failed to remove '%s' xattr flag on control group %s, ignoring: %m", name, empty_to_root(crt->cgroup_path));
 }
 
 static void cgroup_oomd_xattr_apply(Unit *u) {
@@ -1013,9 +1333,13 @@ static void cgroup_survive_xattr_apply(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         if (u->survive_final_kill_signal) {
                 r = cg_set_xattr(
-                                u->cgroup_path,
+                                crt->cgroup_path,
                                 "user.survive_final_kill_signal",
                                 "1",
                                 1,
@@ -1023,7 +1347,7 @@ static void cgroup_survive_xattr_apply(Unit *u) {
                 /* user xattr support was added in kernel v5.7 */
                 if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
                         r = cg_set_xattr(
-                                        u->cgroup_path,
+                                        crt->cgroup_path,
                                         "trusted.survive_final_kill_signal",
                                         "1",
                                         1,
@@ -1033,7 +1357,7 @@ static void cgroup_survive_xattr_apply(Unit *u) {
                                              r,
                                              "Failed to set 'survive_final_kill_signal' xattr on control "
                                              "group %s, ignoring: %m",
-                                             empty_to_root(u->cgroup_path));
+                                             empty_to_root(crt->cgroup_path));
         } else {
                 unit_remove_xattr_graceful(u, "user.survive_final_kill_signal");
                 unit_remove_xattr_graceful(u, "trusted.survive_final_kill_signal");
@@ -1170,6 +1494,12 @@ usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution,
 static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
         usec_t new_period;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return USEC_INFINITY;
+
         if (quota == USEC_INFINITY)
                 /* Always use default period for infinity quota. */
                 return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
@@ -1182,10 +1512,10 @@ static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t qu
         new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
 
         if (new_period != period) {
-                log_unit_full(u, u->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING,
+                log_unit_full(u, crt->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING,
                               "Clamping CPU interval for cpu.max: period is now %s",
                               FORMAT_TIMESPAN(new_period, 1));
-                u->warned_clamping_cpu_quota_period = true;
+                crt->warned_clamping_cpu_quota_period = true;
         }
 
         return new_period;
@@ -1205,17 +1535,25 @@ static void cgroup_apply_unified_cpu_idle(Unit *u, uint64_t weight) {
         bool is_idle;
         const char *idle_val;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
         is_idle = weight == CGROUP_WEIGHT_IDLE;
         idle_val = one_zero(is_idle);
-        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.idle", idle_val);
+        r = cg_set_attribute("cpu", crt->cgroup_path, "cpu.idle", idle_val);
         if (r < 0 && (r != -ENOENT || is_idle))
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%s': %m",
-                                    "cpu.idle", empty_to_root(u->cgroup_path), idle_val);
+                                    "cpu.idle", empty_to_root(crt->cgroup_path), idle_val);
 }
 
 static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
         char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
 
+        assert(u);
+
         period = cgroup_cpu_adjust_period_and_log(u, period, quota);
         if (quota != USEC_INFINITY)
                 xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
@@ -1331,6 +1669,12 @@ static int set_bfq_weight(Unit *u, const char *controller, dev_t dev, uint64_t i
         uint64_t bfq_weight;
         int r;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
         /* FIXME: drop this function when distro kernels properly support BFQ through "io.weight"
          * See also: https://github.com/systemd/systemd/pull/13335 and
          * https://github.com/torvalds/linux/commit/65752aef0a407e1ef17ec78a7fc31ba4e0b360f9. */
@@ -1343,7 +1687,7 @@ static int set_bfq_weight(Unit *u, const char *controller, dev_t dev, uint64_t i
         else
                 xsprintf(buf, "%" PRIu64 "\n", bfq_weight);
 
-        r = cg_set_attribute(controller, u->cgroup_path, p, buf);
+        r = cg_set_attribute(controller, crt->cgroup_path, p, buf);
 
         /* FIXME: drop this when kernels prior
          * 795fe54c2a82 ("bfq: Add per-device weight") v5.4
@@ -1367,13 +1711,19 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_
         dev_t dev;
         int r, r1, r2;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
         if (lookup_block_device(dev_path, &dev) < 0)
                 return;
 
         r1 = set_bfq_weight(u, "io", dev, io_weight);
 
         xsprintf(buf, DEVNUM_FORMAT_STR " %" PRIu64 "\n", DEVNUM_FORMAT_VAL(dev), io_weight);
-        r2 = cg_set_attribute("io", u->cgroup_path, "io.weight", buf);
+        r2 = cg_set_attribute("io", crt->cgroup_path, "io.weight", buf);
 
         /* Look at the configured device, when both fail, prefer io.weight errno. */
         r = r2 == -EOPNOTSUPP ? r1 : r2;
@@ -1381,7 +1731,7 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_
         if (r < 0)
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r),
                                     r, "Failed to set 'io[.bfq].weight' attribute on '%s' to '%.*s': %m",
-                                    empty_to_root(u->cgroup_path), (int) strcspn(buf, NEWLINE), buf);
+                                    empty_to_root(crt->cgroup_path), (int) strcspn(buf, NEWLINE), buf);
 }
 
 static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
@@ -1498,7 +1848,8 @@ void unit_modify_nft_set(Unit *u, bool add) {
         if (cg_all_unified() <= 0)
                 return;
 
-        if (u->cgroup_id == 0)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || crt->cgroup_id == 0)
                 return;
 
         if (!u->manager->fw_ctx) {
@@ -1515,15 +1866,15 @@ void unit_modify_nft_set(Unit *u, bool add) {
                 if (nft_set->source != NFT_SET_SOURCE_CGROUP)
                         continue;
 
-                uint64_t element = u->cgroup_id;
+                uint64_t element = crt->cgroup_id;
 
                 r = nft_set_element_modify_any(u->manager->fw_ctx, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element));
                 if (r < 0)
                         log_warning_errno(r, "Failed to %s NFT set: family %s, table %s, set %s, cgroup %" PRIu64 ", ignoring: %m",
-                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, crt->cgroup_id);
                 else
                         log_debug("%s NFT set: family %s, table %s, set %s, cgroup %" PRIu64,
-                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, crt->cgroup_id);
         }
 }
 
@@ -1536,18 +1887,20 @@ static void cgroup_apply_socket_bind(Unit *u) {
 static void cgroup_apply_restrict_network_interfaces(Unit *u) {
         assert(u);
 
-        (void) restrict_network_interfaces_install(u);
+        (void) bpf_restrict_ifaces_install(u);
 }
 
 static int cgroup_apply_devices(Unit *u) {
         _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
-        const char *path;
         CGroupContext *c;
         CGroupDevicePolicy policy;
         int r;
 
         assert_se(c = unit_get_cgroup_context(u));
-        assert_se(path = u->cgroup_path);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
 
         policy = c->device_policy;
 
@@ -1561,9 +1914,9 @@ static int cgroup_apply_devices(Unit *u) {
                  * EINVAL here. */
 
                 if (c->device_allow || policy != CGROUP_DEVICE_POLICY_AUTO)
-                        r = cg_set_attribute("devices", path, "devices.deny", "a");
+                        r = cg_set_attribute("devices", crt->cgroup_path, "devices.deny", "a");
                 else
-                        r = cg_set_attribute("devices", path, "devices.allow", "a");
+                        r = cg_set_attribute("devices", crt->cgroup_path, "devices.allow", "a");
                 if (r < 0)
                         log_unit_full_errno(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
                                             "Failed to reset devices.allow/devices.deny: %m");
@@ -1571,10 +1924,14 @@ static int cgroup_apply_devices(Unit *u) {
 
         bool allow_list_static = policy == CGROUP_DEVICE_POLICY_CLOSED ||
                 (policy == CGROUP_DEVICE_POLICY_AUTO && c->device_allow);
-        if (allow_list_static)
-                (void) bpf_devices_allow_list_static(prog, path);
 
-        bool any = allow_list_static;
+        bool any = false;
+        if (allow_list_static) {
+                r = bpf_devices_allow_list_static(prog, crt->cgroup_path);
+                if (r > 0)
+                        any = true;
+        }
+
         LIST_FOREACH(device_allow, a, c->device_allow) {
                 const char *val;
 
@@ -1582,22 +1939,22 @@ static int cgroup_apply_devices(Unit *u) {
                         continue;
 
                 if (path_startswith(a->path, "/dev/"))
-                        r = bpf_devices_allow_list_device(prog, path, a->path, a->permissions);
+                        r = bpf_devices_allow_list_device(prog, crt->cgroup_path, a->path, a->permissions);
                 else if ((val = startswith(a->path, "block-")))
-                        r = bpf_devices_allow_list_major(prog, path, val, 'b', a->permissions);
+                        r = bpf_devices_allow_list_major(prog, crt->cgroup_path, val, 'b', a->permissions);
                 else if ((val = startswith(a->path, "char-")))
-                        r = bpf_devices_allow_list_major(prog, path, val, 'c', a->permissions);
+                        r = bpf_devices_allow_list_major(prog, crt->cgroup_path, val, 'c', a->permissions);
                 else {
                         log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
                         continue;
                 }
 
-                if (r >= 0)
+                if (r > 0)
                         any = true;
         }
 
         if (prog && !any) {
-                log_unit_warning_errno(u, SYNTHETIC_ERRNO(ENODEV), "No devices matched by device filter.");
+                log_unit_warning(u, "No devices matched by device filter.");
 
                 /* The kernel verifier would reject a program we would build with the normal intro and outro
                    but no allow-listing rules (outro would contain an unreachable instruction for successful
@@ -1605,7 +1962,7 @@ static int cgroup_apply_devices(Unit *u) {
                 policy = CGROUP_DEVICE_POLICY_STRICT;
         }
 
-        r = bpf_devices_apply_policy(&prog, policy, any, path, &u->bpf_device_control_installed);
+        r = bpf_devices_apply_policy(&prog, policy, any, crt->cgroup_path, &crt->bpf_device_control_installed);
         if (r < 0) {
                 static bool warned = false;
 
@@ -1652,9 +2009,9 @@ static void cgroup_context_apply(
                 CGroupMask apply_mask,
                 ManagerState state) {
 
+        bool is_host_root, is_local_root;
         const char *path;
         CGroupContext *c;
-        bool is_host_root, is_local_root;
         int r;
 
         assert(u);
@@ -1669,7 +2026,12 @@ static void cgroup_context_apply(
         is_host_root = unit_has_host_root_cgroup(u);
 
         assert_se(c = unit_get_cgroup_context(u));
-        assert_se(path = u->cgroup_path);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
+        path = crt->cgroup_path;
 
         if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
                 path = "/";
@@ -1879,6 +2241,7 @@ static void cgroup_context_apply(
                         cgroup_apply_unified_memory_limit(u, "memory.zswap.max", zswap_max);
 
                         (void) set_attribute_and_warn(u, "memory", "memory.oom.group", one_zero(c->memory_oom_group));
+                        (void) set_attribute_and_warn(u, "memory", "memory.zswap.writeback", one_zero(c->memory_zswap_writeback));
 
                 } else {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
@@ -2137,20 +2500,24 @@ CGroupMask unit_get_members_mask(Unit *u) {
 
         /* Returns the mask of controllers all of the unit's children require, merged */
 
-        if (u->cgroup_members_mask_valid)
-                return u->cgroup_members_mask; /* Use cached value if possible */
-
-        u->cgroup_members_mask = 0;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_members_mask_valid)
+                return crt->cgroup_members_mask; /* Use cached value if possible */
 
+        CGroupMask m = 0;
         if (u->type == UNIT_SLICE) {
                 Unit *member;
 
                 UNIT_FOREACH_DEPENDENCY(member, u, UNIT_ATOM_SLICE_OF)
-                        u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+                        m |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+        }
+
+        if (crt) {
+                crt->cgroup_members_mask = m;
+                crt->cgroup_members_mask_valid = true;
         }
 
-        u->cgroup_members_mask_valid = true;
-        return u->cgroup_members_mask;
+        return m;
 }
 
 CGroupMask unit_get_siblings_mask(Unit *u) {
@@ -2236,8 +2603,12 @@ void unit_invalidate_cgroup_members_masks(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         /* Recurse invalidate the member masks cache all the way up the tree */
-        u->cgroup_members_mask_valid = false;
+        crt->cgroup_members_mask_valid = false;
 
         slice = UNIT_GET_SLICE(u);
         if (slice)
@@ -2249,11 +2620,12 @@ const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
         /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
 
         while (u) {
-
-                if (u->cgroup_path &&
-                    u->cgroup_realized &&
-                    FLAGS_SET(u->cgroup_realized_mask, mask))
-                        return u->cgroup_path;
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (crt &&
+                    crt->cgroup_path &&
+                    crt->cgroup_realized &&
+                    FLAGS_SET(crt->cgroup_realized_mask, mask))
+                        return crt->cgroup_path;
 
                 u = UNIT_GET_SLICE(u);
         }
@@ -2303,27 +2675,34 @@ int unit_default_cgroup_path(const Unit *u, char **ret) {
 
 int unit_set_cgroup_path(Unit *u, const char *path) {
         _cleanup_free_ char *p = NULL;
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        if (streq_ptr(u->cgroup_path, path))
+        crt = unit_get_cgroup_runtime(u);
+
+        if (crt && streq_ptr(crt->cgroup_path, path))
                 return 0;
 
+        unit_release_cgroup(u);
+
+        crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         if (path) {
                 p = strdup(path);
                 if (!p)
                         return -ENOMEM;
-        }
 
-        if (p) {
                 r = hashmap_put(u->manager->cgroup_unit, p, u);
                 if (r < 0)
                         return r;
         }
 
-        unit_release_cgroup(u);
-        u->cgroup_path = TAKE_PTR(p);
+        assert(!crt->cgroup_path);
+        crt->cgroup_path = TAKE_PTR(p);
 
         return 1;
 }
@@ -2337,10 +2716,11 @@ int unit_watch_cgroup(Unit *u) {
         /* Watches the "cgroups.events" attribute of this unit's cgroup for "empty" events, but only if
          * cgroupv2 is available. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        if (u->cgroup_control_inotify_wd >= 0)
+        if (crt->cgroup_control_inotify_wd >= 0)
                 return 0;
 
         /* Only applies to the unified hierarchy */
@@ -2358,30 +2738,29 @@ int unit_watch_cgroup(Unit *u) {
         if (r < 0)
                 return log_oom();
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "cgroup.events", &events);
         if (r < 0)
                 return log_oom();
 
-        u->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
-        if (u->cgroup_control_inotify_wd < 0) {
+        crt->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+        if (crt->cgroup_control_inotify_wd < 0) {
 
                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
                                       * is not an error */
                         return 0;
 
-                return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", empty_to_root(crt->cgroup_path));
         }
 
-        r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd), u);
+        r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(crt->cgroup_control_inotify_wd), u);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor for control group %s to hash map: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor for control group %s to hash map: %m", empty_to_root(crt->cgroup_path));
 
         return 0;
 }
 
 int unit_watch_cgroup_memory(Unit *u) {
         _cleanup_free_ char *events = NULL;
-        CGroupContext *c;
         int r;
 
         assert(u);
@@ -2389,10 +2768,11 @@ int unit_watch_cgroup_memory(Unit *u) {
         /* Watches the "memory.events" attribute of this unit's cgroup for "oom_kill" events, but only if
          * cgroupv2 is available. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        c = unit_get_cgroup_context(u);
+        CGroupContext *c = unit_get_cgroup_context(u);
         if (!c)
                 return 0;
 
@@ -2407,7 +2787,7 @@ int unit_watch_cgroup_memory(Unit *u) {
         if (u->type == UNIT_SLICE)
                 return 0;
 
-        if (u->cgroup_memory_inotify_wd >= 0)
+        if (crt->cgroup_memory_inotify_wd >= 0)
                 return 0;
 
         /* Only applies to the unified hierarchy */
@@ -2421,23 +2801,23 @@ int unit_watch_cgroup_memory(Unit *u) {
         if (r < 0)
                 return log_oom();
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "memory.events", &events);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "memory.events", &events);
         if (r < 0)
                 return log_oom();
 
-        u->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
-        if (u->cgroup_memory_inotify_wd < 0) {
+        crt->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+        if (crt->cgroup_memory_inotify_wd < 0) {
 
                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
                                       * is not an error */
                         return 0;
 
-                return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", empty_to_root(crt->cgroup_path));
         }
 
-        r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd), u);
+        r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(crt->cgroup_memory_inotify_wd), u);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor for control group %s to hash map: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor for control group %s to hash map: %m", empty_to_root(crt->cgroup_path));
 
         return 0;
 }
@@ -2448,12 +2828,15 @@ int unit_pick_cgroup_path(Unit *u) {
 
         assert(u);
 
-        if (u->cgroup_path)
-                return 0;
-
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return -EINVAL;
 
+        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+        if (crt->cgroup_path)
+                return 0;
+
         r = unit_default_cgroup_path(u, &path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "Failed to generate default cgroup path: %m");
@@ -2483,30 +2866,35 @@ static int unit_update_cgroup(
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return 0;
 
+        if (u->freezer_state != FREEZER_RUNNING)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(EBUSY), "Cannot realize cgroup for frozen unit.");
+
         /* Figure out our cgroup path */
         r = unit_pick_cgroup_path(u);
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         /* First, create our own group */
-        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
+        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, crt->cgroup_path);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", empty_to_root(crt->cgroup_path));
         created = r;
 
         if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
                 uint64_t cgroup_id = 0;
 
-                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_full_path);
+                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_full_path);
                 if (r == 0) {
                         r = cg_path_get_cgroupid(cgroup_full_path, &cgroup_id);
                         if (r < 0)
                                 log_unit_full_errno(u, ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
                                                     "Failed to get cgroup ID of cgroup %s, ignoring: %m", cgroup_full_path);
                 } else
-                        log_unit_warning_errno(u, r, "Failed to get full cgroup path on cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to get full cgroup path on cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
-                u->cgroup_id = cgroup_id;
+                crt->cgroup_id = cgroup_id;
         }
 
         /* Start watching it */
@@ -2515,23 +2903,23 @@ static int unit_update_cgroup(
 
         /* For v2 we preserve enabled controllers in delegated units, adjust others,
          * for v1 we figure out which controller hierarchies need migration. */
-        if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
+        if (created || !crt->cgroup_realized || !unit_cgroup_delegate(u)) {
                 CGroupMask result_mask = 0;
 
                 /* Enable all controllers we need */
-                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
+                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, crt->cgroup_path, &result_mask);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 /* Remember what's actually enabled now */
-                u->cgroup_enabled_mask = result_mask;
+                crt->cgroup_enabled_mask = result_mask;
 
-                migrate_mask = u->cgroup_realized_mask ^ target_mask;
+                migrate_mask = crt->cgroup_realized_mask ^ target_mask;
         }
 
         /* Keep track that this is now realized */
-        u->cgroup_realized = true;
-        u->cgroup_realized_mask = target_mask;
+        crt->cgroup_realized = true;
+        crt->cgroup_realized_mask = target_mask;
 
         /* Migrate processes in controller hierarchies both downwards (enabling) and upwards (disabling).
          *
@@ -2541,14 +2929,14 @@ static int unit_update_cgroup(
          * delegated units.
          */
         if (cg_all_unified() == 0) {
-                r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, u->cgroup_path, migrate_callback, u);
+                r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, crt->cgroup_path, migrate_callback, u);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to migrate controller cgroups from %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to migrate controller cgroups from %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
-                r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, u->cgroup_path, !is_root_slice);
+                r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, crt->cgroup_path, !is_root_slice);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to delete controller cgroups %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to delete controller cgroups %s, ignoring: %m", empty_to_root(crt->cgroup_path));
         }
 
         /* Set attributes */
@@ -2578,11 +2966,12 @@ static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suf
         if (!u->manager->system_bus)
                 return -EIO;
 
-        if (!u->cgroup_path)
-                return -EINVAL;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
 
         /* Determine this unit's cgroup path relative to our cgroup root */
-        pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
+        pp = path_startswith(crt->cgroup_path, u->manager->cgroup_root);
         if (!pp)
                 return -EINVAL;
 
@@ -2626,10 +3015,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         if (isempty(suffix_path))
-                p = u->cgroup_path;
+                p = crt->cgroup_path;
         else {
-                joined = path_join(u->cgroup_path, suffix_path);
+                joined = path_join(crt->cgroup_path, suffix_path);
                 if (!joined)
                         return -ENOMEM;
 
@@ -2701,7 +3092,7 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
                                 continue;
 
                         /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
-                        if (delegated_mask & u->cgroup_realized_mask & bit) {
+                        if (delegated_mask & crt->cgroup_realized_mask & bit) {
                                 r = cg_attach(cgroup_controller_to_string(c), p, pid->pid);
                                 if (r >= 0)
                                         continue; /* Success! */
@@ -2734,6 +3125,10 @@ static bool unit_has_mask_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return false;
+
         /* Returns true if this unit is fully realized. We check four things:
          *
          * 1. Whether the cgroup was created at all
@@ -2749,10 +3144,10 @@ static bool unit_has_mask_realized(
          * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
          * simply don't matter. */
 
-        return u->cgroup_realized &&
-                ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
-                ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
-                u->cgroup_invalidated_mask == 0;
+        return crt->cgroup_realized &&
+                ((crt->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
+                ((crt->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
+                crt->cgroup_invalidated_mask == 0;
 }
 
 static bool unit_has_mask_disables_realized(
@@ -2762,14 +3157,18 @@ static bool unit_has_mask_disables_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return true;
+
         /* Returns true if all controllers which should be disabled are indeed disabled.
          *
          * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
          * already removed. */
 
-        return !u->cgroup_realized ||
-                (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
-                 FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
+        return !crt->cgroup_realized ||
+                (FLAGS_SET(crt->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+                 FLAGS_SET(crt->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
 }
 
 static bool unit_has_mask_enables_realized(
@@ -2779,14 +3178,18 @@ static bool unit_has_mask_enables_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return false;
+
         /* Returns true if all controllers which should be enabled are indeed enabled.
          *
          * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
          * we want to add is already added. */
 
-        return u->cgroup_realized &&
-                ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
-                ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
+        return crt->cgroup_realized &&
+                ((crt->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (crt->cgroup_realized_mask & CGROUP_MASK_V1) &&
+                ((crt->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (crt->cgroup_enabled_mask & CGROUP_MASK_V2);
 }
 
 void unit_add_to_cgroup_realize_queue(Unit *u) {
@@ -2835,8 +3238,10 @@ static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
         if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
                 return 0;
 
-        new_target_mask = u->cgroup_realized_mask | target_mask;
-        new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        new_target_mask = (crt ? crt->cgroup_realized_mask : 0) | target_mask;
+        new_enable_mask = (crt ? crt->cgroup_enabled_mask : 0) | enable_mask;
 
         return unit_update_cgroup(u, new_target_mask, new_enable_mask, state);
 }
@@ -2855,9 +3260,13 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
                 CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
                 int r;
 
+                CGroupRuntime *rt = unit_get_cgroup_runtime(m);
+                if (!rt)
+                        continue;
+
                 /* The cgroup for this unit might not actually be fully realised yet, in which case it isn't
                  * holding any controllers open anyway. */
-                if (!m->cgroup_realized)
+                if (!rt->cgroup_realized)
                         continue;
 
                 /* We must disable those below us first in order to release the controller. */
@@ -2871,8 +3280,8 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
                 if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
                         continue;
 
-                new_target_mask = m->cgroup_realized_mask & target_mask;
-                new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+                new_target_mask = rt->cgroup_realized_mask & target_mask;
+                new_enable_mask = rt->cgroup_enabled_mask & enable_mask;
 
                 r = unit_update_cgroup(m, new_target_mask, new_enable_mask, state);
                 if (r < 0)
@@ -2959,8 +3368,10 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         /* Now, reset the invalidation mask */
-        u->cgroup_invalidated_mask = 0;
+        crt->cgroup_invalidated_mask = 0;
         return 0;
 }
 
@@ -3011,11 +3422,13 @@ void unit_add_family_to_cgroup_realize_queue(Unit *u) {
          * masks. */
 
         do {
-                Unit *m;
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
                 /* Children of u likely changed when we're called */
-                u->cgroup_members_mask_valid = false;
+                if (crt)
+                        crt->cgroup_members_mask_valid = false;
 
+                Unit *m;
                 UNIT_FOREACH_DEPENDENCY(m, u, UNIT_ATOM_SLICE_OF) {
 
                         /* No point in doing cgroup application for units without active processes. */
@@ -3024,7 +3437,8 @@ void unit_add_family_to_cgroup_realize_queue(Unit *u) {
 
                         /* We only enqueue siblings if they were realized once at least, in the main
                          * hierarchy. */
-                        if (!m->cgroup_realized)
+                        crt = unit_get_cgroup_runtime(m);
+                        if (!crt || !crt->cgroup_realized)
                                 continue;
 
                         /* If the unit doesn't need any new controllers and has current ones
@@ -3075,26 +3489,50 @@ void unit_release_cgroup(Unit *u) {
         /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
          * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
 
-        if (u->cgroup_path) {
-                (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
-                u->cgroup_path = mfree(u->cgroup_path);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
+        if (crt->cgroup_path) {
+                (void) hashmap_remove(u->manager->cgroup_unit, crt->cgroup_path);
+                crt->cgroup_path = mfree(crt->cgroup_path);
         }
 
-        if (u->cgroup_control_inotify_wd >= 0) {
-                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_control_inotify_wd) < 0)
-                        log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", u->cgroup_control_inotify_wd, u->id);
+        if (crt->cgroup_control_inotify_wd >= 0) {
+                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, crt->cgroup_control_inotify_wd) < 0)
+                        log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", crt->cgroup_control_inotify_wd, u->id);
 
-                (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd));
-                u->cgroup_control_inotify_wd = -1;
+                (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(crt->cgroup_control_inotify_wd));
+                crt->cgroup_control_inotify_wd = -1;
         }
 
-        if (u->cgroup_memory_inotify_wd >= 0) {
-                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_memory_inotify_wd) < 0)
-                        log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", u->cgroup_memory_inotify_wd, u->id);
+        if (crt->cgroup_memory_inotify_wd >= 0) {
+                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, crt->cgroup_memory_inotify_wd) < 0)
+                        log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", crt->cgroup_memory_inotify_wd, u->id);
 
-                (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd));
-                u->cgroup_memory_inotify_wd = -1;
+                (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(crt->cgroup_memory_inotify_wd));
+                crt->cgroup_memory_inotify_wd = -1;
         }
+
+        *(CGroupRuntime**) ((uint8_t*) u + UNIT_VTABLE(u)->cgroup_runtime_offset) = cgroup_runtime_free(crt);
+}
+
+int unit_cgroup_is_empty(Unit *u) {
+        int r;
+
+        assert(u);
+        /* Asks cg_is_empty_recursive() about this unit's cgroup; callers in this file test the result for 1. */
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -ENXIO; /* No cgroup runtime object exists for this unit */
+        if (!crt->cgroup_path)
+                return -EOWNERDEAD; /* Runtime object exists, but no cgroup path is set */
+
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path);
+        if (r < 0) /* Logged at debug level only; the caller receives the logging helper's return value */
+                return log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty, ignoring: %m", empty_to_root(crt->cgroup_path));
+
+        return r; /* Non-negative result of cg_is_empty_recursive(), passed through unchanged */
+}
 
 bool unit_maybe_release_cgroup(Unit *u) {
@@ -3102,17 +3540,16 @@ bool unit_maybe_release_cgroup(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return true;
 
-        /* Don't release the cgroup if there are still processes under it. If we get notified later when all the
-         * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed)
-         * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned
-         * up later. */
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-        if (r < 0)
-                log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m");
-        else if (r == 1) {
+        /* Don't release the cgroup if there are still processes under it. If we get notified later when all
+         * the processes exit (e.g. the processes were in D-state and exited after the unit was marked as
+         * failed) we need the cgroup paths to continue to be tracked by the manager so they can be looked up
+         * and cleaned up later. */
+        r = unit_cgroup_is_empty(u);
+        if (r == 1) {
                 unit_release_cgroup(u);
                 return true;
         }
@@ -3127,28 +3564,32 @@ void unit_prune_cgroup(Unit *u) {
         assert(u);
 
         /* Removes the cgroup, if empty and possible, and stops watching it. */
-
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
-        (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
+        /* Cache the last CPU and memory usage values before we destroy the cgroup */
+        (void) unit_get_cpu_usage(u, /* ret = */ NULL);
+
+        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++)
+                (void) unit_get_memory_accounting(u, metric, /* ret = */ NULL);
 
 #if BPF_FRAMEWORK
-        (void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
+        (void) bpf_restrict_fs_cleanup(u); /* Remove cgroup from the global LSM BPF map */
 #endif
 
         unit_modify_nft_set(u, /* add = */ false);
 
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
-        r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
+        r = cg_trim_everywhere(u->manager->cgroup_supported, crt->cgroup_path, !is_root_slice);
         if (r < 0)
                 /* One reason we could have failed here is, that the cgroup still contains a process.
                  * However, if the cgroup becomes removable at a later time, it might be removed when
                  * the containing slice is stopped. So even if we failed now, this unit shouldn't assume
                  * that the cgroup is still realized the next time it is started. Do not return early
                  * on error, continue cleanup. */
-                log_unit_full_errno(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                log_unit_full_errno(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
         if (is_root_slice)
                 return;
@@ -3156,11 +3597,15 @@ void unit_prune_cgroup(Unit *u) {
         if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */
                 return;
 
-        u->cgroup_realized = false;
-        u->cgroup_realized_mask = 0;
-        u->cgroup_enabled_mask = 0;
+        crt = unit_get_cgroup_runtime(u); /* The above might have destroyed the runtime object, let's see if it's still there */
+        if (!crt)
+                return;
+
+        crt->cgroup_realized = false;
+        crt->cgroup_realized_mask = 0;
+        crt->cgroup_enabled_mask = 0;
 
-        u->bpf_device_control_installed = bpf_program_free(u->bpf_device_control_installed);
+        crt->bpf_device_control_installed = bpf_program_free(crt->bpf_device_control_installed);
 }
 
 int unit_search_main_pid(Unit *u, PidRef *ret) {
@@ -3171,17 +3616,20 @@ int unit_search_main_pid(Unit *u, PidRef *ret) {
         assert(u);
         assert(ret);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENXIO;
 
-        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, &f);
         if (r < 0)
                 return r;
 
         for (;;) {
                 _cleanup_(pidref_done) PidRef npidref = PIDREF_NULL;
 
-                r = cg_read_pidref(f, &npidref);
+                /* cg_read_pidref() will return an error on unmapped PIDs.
+                 * We can't reasonably deal with units that contain those. */
+                r = cg_read_pidref(f, &npidref, CGROUP_DONT_SKIP_UNMAPPED);
                 if (r < 0)
                         return r;
                 if (r == 0)
@@ -3223,7 +3671,7 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
                 for (;;) {
                         _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
 
-                        r = cg_read_pidref(f, &pid);
+                        r = cg_read_pidref(f, &pid, /* flags = */ 0);
                         if (r == 0)
                                 break;
                         if (r < 0) {
@@ -3270,7 +3718,8 @@ int unit_synthesize_cgroup_empty_event(Unit *u) {
          * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
          * get as notification source as soon as we stopped having any useful PIDs to watch for. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3296,7 +3745,8 @@ int unit_watch_all_pids(Unit *u) {
          * get reliable cgroup empty notifications: we try to use
          * SIGCHLD as replacement. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3305,7 +3755,7 @@ int unit_watch_all_pids(Unit *u) {
         if (r > 0) /* On unified we can use proper notifications */
                 return 0;
 
-        return unit_watch_pids_in_path(u, u->cgroup_path);
+        return unit_watch_pids_in_path(u, crt->cgroup_path);
 }
 
 static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
@@ -3370,15 +3820,8 @@ void unit_add_to_cgroup_empty_queue(Unit *u) {
                 return;
 
         /* Let's verify that the cgroup is really empty */
-        if (!u->cgroup_path)
-                return;
-
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-        if (r < 0) {
-                log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", empty_to_root(u->cgroup_path));
-                return;
-        }
-        if (r == 0)
+        r = unit_cgroup_is_empty(u);
+        if (r <= 0)
                 return;
 
         LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
@@ -3406,7 +3849,10 @@ int unit_check_oomd_kill(Unit *u) {
         uint64_t n = 0;
         int r;
 
-        if (!u->cgroup_path)
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         r = cg_all_unified();
@@ -3415,7 +3861,7 @@ int unit_check_oomd_kill(Unit *u) {
         else if (r == 0)
                 return 0;
 
-        r = cg_get_xattr_malloc(u->cgroup_path, "user.oomd_ooms", &value);
+        r = cg_get_xattr_malloc(crt->cgroup_path, "user.oomd_ooms", &value);
         if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r))
                 return r;
 
@@ -3425,15 +3871,15 @@ int unit_check_oomd_kill(Unit *u) {
                          return r;
         }
 
-        increased = n > u->managed_oom_kill_last;
-        u->managed_oom_kill_last = n;
+        increased = n > crt->managed_oom_kill_last;
+        crt->managed_oom_kill_last = n;
 
         if (!increased)
                 return 0;
 
         n = 0;
         value = mfree(value);
-        r = cg_get_xattr_malloc(u->cgroup_path, "user.oomd_kill", &value);
+        r = cg_get_xattr_malloc(crt->cgroup_path, "user.oomd_kill", &value);
         if (r >= 0 && !isempty(value))
                 (void) safe_atou64(value, &n);
 
@@ -3460,10 +3906,16 @@ int unit_check_oom(Unit *u) {
         uint64_t c;
         int r;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        r = cg_get_keyed_attribute("memory", u->cgroup_path, "memory.events", STRV_MAKE("oom_kill"), &oom_kill);
+        r = cg_get_keyed_attribute(
+                        "memory",
+                        crt->cgroup_path,
+                        "memory.events",
+                        STRV_MAKE("oom_kill"),
+                        &oom_kill);
         if (IN_SET(r, -ENOENT, -ENXIO)) /* Handle gracefully if cgroup or oom_kill attribute don't exist */
                 c = 0;
         else if (r < 0)
@@ -3474,8 +3926,8 @@ int unit_check_oom(Unit *u) {
                         return log_unit_debug_errno(u, r, "Failed to parse oom_kill field: %m");
         }
 
-        increased = c > u->oom_kill_last;
-        u->oom_kill_last = c;
+        increased = c > crt->oom_kill_last;
+        crt->oom_kill_last = c;
 
         if (!increased)
                 return 0;
@@ -3525,7 +3977,9 @@ static void unit_add_to_cgroup_oom_queue(Unit *u) {
 
         if (u->in_cgroup_oom_queue)
                 return;
-        if (!u->cgroup_path)
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
         LIST_PREPEND(cgroup_oom_queue, u->manager->cgroup_oom_queue, u);
@@ -3541,7 +3995,7 @@ static void unit_add_to_cgroup_oom_queue(Unit *u) {
                         return;
                 }
 
-                r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_NORMAL-8);
+                r = sd_event_source_set_priority(s, EVENT_PRIORITY_CGROUP_OOM);
                 if (r < 0) {
                         log_error_errno(r, "Failed to set priority of cgroup oom event source: %m");
                         return;
@@ -3562,11 +4016,16 @@ static int unit_check_cgroup_events(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        r = cg_get_keyed_attribute_graceful(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
-                                            STRV_MAKE("populated", "frozen"), values);
+        r = cg_get_keyed_attribute_graceful(
+                        SYSTEMD_CGROUP_CONTROLLER,
+                        crt->cgroup_path,
+                        "cgroup.events",
+                        STRV_MAKE("populated", "frozen"),
+                        values);
         if (r < 0)
                 return r;
 
@@ -3580,8 +4039,10 @@ static int unit_check_cgroup_events(Unit *u) {
                         unit_add_to_cgroup_empty_queue(u);
         }
 
-        /* Disregard freezer state changes due to operations not initiated by us */
-        if (values[1] && IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING)) {
+        /* Disregard freezer state changes due to operations not initiated by us.
+         * See: https://github.com/systemd/systemd/pull/13512/files#r416469963 and
+         *      https://github.com/systemd/systemd/pull/13512#issuecomment-573007207 */
+        if (values[1] && IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_FREEZING_BY_PARENT, FREEZER_THAWING)) {
                 if (streq(values[1], "0"))
                         unit_thawed(u);
                 else
@@ -3670,7 +4131,7 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
                 mask |= CGROUP_MASK_BPF_SOCKET_BIND;
 
         /* BPF-based cgroup_skb/{egress|ingress} hooks */
-        r = restrict_network_interfaces_supported();
+        r = bpf_restrict_ifaces_supported();
         if (r < 0)
                 return r;
         if (r > 0)
@@ -3747,7 +4208,7 @@ int manager_setup_cgroup(Manager *m) {
         /* Schedule cgroup empty checks early, but after having processed service notification messages or
          * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of
          * notification, and we collected the metadata the notification and SIGCHLD stuff offers first. */
-        r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5);
+        r = sd_event_source_set_priority(m->cgroup_empty_event_source, EVENT_PRIORITY_CGROUP_EMPTY);
         if (r < 0)
                 return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m");
 
@@ -3776,7 +4237,7 @@ int manager_setup_cgroup(Manager *m) {
                 /* Process cgroup empty notifications early. Note that when this event is dispatched it'll
                  * just add the unit to a cgroup empty queue, hence let's run earlier than that. Also see
                  * handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
-                r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-9);
+                r = sd_event_source_set_priority(m->cgroup_inotify_event_source, EVENT_PRIORITY_CGROUP_INOTIFY);
                 if (r < 0)
                         return log_error_errno(r, "Failed to set priority of inotify event source: %m");
 
@@ -3885,7 +4346,7 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
         }
 }
 
-Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid) {
+Unit *manager_get_unit_by_pidref_cgroup(Manager *m, const PidRef *pid) {
         _cleanup_free_ char *cgroup = NULL;
 
         assert(m);
@@ -3896,7 +4357,7 @@ Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid) {
         return manager_get_unit_by_cgroup(m, cgroup);
 }
 
-Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid) {
+Unit *manager_get_unit_by_pidref_watching(Manager *m, const PidRef *pid) {
         Unit *u, **array;
 
         assert(m);
@@ -3915,7 +4376,7 @@ Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid) {
         return NULL;
 }
 
-Unit *manager_get_unit_by_pidref(Manager *m, PidRef *pid) {
+Unit *manager_get_unit_by_pidref(Manager *m, const PidRef *pid) {
         Unit *u;
 
         assert(m);
@@ -3994,7 +4455,8 @@ int unit_get_memory_available(Unit *u, uint64_t *ret) {
                 if (!unit_context)
                         return -ENODATA;
 
-                if (!u->cgroup_path)
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (!crt || !crt->cgroup_path)
                         continue;
 
                 (void) unit_get_memory_current(u, &current);
@@ -4026,21 +4488,22 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
         if (!UNIT_CGROUP_BOOL(u, memory_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
         if (unit_has_host_root_cgroup(u))
                 return procfs_memory_get_used(ret);
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                 return -ENODATA;
 
         r = cg_all_unified();
         if (r < 0)
                 return r;
 
-        return cg_get_attribute_as_uint64("memory", u->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret);
+        return cg_get_attribute_as_uint64("memory", crt->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret);
 }
 
 int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uint64_t *ret) {
@@ -4063,7 +4526,10 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (!UNIT_CGROUP_BOOL(u, memory_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -ENODATA;
+        if (!crt->cgroup_path)
                 /* If the cgroup is already gone, we try to find the last cached value. */
                 goto finish;
 
@@ -4071,7 +4537,7 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (unit_has_host_root_cgroup(u))
                 return -ENODATA;
 
-        if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_MEMORY))
+        if (!FLAGS_SET(crt->cgroup_realized_mask, CGROUP_MASK_MEMORY))
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4080,14 +4546,14 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (r == 0)
                 return -ENODATA;
 
-        r = cg_get_attribute_as_uint64("memory", u->cgroup_path, attributes_table[metric], &bytes);
+        r = cg_get_attribute_as_uint64("memory", crt->cgroup_path, attributes_table[metric], &bytes);
         if (r < 0 && r != -ENODATA)
                 return r;
         updated = r >= 0;
 
 finish:
         if (metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST) {
-                uint64_t *last = &u->memory_accounting_last[metric];
+                uint64_t *last = &crt->memory_accounting_last[metric];
 
                 if (updated)
                         *last = bytes;
@@ -4112,17 +4578,18 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) {
         if (!UNIT_CGROUP_BOOL(u, tasks_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
         if (unit_has_host_root_cgroup(u))
                 return procfs_tasks_get_current(ret);
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
                 return -ENODATA;
 
-        return cg_get_attribute_as_uint64("pids", u->cgroup_path, "pids.current", ret);
+        return cg_get_attribute_as_uint64("pids", crt->cgroup_path, "pids.current", ret);
 }
 
 static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
@@ -4132,7 +4599,8 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         assert(u);
         assert(ret);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
@@ -4140,7 +4608,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
                 return procfs_cpu_get_usage(ret);
 
         /* Requisite controllers for CPU accounting are not enabled */
-        if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+        if ((get_cpu_accounting_mask() & ~crt->cgroup_realized_mask) != 0)
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4150,7 +4618,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
                 _cleanup_free_ char *val = NULL;
                 uint64_t us;
 
-                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+                r = cg_get_keyed_attribute("cpu", crt->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
                 if (IN_SET(r, -ENOENT, -ENXIO))
                         return -ENODATA;
                 if (r < 0)
@@ -4162,7 +4630,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
 
                 ns = us * NSEC_PER_USEC;
         } else
-                return cg_get_attribute_as_uint64("cpuacct", u->cgroup_path, "cpuacct.usage", ret);
+                return cg_get_attribute_as_uint64("cpuacct", crt->cgroup_path, "cpuacct.usage", ret);
 
         *ret = ns;
         return 0;
@@ -4178,27 +4646,31 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
          * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
          * call this function with a NULL return value. */
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
         if (!UNIT_CGROUP_BOOL(u, cpu_accounting))
                 return -ENODATA;
 
         r = unit_get_cpu_usage_raw(u, &ns);
-        if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+        if (r == -ENODATA && crt->cpu_usage_last != NSEC_INFINITY) {
                 /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
                  * cached value. */
 
                 if (ret)
-                        *ret = u->cpu_usage_last;
+                        *ret = crt->cpu_usage_last;
                 return 0;
         }
         if (r < 0)
                 return r;
 
-        if (ns > u->cpu_usage_base)
-                ns -= u->cpu_usage_base;
+        if (ns > crt->cpu_usage_base)
+                ns -= crt->cpu_usage_base;
         else
                 ns = 0;
 
-        u->cpu_usage_last = ns;
+        crt->cpu_usage_last = ns;
         if (ret)
                 *ret = ns;
 
@@ -4221,9 +4693,13 @@ int unit_get_ip_accounting(
         if (!UNIT_CGROUP_BOOL(u, ip_accounting))
                 return -ENODATA;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
         fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
-                u->ip_accounting_ingress_map_fd :
-                u->ip_accounting_egress_map_fd;
+                crt->ip_accounting_ingress_map_fd :
+                crt->ip_accounting_egress_map_fd;
         if (fd < 0)
                 return -ENODATA;
 
@@ -4238,11 +4714,62 @@ int unit_get_ip_accounting(
          * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
          * ip_accounting_extra[] field, and add them in here transparently. */
 
-        *ret = value + u->ip_accounting_extra[metric];
+        *ret = value + crt->ip_accounting_extra[metric];
 
         return r;
 }
 
+/* Returns the limit of the given type as configured on this one unit, without looking at any of
+ * its parent slices. The root slice is special-cased: it reports physical_memory() for both
+ * memory limits and system_tasks_max() for the tasks limit. */
+static uint64_t unit_get_effective_limit_one(Unit *u, CGroupLimitType type) {
+        assert(u);
+        assert(UNIT_HAS_CGROUP_CONTEXT(u));
+
+        if (unit_has_name(u, SPECIAL_ROOT_SLICE)) {
+                if (IN_SET(type, CGROUP_LIMIT_MEMORY_MAX, CGROUP_LIMIT_MEMORY_HIGH))
+                        return physical_memory();
+                if (type == CGROUP_LIMIT_TASKS_MAX)
+                        return system_tasks_max();
+
+                /* Any other limit type is a programming error. */
+                assert_not_reached();
+        }
+
+        CGroupContext *cc = ASSERT_PTR(unit_get_cgroup_context(u));
+
+        /* Note: on legacy/hybrid hierarchies memory_max stays CGROUP_LIMIT_MAX unless configured
+         * explicitly. Effective value of MemoryLimit= (cgroup v1) is not implemented. */
+        if (type == CGROUP_LIMIT_MEMORY_MAX)
+                return cc->memory_max;
+        if (type == CGROUP_LIMIT_MEMORY_HIGH)
+                return cc->memory_high;
+        if (type == CGROUP_LIMIT_TASKS_MAX)
+                return cgroup_tasks_max_resolve(&cc->tasks_max);
+
+        /* Any other limit type is a programming error. */
+        assert_not_reached();
+}
+
+/* Determines the tightest limit of the given type that applies to the unit, i.e. the minimum over
+ * the unit itself and each slice above it. Returns -EINVAL if the unit has no cgroup context,
+ * otherwise 0 with the result stored in *ret. */
+int unit_get_effective_limit(Unit *u, CGroupLimitType type, uint64_t *ret) {
+        assert(u);
+        assert(ret);
+        assert(type >= 0);
+        assert(type < _CGROUP_LIMIT_TYPE_MAX);
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return -EINVAL;
+
+        *ret = unit_get_effective_limit_one(u, type);
+        for (Unit *parent = UNIT_GET_SLICE(u); parent; parent = UNIT_GET_SLICE(parent))
+                *ret = MIN(*ret, unit_get_effective_limit_one(parent, type));
+
+        return 0;
+}
+
 static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_ACCOUNTING_METRIC_MAX]) {
         static const char *const field_names[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
                 [CGROUP_IO_READ_BYTES]       = "rbytes=",
@@ -4257,7 +4784,8 @@ static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_AC
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         if (unit_has_host_root_cgroup(u))
@@ -4266,13 +4794,13 @@ static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_AC
         r = cg_all_unified();
         if (r < 0)
                 return r;
-        if (r == 0) /* TODO: support cgroupv1 */
+        if (r == 0)
                 return -ENODATA;
 
-        if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_IO))
+        if (!FLAGS_SET(crt->cgroup_realized_mask, CGROUP_MASK_IO))
                 return -ENODATA;
 
-        r = cg_get_path("io", u->cgroup_path, "io.stat", &path);
+        r = cg_get_path("io", crt->cgroup_path, "io.stat", &path);
         if (r < 0)
                 return r;
 
@@ -4340,26 +4868,30 @@ int unit_get_io_accounting(
         if (!UNIT_CGROUP_BOOL(u, io_accounting))
                 return -ENODATA;
 
-        if (allow_cache && u->io_accounting_last[metric] != UINT64_MAX)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
+        if (allow_cache && crt->io_accounting_last[metric] != UINT64_MAX)
                 goto done;
 
         r = unit_get_io_accounting_raw(u, raw);
-        if (r == -ENODATA && u->io_accounting_last[metric] != UINT64_MAX)
+        if (r == -ENODATA && crt->io_accounting_last[metric] != UINT64_MAX)
                 goto done;
         if (r < 0)
                 return r;
 
         for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
                 /* Saturated subtraction */
-                if (raw[i] > u->io_accounting_base[i])
-                        u->io_accounting_last[i] = raw[i] - u->io_accounting_base[i];
+                if (raw[i] > crt->io_accounting_base[i])
+                        crt->io_accounting_last[i] = raw[i] - crt->io_accounting_base[i];
                 else
-                        u->io_accounting_last[i] = 0;
+                        crt->io_accounting_last[i] = 0;
         }
 
 done:
         if (ret)
-                *ret = u->io_accounting_last[metric];
+                *ret = crt->io_accounting_last[metric];
 
         return 0;
 }
@@ -4369,11 +4901,15 @@ int unit_reset_cpu_accounting(Unit *u) {
 
         assert(u);
 
-        u->cpu_usage_last = NSEC_INFINITY;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
+        crt->cpu_usage_last = NSEC_INFINITY;
 
-        r = unit_get_cpu_usage_raw(u, &u->cpu_usage_base);
+        r = unit_get_cpu_usage_raw(u, &crt->cpu_usage_base);
         if (r < 0) {
-                u->cpu_usage_base = 0;
+                crt->cpu_usage_base = 0;
                 return r;
         }
 
@@ -4383,7 +4919,11 @@ int unit_reset_cpu_accounting(Unit *u) {
 void unit_reset_memory_accounting_last(Unit *u) {
         assert(u);
 
-        FOREACH_ARRAY(i, u->memory_accounting_last, ELEMENTSOF(u->memory_accounting_last))
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return; /* without a cgroup runtime and path the cached values are left untouched */
+
+        FOREACH_ELEMENT(i, crt->memory_accounting_last) /* every cached metric becomes UINT64_MAX */
                 *i = UINT64_MAX;
 }
 
@@ -4392,13 +4932,17 @@ int unit_reset_ip_accounting(Unit *u) {
 
         assert(u);
 
-        if (u->ip_accounting_ingress_map_fd >= 0)
-                RET_GATHER(r, bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd));
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
+        if (crt->ip_accounting_ingress_map_fd >= 0)
+                RET_GATHER(r, bpf_firewall_reset_accounting(crt->ip_accounting_ingress_map_fd));
 
-        if (u->ip_accounting_egress_map_fd >= 0)
-                RET_GATHER(r, bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd));
+        if (crt->ip_accounting_egress_map_fd >= 0)
+                RET_GATHER(r, bpf_firewall_reset_accounting(crt->ip_accounting_egress_map_fd));
 
-        zero(u->ip_accounting_extra);
+        zero(crt->ip_accounting_extra);
 
         return r;
 }
@@ -4406,7 +4950,11 @@ int unit_reset_ip_accounting(Unit *u) {
 void unit_reset_io_accounting_last(Unit *u) {
         assert(u);
 
-        FOREACH_ARRAY(i, u->io_accounting_last, _CGROUP_IO_ACCOUNTING_METRIC_MAX)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return; /* without a cgroup runtime and path the cached values are left untouched */
+
+        FOREACH_ELEMENT(i, crt->io_accounting_last) /* UINT64_MAX marks "no cached value" */
                 *i = UINT64_MAX;
 }
 
@@ -4415,11 +4963,15 @@ int unit_reset_io_accounting(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
         unit_reset_io_accounting_last(u);
 
-        r = unit_get_io_accounting_raw(u, u->io_accounting_base);
+        r = unit_get_io_accounting_raw(u, crt->io_accounting_base);
         if (r < 0) {
-                zero(u->io_accounting_base);
+                zero(crt->io_accounting_base);
                 return r;
         }
 
@@ -4445,6 +4997,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         if (m == 0)
                 return;
 
@@ -4455,10 +5011,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
                 m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
 
-        if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
+        if (FLAGS_SET(crt->cgroup_invalidated_mask, m)) /* NOP? */
                 return;
 
-        u->cgroup_invalidated_mask |= m;
+        crt->cgroup_invalidated_mask |= m;
         unit_add_to_cgroup_realize_queue(u);
 }
 
@@ -4468,10 +5024,14 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return;
 
-        if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
+        if (crt->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
                 return;
 
-        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+        crt->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
         unit_add_to_cgroup_realize_queue(u);
 
         /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
@@ -4523,66 +5083,102 @@ void manager_invalidate_startup_units(Manager *m) {
                 unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO|CGROUP_MASK_CPUSET);
 }
 
+/* Reads the "frozen" key of the unit's cgroup.events and maps it to FREEZER_RUNNING ("0") or
+ * FREEZER_FROZEN ("1"); anything else is logged and reported as _FREEZER_STATE_INVALID.
+ * Returns -EOWNERDEAD without a cgroup path, -ENODATA if the read fails with -ENOENT/-ENXIO. */
+static int unit_cgroup_freezer_kernel_state(Unit *u, FreezerState *ret) {
+        _cleanup_free_ char *frozen = NULL;
+        CGroupRuntime *crt;
+        int r;
+
+        assert(u);
+        assert(ret);
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
+        r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path,
+                                   "cgroup.events", STRV_MAKE("frozen"), &frozen);
+        if (r == -ENOENT || r == -ENXIO)
+                return -ENODATA;
+        if (r < 0)
+                return r;
+
+        if (streq(frozen, "0")) {
+                *ret = FREEZER_RUNNING;
+                return 0;
+        }
+        if (streq(frozen, "1")) {
+                *ret = FREEZER_FROZEN;
+                return 0;
+        }
+
+        log_unit_debug(u, "Unexpected cgroup frozen state: %s", frozen);
+        *ret = _FREEZER_STATE_INVALID;
+        return 0;
+}
+
 int unit_cgroup_freezer_action(Unit *u, FreezerAction action) {
         _cleanup_free_ char *path = NULL;
-        FreezerState target, kernel = _FREEZER_STATE_INVALID;
-        int r, ret;
+        FreezerState target, current, next;
+        int r;
 
         assert(u);
-        assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
+        assert(IN_SET(action, FREEZER_FREEZE, FREEZER_PARENT_FREEZE,
+                              FREEZER_THAW, FREEZER_PARENT_THAW));
 
         if (!cg_freezer_supported())
                 return 0;
 
-        /* Ignore all requests to thaw init.scope or -.slice and reject all requests to freeze them */
-        if (unit_has_name(u, SPECIAL_ROOT_SLICE) || unit_has_name(u, SPECIAL_INIT_SCOPE))
-                return action == FREEZER_FREEZE ? -EPERM : 0;
-
-        if (!u->cgroup_realized)
-                return -EBUSY;
-
-        if (action == FREEZER_THAW) {
-                Unit *slice = UNIT_GET_SLICE(u);
+        unit_next_freezer_state(u, action, &next, &target); /* fills in both 'next' and 'target' */
 
-                if (slice) {
-                        r = unit_cgroup_freezer_action(slice, FREEZER_THAW);
-                        if (r < 0)
-                                return log_unit_error_errno(u, r, "Failed to thaw slice %s of unit: %m", slice->id);
-                }
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_realized) {
+                /* No realized cgroup = nothing to freeze */
+                u->freezer_state = freezer_state_finish(next);
+                return 0;
         }
 
-        target = action == FREEZER_FREEZE ? FREEZER_FROZEN : FREEZER_RUNNING;
-
-        r = unit_freezer_state_kernel(u, &kernel);
+        r = unit_cgroup_freezer_kernel_state(u, &current);
         if (r < 0)
-                log_unit_debug_errno(u, r, "Failed to obtain cgroup freezer state: %m");
+                return r;
 
-        if (target == kernel) {
-                u->freezer_state = target;
-                if (action == FREEZER_FREEZE)
-                        return 0;
-                ret = 0;
-        } else
-                ret = 1;
+        if (current == target) /* the kernel-side state already matches what we want */
+                next = freezer_state_finish(next);
+        else if (IN_SET(next, FREEZER_FROZEN, FREEZER_FROZEN_BY_PARENT, FREEZER_RUNNING)) {
+                /* We're transitioning into a finished state, which implies that the cgroup's
+                 * current state already matches the target and thus we'd return 0. But, reality
+                 * shows otherwise. This indicates that our freezer_state tracking has diverged
+                 * from the real state of the cgroup, which can happen if someone meddles with the
+                 * cgroup from underneath us. This really shouldn't happen during normal operation,
+                 * though. So, let's warn about it and fix up the state to be valid */
+
+                log_unit_warning(u, "Unit wants to transition to %s freezer state but cgroup is unexpectedly %s, fixing up.",
+                                 freezer_state_to_string(next), freezer_state_to_string(current) ?: "(invalid)");
+
+                if (next == FREEZER_FROZEN)
+                        next = FREEZER_FREEZING;
+                else if (next == FREEZER_FROZEN_BY_PARENT)
+                        next = FREEZER_FREEZING_BY_PARENT;
+                else if (next == FREEZER_RUNNING)
+                        next = FREEZER_THAWING;
+        }
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.freeze", &path);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "cgroup.freeze", &path);
         if (r < 0)
                 return r;
 
-        log_unit_debug(u, "%s unit.", action == FREEZER_FREEZE ? "Freezing" : "Thawing");
-
-        if (target != kernel) {
-                if (action == FREEZER_FREEZE)
-                        u->freezer_state = FREEZER_FREEZING;
-                else
-                        u->freezer_state = FREEZER_THAWING;
-        }
+        log_unit_debug(u, "Unit freezer state was %s, now %s.",
+                       freezer_state_to_string(u->freezer_state),
+                       freezer_state_to_string(next));
 
-        r = write_string_file(path, one_zero(action == FREEZER_FREEZE), WRITE_STRING_FILE_DISABLE_BUFFER);
+        r = write_string_file(path, one_zero(target == FREEZER_FROZEN), WRITE_STRING_FILE_DISABLE_BUFFER);
         if (r < 0)
                 return r;
 
-        return ret;
+        u->freezer_state = next;
+        return target != current; /* 1 if the cgroup was not yet in the target state, 0 otherwise */
 }
 
 int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
@@ -4592,10 +5188,11 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         assert(u);
         assert(cpus);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4604,7 +5201,7 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         if (r == 0)
                 return -ENODATA;
 
-        r = cg_get_attribute("cpuset", u->cgroup_path, name, &v);
+        r = cg_get_attribute("cpuset", crt->cgroup_path, name, &v);
         if (r == -ENOENT)
                 return -ENODATA;
         if (r < 0)
@@ -4613,6 +5210,422 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL);
 }
 
+/* Allocates a CGroupRuntime with all fds invalidated and cached counters unset; NULL on OOM. */
+CGroupRuntime *cgroup_runtime_new(void) {
+        _cleanup_(cgroup_runtime_freep) CGroupRuntime *crt = NULL;
+
+        crt = new(CGroupRuntime, 1);
+        if (!crt)
+                return NULL;
+
+        *crt = (CGroupRuntime) {
+                .cpu_usage_last = NSEC_INFINITY,
+
+                .cgroup_control_inotify_wd = -1,
+                .cgroup_memory_inotify_wd = -1,
+
+                .ip_accounting_ingress_map_fd = -EBADF,
+                .ip_accounting_egress_map_fd = -EBADF,
+
+                .ipv4_allow_map_fd = -EBADF,
+                .ipv6_allow_map_fd = -EBADF,
+                .ipv4_deny_map_fd = -EBADF,
+                .ipv6_deny_map_fd = -EBADF,
+
+                .cgroup_invalidated_mask = _CGROUP_MASK_ALL,
+        };
+
+        FOREACH_ELEMENT(i, crt->memory_accounting_last)
+                *i = UINT64_MAX;
+        FOREACH_ELEMENT(i, crt->io_accounting_last)
+                *i = UINT64_MAX;
+        /* io_accounting_base[] is subtracted from, and ip_accounting_extra[] added to, the raw
+         * counters: both stay at the zero the initializer above gave them. UINT64_MAX here would
+         * force I/O accounting to 0 and make IP accounting wrap around. */
+
+        return TAKE_PTR(crt);
+}
+
+/* Frees a CGroupRuntime together with every resource it owns; accepts NULL. */
+CGroupRuntime *cgroup_runtime_free(CGroupRuntime *crt) {
+        if (!crt)
+                return NULL;
+
+        fdset_free(crt->initial_socket_bind_link_fds);
+#if BPF_FRAMEWORK
+        bpf_link_free(crt->ipv4_socket_bind_link);
+        bpf_link_free(crt->ipv6_socket_bind_link);
+#endif
+        hashmap_free(crt->bpf_foreign_by_key);
+        bpf_program_free(crt->bpf_device_control_installed);
+#if BPF_FRAMEWORK
+        bpf_link_free(crt->restrict_ifaces_ingress_bpf_link);
+        bpf_link_free(crt->restrict_ifaces_egress_bpf_link);
+#endif
+        fdset_free(crt->initial_restrict_ifaces_link_fds);
+
+        /* All six map fds start out as -EBADF in cgroup_runtime_new() and belong to this object,
+         * hence also close the two accounting maps here in case they are still open. */
+        safe_close(crt->ip_accounting_ingress_map_fd);
+        safe_close(crt->ip_accounting_egress_map_fd);
+        safe_close(crt->ipv4_allow_map_fd);
+        safe_close(crt->ipv6_allow_map_fd);
+        safe_close(crt->ipv4_deny_map_fd);
+        safe_close(crt->ipv6_deny_map_fd);
+
+        bpf_program_free(crt->ip_bpf_ingress);
+        bpf_program_free(crt->ip_bpf_ingress_installed);
+        bpf_program_free(crt->ip_bpf_egress);
+        bpf_program_free(crt->ip_bpf_egress_installed);
+        set_free(crt->ip_bpf_custom_ingress);
+        set_free(crt->ip_bpf_custom_ingress_installed);
+        set_free(crt->ip_bpf_custom_egress);
+        set_free(crt->ip_bpf_custom_egress_installed);
+        free(crt->cgroup_path);
+        return mfree(crt);
+}
+
+static const char* const ip_accounting_metric_field_table[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
+        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
+        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
+}; /* serialization keys, indexed by CGroupIPAccountingMetric */
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(ip_accounting_metric_field, CGroupIPAccountingMetric);
+
+static const char* const io_accounting_metric_field_base_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-base",
+        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-base",
+        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-base",
+        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-base",
+}; /* serialization keys for io_accounting_base[], indexed by CGroupIOAccountingMetric */
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_base, CGroupIOAccountingMetric);
+
+static const char* const io_accounting_metric_field_last_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-last",
+        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-last",
+        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-last",
+        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
+}; /* serialization keys for io_accounting_last[], indexed by CGroupIOAccountingMetric */
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_last, CGroupIOAccountingMetric);
+
+static const char* const memory_accounting_metric_field_last_table[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1] = {
+        [CGROUP_MEMORY_PEAK]      = "memory-accounting-peak",
+        [CGROUP_MEMORY_SWAP_PEAK] = "memory-accounting-swap-peak",
+}; /* serialization keys for the cached memory metrics, indexed by CGroupMemoryAccountingMetric */
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(memory_accounting_metric_field_last, CGroupMemoryAccountingMetric);
+
+/* Writes the mask in string form under the given key; an empty mask is not serialized at all. */
+static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
+        _cleanup_free_ char *formatted = NULL;
+        int r;
+
+        assert(f);
+        assert(key);
+
+        if (mask != 0) {
+                r = cg_mask_to_string(mask, &formatted);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to format cgroup mask: %m");
+                return serialize_item(f, key, formatted);
+        }
+        return 0;
+}
+
+int cgroup_runtime_serialize(Unit *u, FILE *f, FDSet *fds) {
+        int r;
+
+        assert(u);
+        assert(f);
+        assert(fds);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0; /* no cgroup runtime, hence nothing to write */
+
+        (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, crt->cpu_usage_base);
+        if (crt->cpu_usage_last != NSEC_INFINITY)
+                (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, crt->cpu_usage_last);
+
+        if (crt->managed_oom_kill_last > 0)
+                (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, crt->managed_oom_kill_last);
+
+        if (crt->oom_kill_last > 0)
+                (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, crt->oom_kill_last);
+
+        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++) {
+                uint64_t v;
+
+                r = unit_get_memory_accounting(u, metric, &v);
+                if (r >= 0) /* metrics that cannot be obtained are simply left out */
+                        (void) serialize_item_format(f, memory_accounting_metric_field_last_to_string(metric), "%" PRIu64, v);
+        }
+
+        for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                uint64_t v;
+
+                r = unit_get_ip_accounting(u, m, &v);
+                if (r >= 0) /* metrics that cannot be obtained are simply left out */
+                        (void) serialize_item_format(f, ip_accounting_metric_field_to_string(m), "%" PRIu64, v);
+        }
+
+        for (CGroupIOAccountingMetric im = 0; im < _CGROUP_IO_ACCOUNTING_METRIC_MAX; im++) {
+                (void) serialize_item_format(f, io_accounting_metric_field_base_to_string(im), "%" PRIu64, crt->io_accounting_base[im]);
+
+                if (crt->io_accounting_last[im] != UINT64_MAX)
+                        (void) serialize_item_format(f, io_accounting_metric_field_last_to_string(im), "%" PRIu64, crt->io_accounting_last[im]);
+        }
+
+        if (crt->cgroup_path)
+                (void) serialize_item(f, "cgroup", crt->cgroup_path);
+        if (crt->cgroup_id != 0)
+                (void) serialize_item_format(f, "cgroup-id", "%" PRIu64, crt->cgroup_id);
+
+        (void) serialize_bool(f, "cgroup-realized", crt->cgroup_realized);
+        (void) serialize_cgroup_mask(f, "cgroup-realized-mask", crt->cgroup_realized_mask);
+        (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", crt->cgroup_enabled_mask);
+        (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", crt->cgroup_invalidated_mask);
+
+        (void) bpf_socket_bind_serialize(u, f, fds);
+
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", crt->ip_bpf_ingress_installed);
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", crt->ip_bpf_egress_installed);
+        (void) bpf_program_serialize_attachment(f, fds, "bpf-device-control-installed", crt->bpf_device_control_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", crt->ip_bpf_custom_ingress_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", crt->ip_bpf_custom_egress_installed);
+
+        (void) bpf_restrict_ifaces_serialize(u, f, fds);
+
+        return 0; /* best-effort: the results of the individual writes above are ignored */
+}
+
+#define MATCH_DESERIALIZE(u, key, l, v, parse_func, target)             \
+        ({                                                              \
+                bool _deserialize_matched = streq(l, key);              \
+                if (_deserialize_matched) {                             \
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                        if (!crt)                                       \
+                                log_oom_debug();                        \
+                        else {                                          \
+                                int _deserialize_r = parse_func(v);     \
+                                if (_deserialize_r < 0)                 \
+                                        log_unit_debug_errno(u, _deserialize_r, \
+                                                             "Failed to parse \"%s=%s\", ignoring.", l, v); \
+                                else                                    \
+                                        crt->target = _deserialize_r; \
+                        }                                               \
+                }                                                       \
+                _deserialize_matched;                                   \
+        })
+
+#define MATCH_DESERIALIZE_IMMEDIATE(u, key, l, v, parse_func, target)   \
+        ({                                                              \
+                 bool _deserialize_matched = streq(l, key);             \
+                 if (_deserialize_matched) {                            \
+                         CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                         if (!crt)                                      \
+                                 log_oom_debug();                       \
+                         else {                                         \
+                                 int _deserialize_r = parse_func(v, &crt->target); \
+                                 if (_deserialize_r < 0)                \
+                                         log_unit_debug_errno(u, _deserialize_r, \
+                                                              "Failed to parse \"%s=%s\", ignoring", l, v); \
+                         }                                              \
+                 }                                                      \
+                _deserialize_matched;                                   \
+        })
+
+#define MATCH_DESERIALIZE_METRIC(u, key, l, v, parse_func, target)             \
+        ({                                                              \
+                bool _deserialize_matched = streq(l, key);              \
+                if (_deserialize_matched) {                             \
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                        if (!crt)                                       \
+                                log_oom_debug();                        \
+                        else {                                          \
+                                int _deserialize_r = parse_func(v);     \
+                                if (_deserialize_r < 0)                 \
+                                        log_unit_debug_errno(u, _deserialize_r, \
+                                                             "Failed to parse \"%s=%s\", ignoring.", l, v); \
+                                else                                    \
+                                        crt->target = _deserialize_r; \
+                        }                                               \
+                }                                                       \
+                _deserialize_matched;                                   \
+        })
+
+int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value, FDSet *fds) {
+        int r;
+
+        assert(u);
+        assert(value);
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return 0;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cpu-usage-base", key, value, safe_atou64, cpu_usage_base) ||
+            MATCH_DESERIALIZE_IMMEDIATE(u, "cpuacct-usage-base", key, value, safe_atou64, cpu_usage_base))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cpu-usage-last", key, value, safe_atou64, cpu_usage_last))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "managed-oom-kill-last", key, value, safe_atou64, managed_oom_kill_last))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "oom-kill-last", key, value, safe_atou64, oom_kill_last))
+                return 1;
+
+        if (streq(key, "cgroup")) {
+                r = unit_set_cgroup_path(u, value);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", value);
+
+                (void) unit_watch_cgroup(u);
+                (void) unit_watch_cgroup_memory(u);
+                return 1;
+        }
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-id", key, value, safe_atou64, cgroup_id))
+                return 1;
+
+        if (MATCH_DESERIALIZE(u, "cgroup-realized", key, value, parse_boolean, cgroup_realized))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-realized-mask", key, value, cg_mask_from_string, cgroup_realized_mask))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-enabled-mask", key, value, cg_mask_from_string, cgroup_enabled_mask))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-invalidated-mask", key, value, cg_mask_from_string, cgroup_invalidated_mask))
+                return 1;
+
+        if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
+                int fd;
+
+                fd = deserialize_fd(fds, value);
+                if (fd >= 0)
+                        (void) bpf_socket_bind_add_initial_link_fd(u, fd);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(key,
+                       "ip-bpf-ingress-installed", "ip-bpf-egress-installed",
+                       "bpf-device-control-installed",
+                       "ip-bpf-custom-ingress-installed", "ip-bpf-custom-egress-installed")) {
+
+                CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                if (!crt)
+                        log_oom_debug();
+                else {
+                        if (streq(key, "ip-bpf-ingress-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_ingress_installed);
+
+                        if (streq(key, "ip-bpf-egress-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_egress_installed);
+
+                        if (streq(key, "bpf-device-control-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
+
+                        if (streq(key, "ip-bpf-custom-ingress-installed"))
+                                (void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_ingress_installed);
+
+                        if (streq(key, "ip-bpf-custom-egress-installed"))
+                                (void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_egress_installed);
+                }
+
+                return 1;
+        }
+
+        if (streq(key, "restrict-ifaces-bpf-fd")) {
+                int fd;
+
+                fd = deserialize_fd(fds, value);
+                if (fd >= 0)
+                        (void) bpf_restrict_ifaces_add_initial_link_fd(u, fd);
+                return 1;
+        }
+
+        CGroupMemoryAccountingMetric mm = memory_accounting_metric_field_last_from_string(key);
+        if (mm >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse memory accounting last value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->memory_accounting_last[mm] = c;
+                }
+
+                return 1;
+        }
+
+        CGroupIPAccountingMetric ipm = ip_accounting_metric_field_from_string(key);
+        if (ipm >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->ip_accounting_extra[ipm] = c;
+                }
+
+                return 1;
+        }
+
+        CGroupIOAccountingMetric iom = io_accounting_metric_field_base_from_string(key);
+        if (iom >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IO accounting base value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->io_accounting_base[iom] = c;
+                }
+
+                return 1;
+        }
+
+        iom = io_accounting_metric_field_last_from_string(key);
+        if (iom >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IO accounting last value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->io_accounting_last[iom] = c;
+                }
+                return 1;
+        }
+
+        return 0;
+}
+
 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
         [CGROUP_DEVICE_POLICY_AUTO]   = "auto",
         [CGROUP_DEVICE_POLICY_CLOSED] = "closed",
@@ -4621,17 +5634,10 @@ static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] =
 
 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);
 
-static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = {
-        [FREEZER_FREEZE] = "freeze",
-        [FREEZER_THAW] = "thaw",
-};
-
-DEFINE_STRING_TABLE_LOOKUP(freezer_action, FreezerAction);
-
 static const char* const cgroup_pressure_watch_table[_CGROUP_PRESSURE_WATCH_MAX] = {
-        [CGROUP_PRESSURE_WATCH_OFF] = "off",
+        [CGROUP_PRESSURE_WATCH_OFF]  = "off",
         [CGROUP_PRESSURE_WATCH_AUTO] = "auto",
-        [CGROUP_PRESSURE_WATCH_ON] = "on",
+        [CGROUP_PRESSURE_WATCH_ON]   = "on",
         [CGROUP_PRESSURE_WATCH_SKIP] = "skip",
 };
 
@@ -4663,3 +5669,11 @@ static const char* const cgroup_memory_accounting_metric_table[_CGROUP_MEMORY_AC
 };
 
 DEFINE_STRING_TABLE_LOOKUP(cgroup_memory_accounting_metric, CGroupMemoryAccountingMetric);
+
+static const char *const cgroup_effective_limit_type_table[_CGROUP_LIMIT_TYPE_MAX] = {
+        [CGROUP_LIMIT_MEMORY_MAX]  = "EffectiveMemoryMax",
+        [CGROUP_LIMIT_MEMORY_HIGH] = "EffectiveMemoryHigh",
+        [CGROUP_LIMIT_TASKS_MAX]   = "EffectiveTasksMax",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(cgroup_effective_limit_type, CGroupLimitType);
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index f1b674b..72fe275 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -3,7 +3,10 @@
 
 #include <stdbool.h>
 
-#include "bpf-lsm.h"
+#include "sd-event.h"
+
+#include "bpf-program.h"
+#include "bpf-restrict-fs.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
 #include "firewall-util.h"
@@ -35,6 +38,7 @@ typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
 typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
 typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
 typedef struct CGroupSocketBindItem CGroupSocketBindItem;
+typedef struct CGroupRuntime CGroupRuntime;
 
 typedef enum CGroupDevicePolicy {
         /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
@@ -53,7 +57,9 @@ typedef enum CGroupDevicePolicy {
 
 typedef enum FreezerAction {
         FREEZER_FREEZE,
+        FREEZER_PARENT_FREEZE,
         FREEZER_THAW,
+        FREEZER_PARENT_THAW,
 
         _FREEZER_ACTION_MAX,
         _FREEZER_ACTION_INVALID = -EINVAL,
@@ -129,6 +135,9 @@ typedef enum CGroupPressureWatch {
         _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
 } CGroupPressureWatch;
 
+/* The user-supplied cgroup-related configuration options. This remains mostly immutable while the service
+ * manager is running (except for an occasional SetProperty() configuration change), outside of reload
+ * cycles. When adding members make sure to update cgroup_context_copy() accordingly. */
 struct CGroupContext {
         bool cpu_accounting;
         bool io_accounting;
@@ -188,6 +197,8 @@ struct CGroupContext {
         bool startup_memory_swap_max_set:1;
         bool startup_memory_zswap_max_set:1;
 
+        bool memory_zswap_writeback;
+
         Set *ip_address_allow;
         Set *ip_address_deny;
         /* These two flags indicate that redundant entries have been removed from
@@ -276,6 +287,95 @@ typedef enum CGroupMemoryAccountingMetric {
         _CGROUP_MEMORY_ACCOUNTING_METRIC_INVALID = -EINVAL,
 } CGroupMemoryAccountingMetric;
 
+/* Used for limits whose value sets have an infimum */
+typedef enum CGroupLimitType {
+        CGROUP_LIMIT_MEMORY_MAX,
+        CGROUP_LIMIT_MEMORY_HIGH,
+        CGROUP_LIMIT_TASKS_MAX,
+        _CGROUP_LIMIT_TYPE_MAX,
+        _CGROUP_LIMIT_INVALID = -EINVAL,
+} CGroupLimitType;
+
+/* The dynamic, regularly updated information about a unit that has a realized cgroup. This is only allocated when a unit is first realized */
+typedef struct CGroupRuntime {
+        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+        nsec_t cpu_usage_base;
+        nsec_t cpu_usage_last; /* the most recently read value */
+
+        /* Most recently read value of memory accounting metrics */
+        uint64_t memory_accounting_last[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1];
+
+        /* The current counter of OOM kills initiated by systemd-oomd */
+        uint64_t managed_oom_kill_last;
+
+        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
+        uint64_t oom_kill_last;
+
+        /* Where the io.stat data was at the time the unit was started */
+        uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
+        uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
+
+        /* Counterparts in the cgroup filesystem */
+        char *cgroup_path;
+        uint64_t cgroup_id;
+        CGroupMask cgroup_realized_mask;           /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
+        CGroupMask cgroup_enabled_mask;            /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
+        CGroupMask cgroup_invalidated_mask;        /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
+        CGroupMask cgroup_members_mask;            /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
+
+        /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
+        int cgroup_control_inotify_wd;
+        int cgroup_memory_inotify_wd;
+
+        /* Device Controller BPF program */
+        BPFProgram *bpf_device_control_installed;
+
+        /* IP BPF Firewalling/accounting */
+        int ip_accounting_ingress_map_fd;
+        int ip_accounting_egress_map_fd;
+        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
+        int ipv4_allow_map_fd;
+        int ipv6_allow_map_fd;
+        int ipv4_deny_map_fd;
+        int ipv6_deny_map_fd;
+        BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
+        BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
+        Set *ip_bpf_custom_ingress;
+        Set *ip_bpf_custom_ingress_installed;
+        Set *ip_bpf_custom_egress;
+        Set *ip_bpf_custom_egress_installed;
+
+        /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
+         * attached to unit cgroup by provided program fd and attach type. */
+        Hashmap *bpf_foreign_by_key;
+
+        FDSet *initial_socket_bind_link_fds;
+#if BPF_FRAMEWORK
+        /* BPF links to BPF programs attached to cgroup/bind{4|6} hooks and
+         * responsible for allowing or denying a unit to bind(2) to a socket
+         * address. */
+        struct bpf_link *ipv4_socket_bind_link;
+        struct bpf_link *ipv6_socket_bind_link;
+#endif
+
+        FDSet *initial_restrict_ifaces_link_fds;
+#if BPF_FRAMEWORK
+        struct bpf_link *restrict_ifaces_ingress_bpf_link;
+        struct bpf_link *restrict_ifaces_egress_bpf_link;
+#endif
+
+        bool cgroup_realized:1;
+        bool cgroup_members_mask_valid:1;
+
+        /* Reset cgroup accounting next time we fork something off */
+        bool reset_accounting:1;
+
+        /* Whether we warned about clamping the CPU quota period */
+        bool warned_clamping_cpu_quota_period:1;
+} CGroupRuntime;
+
 typedef struct Unit Unit;
 typedef struct Manager Manager;
 typedef enum ManagerState ManagerState;
@@ -285,6 +385,7 @@ uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state);
 usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
 
 void cgroup_context_init(CGroupContext *c);
+int cgroup_context_copy(CGroupContext *dst, const CGroupContext *src);
 void cgroup_context_done(CGroupContext *c);
 void cgroup_context_dump(Unit *u, FILE* f, const char *prefix);
 void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f);
@@ -309,6 +410,17 @@ static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) {
 int cgroup_context_add_device_allow(CGroupContext *c, const char *dev, CGroupDevicePermissions p);
 int cgroup_context_add_or_update_device_allow(CGroupContext *c, const char *dev, CGroupDevicePermissions p);
 int cgroup_context_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
+static inline int cgroup_context_add_bpf_foreign_program_dup(CGroupContext *c, const CGroupBPFForeignProgram *p) {
+        return cgroup_context_add_bpf_foreign_program(c, p->attach_type, p->bpffs_path);
+}
+int cgroup_context_add_io_device_limit_dup(CGroupContext *c, const CGroupIODeviceLimit *l);
+int cgroup_context_add_io_device_weight_dup(CGroupContext *c, const CGroupIODeviceWeight *w);
+int cgroup_context_add_io_device_latency_dup(CGroupContext *c, const CGroupIODeviceLatency *l);
+int cgroup_context_add_block_io_device_weight_dup(CGroupContext *c, const CGroupBlockIODeviceWeight *w);
+int cgroup_context_add_block_io_device_bandwidth_dup(CGroupContext *c, const CGroupBlockIODeviceBandwidth *b);
+int cgroup_context_add_device_allow_dup(CGroupContext *c, const CGroupDeviceAllow *a);
+int cgroup_context_add_socket_bind_item_allow_dup(CGroupContext *c, const CGroupSocketBindItem *i);
+int cgroup_context_add_socket_bind_item_deny_dup(CGroupContext *c, const CGroupSocketBindItem *i);
 
 void unit_modify_nft_set(Unit *u, bool add);
 
@@ -336,6 +448,7 @@ int unit_watch_cgroup(Unit *u);
 int unit_watch_cgroup_memory(Unit *u);
 void unit_add_to_cgroup_realize_queue(Unit *u);
 
+int unit_cgroup_is_empty(Unit *u);
 void unit_release_cgroup(Unit *u);
 /* Releases the cgroup only if it is recursively empty.
  * Returns true if the cgroup was released, false otherwise. */
@@ -353,9 +466,9 @@ void manager_shutdown_cgroup(Manager *m, bool delete);
 unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
 
 Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
-Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid);
-Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid);
-Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid);
+Unit *manager_get_unit_by_pidref_cgroup(Manager *m, const PidRef *pid);
+Unit *manager_get_unit_by_pidref_watching(Manager *m, const PidRef *pid);
+Unit* manager_get_unit_by_pidref(Manager *m, const PidRef *pid);
 Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
 
 uint64_t unit_get_ancestor_memory_min(Unit *u);
@@ -374,6 +487,7 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret);
 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
 int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret);
 int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+int unit_get_effective_limit(Unit *u, CGroupLimitType type, uint64_t *ret);
 
 int unit_reset_cpu_accounting(Unit *u);
 void unit_reset_memory_accounting_last(Unit *u);
@@ -413,6 +527,13 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
 const char* freezer_action_to_string(FreezerAction a) _const_;
 FreezerAction freezer_action_from_string(const char *s) _pure_;
 
+CGroupRuntime *cgroup_runtime_new(void);
+CGroupRuntime *cgroup_runtime_free(CGroupRuntime *crt);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CGroupRuntime*, cgroup_runtime_free);
+
+int cgroup_runtime_serialize(Unit *u, FILE *f, FDSet *fds);
+int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value, FDSet *fds);
+
 const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_;
 CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_;
 
@@ -425,5 +546,8 @@ CGroupIPAccountingMetric cgroup_ip_accounting_metric_from_string(const char *s)
 const char* cgroup_io_accounting_metric_to_string(CGroupIOAccountingMetric m) _const_;
 CGroupIOAccountingMetric cgroup_io_accounting_metric_from_string(const char *s) _pure_;
 
+const char* cgroup_effective_limit_type_to_string(CGroupLimitType m) _const_;
+CGroupLimitType cgroup_effective_limit_type_from_string(const char *s) _pure_;
+
 const char* cgroup_memory_accounting_metric_to_string(CGroupMemoryAccountingMetric m) _const_;
 CGroupMemoryAccountingMetric cgroup_memory_accounting_metric_from_string(const char *s) _pure_;
diff --git a/src/core/core-varlink.c b/src/core/core-varlink.c
index cd91381..3e6168d 100644
--- a/src/core/core-varlink.c
+++ b/src/core/core-varlink.c
@@ -69,6 +69,10 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, J
         if (!c)
                 return -EINVAL;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL;
+
         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
                 /* systemd-oomd should always treat inactive units as though they didn't enable any action since they
                  * should not have a valid cgroup */
@@ -83,19 +87,24 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, J
 
         return json_build(ret_v, JSON_BUILD_OBJECT(
                                  JSON_BUILD_PAIR("mode", JSON_BUILD_STRING(mode)),
-                                 JSON_BUILD_PAIR("path", JSON_BUILD_STRING(u->cgroup_path)),
+                                 JSON_BUILD_PAIR("path", JSON_BUILD_STRING(crt->cgroup_path)),
                                  JSON_BUILD_PAIR("property", JSON_BUILD_STRING(property)),
                                  JSON_BUILD_PAIR_CONDITION(use_limit, "limit", JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit))));
 }
 
 int manager_varlink_send_managed_oom_update(Unit *u) {
         _cleanup_(json_variant_unrefp) JsonVariant *arr = NULL, *v = NULL;
+        CGroupRuntime *crt;
         CGroupContext *c;
         int r;
 
         assert(u);
 
-        if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager || !u->cgroup_path)
+        if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager)
+                return 0;
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         if (MANAGER_IS_SYSTEM(u->manager)) {
@@ -119,10 +128,10 @@ int manager_varlink_send_managed_oom_update(Unit *u) {
         if (r < 0)
                 return r;
 
-        for (size_t i = 0; i < ELEMENTSOF(managed_oom_mode_properties); i++) {
+        FOREACH_ELEMENT(i, managed_oom_mode_properties) {
                 _cleanup_(json_variant_unrefp) JsonVariant *e = NULL;
 
-                r = build_managed_oom_json_array_element(u, managed_oom_mode_properties[i], &e);
+                r = build_managed_oom_json_array_element(u, *i, &e);
                 if (r < 0)
                         return r;
 
@@ -173,16 +182,16 @@ static int build_managed_oom_cgroups_json(Manager *m, JsonVariant **ret) {
                         if (!c)
                                 continue;
 
-                        for (size_t j = 0; j < ELEMENTSOF(managed_oom_mode_properties); j++) {
+                        FOREACH_ELEMENT(i, managed_oom_mode_properties) {
                                 _cleanup_(json_variant_unrefp) JsonVariant *e = NULL;
 
                                 /* For the initial varlink call we only care about units that enabled (i.e. mode is not
                                  * set to "auto") oomd properties. */
-                                if (!(streq(managed_oom_mode_properties[j], "ManagedOOMSwap") && c->moom_swap == MANAGED_OOM_KILL) &&
-                                    !(streq(managed_oom_mode_properties[j], "ManagedOOMMemoryPressure") && c->moom_mem_pressure == MANAGED_OOM_KILL))
+                                if (!(streq(*i, "ManagedOOMSwap") && c->moom_swap == MANAGED_OOM_KILL) &&
+                                    !(streq(*i, "ManagedOOMMemoryPressure") && c->moom_mem_pressure == MANAGED_OOM_KILL))
                                         continue;
 
-                                r = build_managed_oom_json_array_element(u, managed_oom_mode_properties[j], &e);
+                                r = build_managed_oom_json_array_element(u, *i, &e);
                                 if (r < 0)
                                         return r;
 
@@ -359,7 +368,7 @@ static int build_group_json(const char *group_name, gid_t gid, JsonVariant **ret
                                        JSON_BUILD_PAIR("gid", JSON_BUILD_UNSIGNED(gid)),
                                        JSON_BUILD_PAIR("service", JSON_BUILD_CONST_STRING("io.systemd.DynamicUser")),
                                        JSON_BUILD_PAIR("disposition", JSON_BUILD_CONST_STRING("dynamic"))))));
-    }
+}
 
 static bool group_match_lookup_parameters(LookupParameters *p, const char *name, gid_t gid) {
         assert(p);
@@ -491,6 +500,43 @@ static void vl_disconnect(VarlinkServer *s, Varlink *link, void *userdata) {
                 m->managed_oom_varlink = varlink_unref(link);
 }
 
+static int manager_setup_varlink_server(Manager *m, VarlinkServer **ret) {
+        _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
+        int r;
+
+        assert(m);
+        assert(ret);
+
+        r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to allocate varlink server object: %m");
+
+        varlink_server_set_userdata(s, m);
+
+        r = varlink_server_add_interface_many(
+                        s,
+                        &vl_interface_io_systemd_UserDatabase,
+                        &vl_interface_io_systemd_ManagedOOM);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to add interfaces to varlink server: %m");
+
+        r = varlink_server_bind_method_many(
+                        s,
+                        "io.systemd.UserDatabase.GetUserRecord",  vl_method_get_user_record,
+                        "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
+                        "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships,
+                        "io.systemd.ManagedOOM.SubscribeManagedOOMCGroups", vl_method_subscribe_managed_oom_cgroups);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to register varlink methods: %m");
+
+        r = varlink_server_bind_disconnect(s, vl_disconnect);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to register varlink disconnect handler: %m");
+
+        *ret = TAKE_PTR(s);
+        return 0;
+}
+
 static int manager_varlink_init_system(Manager *m) {
         _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
         int r;
@@ -527,7 +573,7 @@ static int manager_varlink_init_system(Manager *m) {
                 }
         }
 
-        r = varlink_server_attach_event(s, m->event, SD_EVENT_PRIORITY_NORMAL);
+        r = varlink_server_attach_event(s, m->event, EVENT_PRIORITY_IPC);
         if (r < 0)
                 return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
 
@@ -585,7 +631,7 @@ static int manager_varlink_init_user(Manager *m) {
         if (r < 0)
                 return r;
 
-        r = varlink_attach_event(link, m->event, SD_EVENT_PRIORITY_NORMAL);
+        r = varlink_attach_event(link, m->event, EVENT_PRIORITY_IPC);
         if (r < 0)
                 return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");
 
@@ -597,43 +643,6 @@ static int manager_varlink_init_user(Manager *m) {
         return 1;
 }
 
-int manager_setup_varlink_server(Manager *m, VarlinkServer **ret) {
-        _cleanup_(varlink_server_unrefp) VarlinkServer *s = NULL;
-        int r;
-
-        assert(m);
-        assert(ret);
-
-        r = varlink_server_new(&s, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to allocate varlink server object: %m");
-
-        varlink_server_set_userdata(s, m);
-
-        r = varlink_server_add_interface_many(
-                        s,
-                        &vl_interface_io_systemd_UserDatabase,
-                        &vl_interface_io_systemd_ManagedOOM);
-        if (r < 0)
-                return log_error_errno(r, "Failed to add interfaces to varlink server: %m");
-
-        r = varlink_server_bind_method_many(
-                        s,
-                        "io.systemd.UserDatabase.GetUserRecord",  vl_method_get_user_record,
-                        "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
-                        "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships,
-                        "io.systemd.ManagedOOM.SubscribeManagedOOMCGroups",  vl_method_subscribe_managed_oom_cgroups);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to register varlink methods: %m");
-
-        r = varlink_server_bind_disconnect(s, vl_disconnect);
-        if (r < 0)
-                return log_debug_errno(r, "Failed to register varlink disconnect handler: %m");
-
-        *ret = TAKE_PTR(s);
-        return 0;
-}
-
 int manager_varlink_init(Manager *m) {
         return MANAGER_IS_SYSTEM(m) ? manager_varlink_init_system(m) : manager_varlink_init_user(m);
 }
diff --git a/src/core/core-varlink.h b/src/core/core-varlink.h
index 7f810d1..20507a4 100644
--- a/src/core/core-varlink.h
+++ b/src/core/core-varlink.h
@@ -6,10 +6,6 @@
 int manager_varlink_init(Manager *m);
 void manager_varlink_done(Manager *m);
 
-/* Creates a new VarlinkServer and binds methods. Does not set up sockets or attach events.
- * Used for manager serialize/deserialize. */
-int manager_setup_varlink_server(Manager *m, VarlinkServer **ret_s);
-
 /* The manager is expected to send an update to systemd-oomd if one of the following occurs:
  * - The value of ManagedOOM*= properties change
  * - A unit with ManagedOOM*= properties changes unit active state */
diff --git a/src/core/crash-handler.c b/src/core/crash-handler.c
index f5c31b6..4a3fc01 100644
--- a/src/core/crash-handler.c
+++ b/src/core/crash-handler.c
@@ -27,7 +27,13 @@ _noreturn_ void freeze_or_exit_or_reboot(void) {
                 _exit(EXIT_EXCEPTION);
         }
 
-        if (arg_crash_reboot) {
+        if (arg_crash_action == CRASH_POWEROFF) {
+                log_notice("Shutting down...");
+                (void) reboot(RB_POWER_OFF);
+                log_struct_errno(LOG_EMERG, errno,
+                                 LOG_MESSAGE("Failed to power off: %m"),
+                                 "MESSAGE_ID=" SD_MESSAGE_CRASH_FAILED_STR);
+        } else if (arg_crash_action == CRASH_REBOOT) {
                 log_notice("Rebooting in 10s...");
                 (void) sleep(10);
 
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 8a9570f..49e84b4 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -487,6 +487,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("StartupMemorySwapMax", "t", NULL, offsetof(CGroupContext, startup_memory_swap_max), 0),
         SD_BUS_PROPERTY("MemoryZSwapMax", "t", NULL, offsetof(CGroupContext, memory_zswap_max), 0),
         SD_BUS_PROPERTY("StartupMemoryZSwapMax", "t", NULL, offsetof(CGroupContext, startup_memory_zswap_max), 0),
+        SD_BUS_PROPERTY("MemoryZSwapWriteback", "b", bus_property_get_bool, offsetof(CGroupContext, memory_zswap_writeback), 0),
         SD_BUS_PROPERTY("MemoryLimit", "t", NULL, offsetof(CGroupContext, memory_limit), 0),
         SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
         SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
@@ -1279,6 +1280,9 @@ int bus_cgroup_set_property(
         if (streq(name, "MemoryLimitScale"))
                 return bus_cgroup_set_memory_scale(u, name, &c->memory_limit, message, flags, error);
 
+        if (streq(name, "MemoryZSwapWriteback"))
+                return bus_cgroup_set_boolean(u, name, &c->memory_zswap_writeback, CGROUP_MASK_MEMORY, message, flags, error);
+
         if (streq(name, "TasksAccounting"))
                 return bus_cgroup_set_boolean(u, name, &c->tasks_accounting, CGROUP_MASK_PIDS, message, flags, error);
 
@@ -1300,17 +1304,18 @@ int bus_cgroup_set_property(
 
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
                         c->cpu_quota_per_sec_usec = u64;
-                        u->warned_clamping_cpu_quota_period = false;
+                        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                        if (crt)
+                                crt->warned_clamping_cpu_quota_period = false;
                         unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
 
                         if (c->cpu_quota_per_sec_usec == USEC_INFINITY)
                                 unit_write_setting(u, flags, "CPUQuota", "CPUQuota=");
                         else
-                                /* config_parse_cpu_quota() requires an integer, so truncating division is used on
-                                 * purpose here. */
                                 unit_write_settingf(u, flags, "CPUQuota",
-                                                    "CPUQuota=%0.f%%",
-                                                    (double) (c->cpu_quota_per_sec_usec / 10000));
+                                                    "CPUQuota=" USEC_FMT ".%02" PRI_USEC "%%",
+                                                    c->cpu_quota_per_sec_usec / 10000,
+                                                    (c->cpu_quota_per_sec_usec % 10000) / 100);
                 }
 
                 return 1;
@@ -1324,7 +1329,9 @@ int bus_cgroup_set_property(
 
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
                         c->cpu_quota_period_usec = u64;
-                        u->warned_clamping_cpu_quota_period = false;
+                        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                        if (crt)
+                                crt->warned_clamping_cpu_quota_period = false;
                         unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
                         if (c->cpu_quota_period_usec == USEC_INFINITY)
                                 unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
@@ -2188,7 +2195,7 @@ int bus_cgroup_set_property(
                                 c->restrict_network_interfaces_is_allow_list = is_allow_list;
 
                         STRV_FOREACH(s, l) {
-                                if (!ifname_valid(*s)) {
+                                if (!ifname_valid_full(*s, IFNAME_VALID_ALTERNATIVE)) {
                                         log_full(LOG_WARNING, "Invalid interface name, ignoring: %s", *s);
                                         continue;
                                 }
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index 2d05ba7..21c260b 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -67,6 +67,7 @@ static BUS_DEFINE_PROPERTY_GET(property_get_cpu_sched_policy, "i", ExecContext,
 static BUS_DEFINE_PROPERTY_GET(property_get_cpu_sched_priority, "i", ExecContext, exec_context_get_cpu_sched_priority);
 static BUS_DEFINE_PROPERTY_GET(property_get_coredump_filter, "t", ExecContext, exec_context_get_coredump_filter);
 static BUS_DEFINE_PROPERTY_GET(property_get_timer_slack_nsec, "t", ExecContext, exec_context_get_timer_slack_nsec);
+static BUS_DEFINE_PROPERTY_GET(property_get_set_login_environment, "b", ExecContext, exec_context_get_set_login_environment);
 
 static int property_get_environment_files(
                 sd_bus *bus,
@@ -1038,7 +1039,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("User", "s", NULL, offsetof(ExecContext, user), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Group", "s", NULL, offsetof(ExecContext, group), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DynamicUser", "b", bus_property_get_bool, offsetof(ExecContext, dynamic_user), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("SetLoginEnvironment", "b", bus_property_get_tristate, offsetof(ExecContext, set_login_environment), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("SetLoginEnvironment", "b", property_get_set_login_environment, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RemoveIPC", "b", bus_property_get_bool, offsetof(ExecContext, remove_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SetCredential", "a(say)", property_get_set_credential, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SetCredentialEncrypted", "a(say)", property_get_set_credential, 0, SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1305,18 +1306,24 @@ int bus_set_transient_exec_command(
                 sd_bus_message *message,
                 UnitWriteFlags flags,
                 sd_bus_error *error) {
-        bool is_ex_prop = endswith(name, "Ex");
-        unsigned n = 0;
+
+        const char *ex_prop = endswith(ASSERT_PTR(name), "Ex");
+        size_t n = 0;
         int r;
 
+        assert(u);
+        assert(exec_command);
+        assert(message);
+        assert(error);
+
         /* Drop Ex from the written setting. E.g. ExecStart=, not ExecStartEx=. */
-        const char *written_name = is_ex_prop ? strndupa(name, strlen(name) - 2) : name;
+        const char *written_name = ex_prop ? strndupa_safe(name, ex_prop - name) : name;
 
-        r = sd_bus_message_enter_container(message, 'a', is_ex_prop ? "(sasas)" : "(sasb)");
+        r = sd_bus_message_enter_container(message, 'a', ex_prop ? "(sasas)" : "(sasb)");
         if (r < 0)
                 return r;
 
-        while ((r = sd_bus_message_enter_container(message, 'r', is_ex_prop ? "sasas" : "sasb")) > 0) {
+        while ((r = sd_bus_message_enter_container(message, 'r', ex_prop ? "sasas" : "sasb")) > 0) {
                 _cleanup_strv_free_ char **argv = NULL, **ex_opts = NULL;
                 const char *path;
                 int b;
@@ -1338,7 +1345,7 @@ int bus_set_transient_exec_command(
                         return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
                                                  "\"%s\" argv cannot be empty", name);
 
-                r = is_ex_prop ? sd_bus_message_read_strv(message, &ex_opts) : sd_bus_message_read(message, "b", &b);
+                r = ex_prop ? sd_bus_message_read_strv(message, &ex_opts) : sd_bus_message_read(message, "b", &b);
                 if (r < 0)
                         return r;
 
@@ -1347,29 +1354,28 @@ int bus_set_transient_exec_command(
                         return r;
 
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                        ExecCommand *c;
+                        _cleanup_(exec_command_freep) ExecCommand *c = NULL;
 
-                        c = new0(ExecCommand, 1);
+                        c = new(ExecCommand, 1);
                         if (!c)
                                 return -ENOMEM;
 
-                        c->path = strdup(path);
-                        if (!c->path) {
-                                free(c);
-                                return -ENOMEM;
-                        }
+                        *c = (ExecCommand) {
+                                .argv = TAKE_PTR(argv),
+                        };
 
-                        c->argv = TAKE_PTR(argv);
+                        r = path_simplify_alloc(path, &c->path);
+                        if (r < 0)
+                                return r;
 
-                        if (is_ex_prop) {
+                        if (ex_prop) {
                                 r = exec_command_flags_from_strv(ex_opts, &c->flags);
                                 if (r < 0)
                                         return r;
-                        } else
-                                c->flags = b ? EXEC_COMMAND_IGNORE_FAILURE : 0;
+                        } else if (b)
+                                c->flags |= EXEC_COMMAND_IGNORE_FAILURE;
 
-                        path_simplify(c->path);
-                        exec_command_append_list(exec_command, c);
+                        exec_command_append_list(exec_command, TAKE_PTR(c));
                 }
 
                 n++;
@@ -1738,6 +1744,9 @@ int bus_exec_context_set_transient_property(
         if (streq(name, "PrivateMounts"))
                 return bus_set_transient_tristate(u, name, &c->private_mounts, message, flags, error);
 
+        if (streq(name, "MountAPIVFS"))
+                return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error);
+
         if (streq(name, "PrivateNetwork"))
                 return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
 
@@ -1897,7 +1906,7 @@ int bus_exec_context_set_transient_property(
                                 c->restrict_filesystems_allow_list = allow_list;
 
                         STRV_FOREACH(s, l) {
-                                r = lsm_bpf_parse_filesystem(
+                                r = bpf_restrict_fs_parse_filesystem(
                                               *s,
                                               &c->restrict_filesystems,
                                               FILESYSTEM_PARSE_LOG|
@@ -1948,7 +1957,7 @@ int bus_exec_context_set_transient_property(
 
                                 r = strv_extend_strv(&c->supplementary_groups, l, true);
                                 if (r < 0)
-                                        return -ENOMEM;
+                                        return r;
 
                                 joined = strv_join(c->supplementary_groups, " ");
                                 if (!joined)
@@ -2705,51 +2714,51 @@ int bus_exec_context_set_transient_property(
 
                 return 1;
 
-        } else if (streq(name, "MountAPIVFS")) {
-                bool b;
-
-                r = bus_set_transient_bool(u, name, &b, message, flags, error);
-                if (r < 0)
-                        return r;
-
-                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                        c->mount_apivfs = b;
-                        c->mount_apivfs_set = true;
-                }
-
-                return 1;
-
         } else if (streq(name, "WorkingDirectory")) {
+                _cleanup_free_ char *simplified = NULL;
+                bool missing_ok = false, is_home = false;
                 const char *s;
-                bool missing_ok;
 
                 r = sd_bus_message_read(message, "s", &s);
                 if (r < 0)
                         return r;
 
-                if (s[0] == '-') {
-                        missing_ok = true;
-                        s++;
-                } else
-                        missing_ok = false;
+                if (!isempty(s)) {
+                        if (s[0] == '-') {
+                                missing_ok = true;
+                                s++;
+                        }
 
-                if (!isempty(s) && !streq(s, "~") && !path_is_absolute(s))
-                        return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "WorkingDirectory= expects an absolute path or '~'");
+                        if (streq(s, "~"))
+                                is_home = true;
+                        else {
+                                if (!path_is_absolute(s))
+                                        return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS,
+                                                                "WorkingDirectory= expects an absolute path or '~'");
 
-                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                        if (streq(s, "~")) {
-                                c->working_directory = mfree(c->working_directory);
-                                c->working_directory_home = true;
-                        } else {
-                                r = free_and_strdup(&c->working_directory, empty_to_null(s));
+                                r = path_simplify_alloc(s, &simplified);
                                 if (r < 0)
                                         return r;
 
-                                c->working_directory_home = false;
+                                if (!path_is_normalized(simplified))
+                                        return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS,
+                                                                "WorkingDirectory= expects a normalized path or '~'");
+
+                                if (path_below_api_vfs(simplified))
+                                        return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS,
+                                                                "WorkingDirectory= may not be below /proc/, /sys/ or /dev/");
                         }
+                }
 
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        free_and_replace(c->working_directory, simplified);
+                        c->working_directory_home = is_home;
                         c->working_directory_missing_ok = missing_ok;
-                        unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "WorkingDirectory=%s%s", missing_ok ? "-" : "", s);
+
+                        unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name,
+                                            "WorkingDirectory=%s%s",
+                                            c->working_directory_missing_ok ? "-" : "",
+                                            c->working_directory_home ? "~" : strempty(c->working_directory));
                 }
 
                 return 1;
@@ -3173,7 +3182,7 @@ int bus_exec_context_set_transient_property(
 
                                 r = strv_extend_strv(dirs, l, true);
                                 if (r < 0)
-                                        return -ENOMEM;
+                                        return r;
 
                                 unit_write_settingf(u, flags, name, "%s=%s", name, joined);
                         }
@@ -3200,7 +3209,7 @@ int bus_exec_context_set_transient_property(
                                 _cleanup_free_ char *joined = NULL;
                                 r = strv_extend_strv(&c->exec_search_path, l, true);
                                 if (r < 0)
-                                        return -ENOMEM;
+                                        return r;
                                 joined = strv_join(c->exec_search_path, ":");
                                 if (!joined)
                                         return log_oom();
diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h
index 5926bdb..4b7cb86 100644
--- a/src/core/dbus-execute.h
+++ b/src/core/dbus-execute.h
@@ -9,6 +9,7 @@
 #define BUS_EXEC_STATUS_VTABLE(prefix, offset, flags)                   \
         BUS_PROPERTY_DUAL_TIMESTAMP(prefix "StartTimestamp", (offset) + offsetof(ExecStatus, start_timestamp), flags), \
         BUS_PROPERTY_DUAL_TIMESTAMP(prefix "ExitTimestamp", (offset) + offsetof(ExecStatus, exit_timestamp), flags), \
+        BUS_PROPERTY_DUAL_TIMESTAMP(prefix "HandoffTimestamp", (offset) + offsetof(ExecStatus, handoff_timestamp), flags), \
         SD_BUS_PROPERTY(prefix "PID", "u", bus_property_get_pid, (offset) + offsetof(ExecStatus, pid), flags), \
         SD_BUS_PROPERTY(prefix "Code", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, code), flags), \
         SD_BUS_PROPERTY(prefix "Status", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, status), flags)
diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c
index c88d8c2..693efbb 100644
--- a/src/core/dbus-job.c
+++ b/src/core/dbus-job.c
@@ -54,7 +54,7 @@ int bus_job_method_cancel(sd_bus_message *message, void *userdata, sd_bus_error
         if (!sd_bus_track_contains(j->bus_track, sd_bus_message_get_sender(message))) {
 
                 /* And for everybody else consult polkit */
-                r = bus_verify_manage_units_async(j->unit->manager, message, error);
+                r = bus_verify_manage_units_async(j->manager, message, error);
                 if (r < 0)
                         return r;
                 if (r == 0)
@@ -87,22 +87,23 @@ int bus_job_method_get_waiting_jobs(sd_bus_message *message, void *userdata, sd_
         if (r < 0)
                 return r;
 
-        for (int i = 0; i < n; i ++) {
+        FOREACH_ARRAY(i, list, n) {
                 _cleanup_free_ char *unit_path = NULL, *job_path = NULL;
+                Job *job = *i;
 
-                job_path = job_dbus_path(list[i]);
+                job_path = job_dbus_path(job);
                 if (!job_path)
                         return -ENOMEM;
 
-                unit_path = unit_dbus_path(list[i]->unit);
+                unit_path = unit_dbus_path(job->unit);
                 if (!unit_path)
                         return -ENOMEM;
 
                 r = sd_bus_message_append(reply, "(usssoo)",
-                                          list[i]->id,
-                                          list[i]->unit->id,
-                                          job_type_to_string(list[i]->type),
-                                          job_state_to_string(list[i]->state),
+                                          job->id,
+                                          job->unit->id,
+                                          job_type_to_string(job->type),
+                                          job_state_to_string(job->state),
                                           job_path,
                                           unit_path);
                 if (r < 0)
@@ -262,7 +263,7 @@ void bus_job_send_pending_change_signal(Job *j, bool including_new) {
         if (!j->sent_dbus_new_signal && !including_new)
                 return;
 
-        if (MANAGER_IS_RELOADING(j->unit->manager))
+        if (MANAGER_IS_RELOADING(j->manager))
                 return;
 
         bus_job_send_change_signal(j);
@@ -331,12 +332,12 @@ static int bus_job_allocate_bus_track(Job *j) {
         if (j->bus_track)
                 return 0;
 
-        return sd_bus_track_new(j->unit->manager->api_bus, &j->bus_track, bus_job_track_handler, j);
+        return sd_bus_track_new(j->manager->api_bus, &j->bus_track, bus_job_track_handler, j);
 }
 
 int bus_job_coldplug_bus_track(Job *j) {
-        int r;
         _cleanup_strv_free_ char **deserialized_clients = NULL;
+        int r;
 
         assert(j);
 
@@ -361,7 +362,7 @@ int bus_job_track_sender(Job *j, sd_bus_message *m) {
         assert(j);
         assert(m);
 
-        if (sd_bus_message_get_bus(m) != j->unit->manager->api_bus) {
+        if (sd_bus_message_get_bus(m) != j->manager->api_bus) {
                 j->ref_by_private_bus = true;
                 return 0;
         }
diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c
index 745f5cc..2515f54 100644
--- a/src/core/dbus-manager.c
+++ b/src/core/dbus-manager.c
@@ -11,6 +11,7 @@
 #include "bus-common-errors.h"
 #include "bus-get-properties.h"
 #include "bus-log-control-api.h"
+#include "bus-util.h"
 #include "chase.h"
 #include "confidential-virt.h"
 #include "data-fd-util.h"
@@ -39,6 +40,7 @@
 #include "string-util.h"
 #include "strv.h"
 #include "syslog-util.h"
+#include "taint.h"
 #include "user-util.h"
 #include "version.h"
 #include "virt.h"
@@ -125,13 +127,10 @@ static int property_get_tainted(
                 void *userdata,
                 sd_bus_error *error) {
 
-        _cleanup_free_ char *s = NULL;
-        Manager *m = ASSERT_PTR(userdata);
-
         assert(bus);
         assert(reply);
 
-        s = manager_taint_string(m);
+        _cleanup_free_ char *s = taint_string();
         if (!s)
                 return log_oom();
 
@@ -464,18 +463,13 @@ static int bus_get_unit_by_name(Manager *m, sd_bus_message *message, const char
          * its sleeve: if the name is specified empty we use the client's unit. */
 
         if (isempty(name)) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
-                pid_t pid;
-
-                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
-                if (r < 0)
-                        return r;
+                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
 
-                r = sd_bus_creds_get_pid(creds, &pid);
+                r = bus_query_sender_pidref(message, &pidref);
                 if (r < 0)
                         return r;
 
-                u = manager_get_unit_by_pid(m, pid);
+                u = manager_get_unit_by_pidref(m, &pidref);
                 if (!u)
                         return sd_bus_error_set(error, BUS_ERROR_NO_SUCH_UNIT, "Client not member of any unit.");
         } else {
@@ -542,7 +536,7 @@ static int method_get_unit(sd_bus_message *message, void *userdata, sd_bus_error
 
 static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bus_error *error) {
         Manager *m = ASSERT_PTR(userdata);
-        pid_t pid;
+        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
         Unit *u;
         int r;
 
@@ -552,27 +546,20 @@ static int method_get_unit_by_pid(sd_bus_message *message, void *userdata, sd_bu
 
         /* Anyone can call this method */
 
-        r = sd_bus_message_read(message, "u", &pid);
+        r = sd_bus_message_read(message, "u", &pidref.pid);
         if (r < 0)
                 return r;
-        if (pid < 0)
-                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PID " PID_FMT, pid);
-
-        if (pid == 0) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
-
-                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
-                if (r < 0)
-                        return r;
-
-                r = sd_bus_creds_get_pid(creds, &pid);
+        if (pidref.pid < 0)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid PID " PID_FMT, pidref.pid);
+        if (pidref.pid == 0) {
+                r = bus_query_sender_pidref(message, &pidref);
                 if (r < 0)
                         return r;
         }
 
-        u = manager_get_unit_by_pid(m, pid);
+        u = manager_get_unit_by_pidref(m, &pidref);
         if (!u)
-                return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_PID, "PID "PID_FMT" does not belong to any loaded unit.", pid);
+                return sd_bus_error_setf(error, BUS_ERROR_NO_UNIT_FOR_PID, "PID "PID_FMT" does not belong to any loaded unit.", pidref.pid);
 
         return reply_unit_path(u, message, error);
 }
@@ -581,41 +568,27 @@ static int method_get_unit_by_invocation_id(sd_bus_message *message, void *userd
         _cleanup_free_ char *path = NULL;
         Manager *m = ASSERT_PTR(userdata);
         sd_id128_t id;
-        const void *a;
         Unit *u;
-        size_t sz;
         int r;
 
         assert(message);
 
         /* Anyone can call this method */
 
-        r = sd_bus_message_read_array(message, 'y', &a, &sz);
-        if (r < 0)
-                return r;
-        if (sz == 0)
-                id = SD_ID128_NULL;
-        else if (sz == 16)
-                memcpy(&id, a, sz);
-        else
+        if (bus_message_read_id128(message, &id) < 0)
                 return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid invocation ID");
 
         if (sd_id128_is_null(id)) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
-                pid_t pid;
-
-                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
-                if (r < 0)
-                        return r;
+                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
 
-                r = sd_bus_creds_get_pid(creds, &pid);
+                r = bus_query_sender_pidref(message, &pidref);
                 if (r < 0)
                         return r;
 
-                u = manager_get_unit_by_pid(m, pid);
+                u = manager_get_unit_by_pidref(m, &pidref);
                 if (!u)
                         return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT,
-                                                 "Client " PID_FMT " not member of any unit.", pid);
+                                                 "Client " PID_FMT " not member of any unit.", pidref.pid);
         } else {
                 u = hashmap_get(m->units_by_invocation_id, &id);
                 if (!u)
@@ -797,6 +770,7 @@ static int method_generic_unit_operation(
 
         assert(message);
         assert(m);
+        assert(handler);
 
         /* Read the first argument from the command and pass the operation to the specified per-unit
          * method. */
@@ -860,11 +834,13 @@ static int method_clean_unit(sd_bus_message *message, void *userdata, sd_bus_err
 }
 
 static int method_freeze_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
-        return method_generic_unit_operation(message, userdata, error, bus_unit_method_freeze, 0);
+        /* Only active units can be frozen, which must be properly loaded already */
+        return method_generic_unit_operation(message, userdata, error, bus_unit_method_freeze, GENERIC_UNIT_VALIDATE_LOADED);
 }
 
 static int method_thaw_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
-        return method_generic_unit_operation(message, userdata, error, bus_unit_method_thaw, 0);
+        /* Same as freeze above */
+        return method_generic_unit_operation(message, userdata, error, bus_unit_method_thaw, GENERIC_UNIT_VALIDATE_LOADED);
 }
 
 static int method_reset_failed_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -972,9 +948,10 @@ static int method_list_units_by_names(sd_bus_message *message, void *userdata, s
 }
 
 static int method_get_unit_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
-        /* Don't load a unit (since it won't have any processes if it's not loaded), but don't insist on the
-         * unit being loaded (because even improperly loaded units might still have processes around */
-        return method_generic_unit_operation(message, userdata, error, bus_unit_method_get_processes, 0);
+        /* Don't load a unit actively (since it won't have any processes if it's not loaded), but don't
+         * insist on the unit being loaded either (because even improperly loaded units might still have
+         * processes around). */
+        return method_generic_unit_operation(message, userdata, error, bus_unit_method_get_processes, /* flags = */ 0);
 }
 
 static int method_attach_processes_to_unit(sd_bus_message *message, void *userdata, sd_bus_error *error) {
@@ -1430,11 +1407,11 @@ static int dump_impl(
                  * operations, and can cause PID1 to stall. So it seems similar enough in terms of security
                  * considerations and impact, and thus use the same access check for dumps which, given the
                  * large amount of data to fetch, can stall PID1 for quite some time. */
-                r = mac_selinux_access_check(message, "reload", error);
+                r = mac_selinux_access_check(message, "reload", /* error = */ NULL);
                 if (r < 0)
                         goto ratelimited;
 
-                r = bus_verify_bypass_dump_ratelimit_async(m, message, error);
+                r = bus_verify_bypass_dump_ratelimit_async(m, message, /* error = */ NULL);
                 if (r < 0)
                         goto ratelimited;
                 if (r == 0)
@@ -1469,7 +1446,7 @@ static int method_dump(sd_bus_message *message, void *userdata, sd_bus_error *er
 static int reply_dump_by_fd(sd_bus_message *message, char *dump) {
         _cleanup_close_ int fd = -EBADF;
 
-        fd = acquire_data_fd(dump, strlen(dump), 0);
+        fd = acquire_data_fd(dump);
         if (fd < 0)
                 return fd;
 
@@ -1621,10 +1598,10 @@ static int method_reload(sd_bus_message *message, void *userdata, sd_bus_error *
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         /* Write a log message noting the unit or process who requested the Reload() */
-        log_caller(message, m, "Reloading");
+        log_caller(message, m, "Reload");
 
         /* Check the rate limit after the authorization succeeds, to avoid denial-of-service issues. */
-        if (!ratelimit_below(&m->reload_ratelimit)) {
+        if (!ratelimit_below(&m->reload_reexec_ratelimit)) {
                 log_warning("Reloading request rejected due to rate limit.");
                 return sd_bus_error_setf(error,
                                          SD_BUS_ERROR_LIMITS_EXCEEDED,
@@ -1667,7 +1644,15 @@ static int method_reexecute(sd_bus_message *message, void *userdata, sd_bus_erro
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         /* Write a log message noting the unit or process who requested the Reexecute() */
-        log_caller(message, m, "Reexecuting");
+        log_caller(message, m, "Reexecution");
+
+        /* Check the rate limit after the authorization succeeds, to avoid denial-of-service issues. */
+        if (!ratelimit_below(&m->reload_reexec_ratelimit)) {
+                log_warning("Reexecution request rejected due to rate limit.");
+                return sd_bus_error_setf(error,
+                                         SD_BUS_ERROR_LIMITS_EXCEEDED,
+                                         "Reexecute() request rejected due to rate limit.");
+        }
 
         /* We don't send a reply back here, the client should
          * just wait for us disconnecting. */
@@ -2329,85 +2314,53 @@ static int send_unit_files_changed(sd_bus *bus, void *userdata) {
         return sd_bus_send(bus, message, NULL);
 }
 
-/* Create an error reply, using the error information from changes[]
- * if possible, and fall back to generating an error from error code c.
- * The error message only describes the first error.
- */
+static void manager_unit_files_changed(Manager *m, const InstallChange *changes, size_t n_changes) {
+        int r;
+
+        assert(m);
+        assert(changes || n_changes == 0);
+
+        if (!install_changes_have_modification(changes, n_changes))
+                return;
+
+        /* See comments for this variable in manager.h */
+        m->unit_file_state_outdated = true;
+
+        r = bus_foreach_bus(m, NULL, send_unit_files_changed, NULL);
+        if (r < 0)
+                log_debug_errno(r, "Failed to send UnitFilesChanged signal, ignoring: %m");
+}
+
 static int install_error(
                 sd_bus_error *error,
                 int c,
                 InstallChange *changes,
                 size_t n_changes) {
 
-        CLEANUP_ARRAY(changes, n_changes, install_changes_free);
+        int r;
 
-        for (size_t i = 0; i < n_changes; i++)
+        /* Create an error reply, using the error information from changes[] if possible, and fall back to
+         * generating an error from error code c. The error message only describes the first error. */
 
-                /* When making changes here, make sure to also change install_changes_dump() in install.c. */
+        assert(changes || n_changes == 0);
 
-                switch (changes[i].type) {
-                case 0 ... _INSTALL_CHANGE_TYPE_MAX: /* not errors */
-                        break;
+        CLEANUP_ARRAY(changes, n_changes, install_changes_free);
 
-                case -EEXIST:
-                        if (changes[i].source)
-                                return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
-                                                         "File %s already exists and is a symlink to %s.",
-                                                         changes[i].path, changes[i].source);
-                        return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
-                                                 "File %s already exists.",
-                                                 changes[i].path);
-
-                case -ERFKILL:
-                        return sd_bus_error_setf(error, BUS_ERROR_UNIT_MASKED,
-                                                 "Unit file %s is masked.", changes[i].path);
-
-                case -EADDRNOTAVAIL:
-                        return sd_bus_error_setf(error, BUS_ERROR_UNIT_GENERATED,
-                                                 "Unit %s is transient or generated.", changes[i].path);
-
-                case -ETXTBSY:
-                        return sd_bus_error_setf(error, BUS_ERROR_UNIT_BAD_PATH,
-                                                 "File %s is under the systemd unit hierarchy already.", changes[i].path);
-
-                case -EBADSLT:
-                        return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                 "Invalid specifier in %s.", changes[i].path);
-
-                case -EIDRM:
-                        return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                 "Destination unit %s is a non-template unit.", changes[i].path);
-
-                case -EUCLEAN:
-                        return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                 "\"%s\" is not a valid unit name.",
-                                                 changes[i].path);
-
-                case -ELOOP:
-                        return sd_bus_error_setf(error, BUS_ERROR_UNIT_LINKED,
-                                                 "Refusing to operate on alias name or linked unit file: %s",
-                                                 changes[i].path);
-
-                case -EXDEV:
-                        if (changes[i].source)
-                                return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                         "Cannot alias %s as %s.",
-                                                         changes[i].source, changes[i].path);
-                        return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                 "Invalid unit reference %s.", changes[i].path);
-
-                case -ENOENT:
-                        return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_UNIT,
-                                                 "Unit file %s does not exist.", changes[i].path);
+        FOREACH_ARRAY(i, changes, n_changes) {
+                _cleanup_free_ char *err_message = NULL;
+                const char *bus_error;
 
-                case -EUNATCH:
-                        return sd_bus_error_setf(error, BUS_ERROR_BAD_UNIT_SETTING,
-                                                 "Cannot resolve specifiers in %s.", changes[i].path);
+                if (i->type >= 0)
+                        continue;
 
-                default:
-                        assert(changes[i].type < 0); /* other errors */
-                        return sd_bus_error_set_errnof(error, changes[i].type, "File %s: %m", changes[i].path);
-                }
+                r = install_change_dump_error(i, &err_message, &bus_error);
+                if (r == -ENOMEM)
+                        return r;
+                if (r < 0)
+                        return sd_bus_error_set_errnof(error, r, "File %s: %m", i->path);
+
+                return sd_bus_error_set(error, bus_error, err_message);
+        }
 
         return c < 0 ? c : -EINVAL;
 }
@@ -2426,12 +2379,6 @@ static int reply_install_changes_and_free(
 
         CLEANUP_ARRAY(changes, n_changes, install_changes_free);
 
-        if (install_changes_have_modification(changes, n_changes)) {
-                r = bus_foreach_bus(m, NULL, send_unit_files_changed, NULL);
-                if (r < 0)
-                        log_debug_errno(r, "Failed to send UnitFilesChanged signal: %m");
-        }
-
         r = sd_bus_message_new_method_return(message, &reply);
         if (r < 0)
                 return r;
@@ -2446,18 +2393,17 @@ static int reply_install_changes_and_free(
         if (r < 0)
                 return r;
 
-        for (size_t i = 0; i < n_changes; i++) {
-
-                if (changes[i].type < 0) {
+        FOREACH_ARRAY(i, changes, n_changes) {
+                if (i->type < 0) {
                         bad = true;
                         continue;
                 }
 
                 r = sd_bus_message_append(
                                 reply, "(sss)",
-                                install_change_type_to_string(changes[i].type),
-                                changes[i].path,
-                                changes[i].source);
+                                install_change_type_to_string(i->type),
+                                i->path,
+                                i->source);
                 if (r < 0)
                         return r;
 
@@ -2521,7 +2467,7 @@ static int method_enable_unit_files_generic(
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = call(m->runtime_scope, flags, NULL, l, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2594,7 +2540,7 @@ static int method_preset_unit_files_with_mode(sd_bus_message *message, void *use
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = unit_file_preset(m->runtime_scope, flags, NULL, l, preset_mode, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2648,7 +2594,7 @@ static int method_disable_unit_files_generic(
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = call(m->runtime_scope, flags, NULL, l, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2691,7 +2637,7 @@ static int method_revert_unit_files(sd_bus_message *message, void *userdata, sd_
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = unit_file_revert(m->runtime_scope, NULL, l, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2722,6 +2668,7 @@ static int method_set_default_target(sd_bus_message *message, void *userdata, sd
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = unit_file_set_default(m->runtime_scope, force ? UNIT_FILE_FORCE : 0, NULL, name, &changes, &n_changes);
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2764,7 +2711,7 @@ static int method_preset_all_unit_files(sd_bus_message *message, void *userdata,
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
         r = unit_file_preset_all(m->runtime_scope, flags, NULL, preset_mode, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2804,7 +2751,7 @@ static int method_add_dependency_unit_files(sd_bus_message *message, void *userd
                 return -EINVAL;
 
         r = unit_file_add_dependency(m->runtime_scope, flags, NULL, l, target, dep, &changes, &n_changes);
-        m->unit_file_state_outdated = m->unit_file_state_outdated || n_changes > 0; /* See comments for this variable in manager.h */
+        manager_unit_files_changed(m, changes, n_changes);
         if (r < 0)
                 return install_error(error, r, changes, n_changes);
 
@@ -2933,6 +2880,175 @@ static int method_dump_unit_descriptor_store(sd_bus_message *message, void *user
         return method_generic_unit_operation(message, userdata, error, bus_service_method_dump_file_descriptor_store, 0);
 }
 
+static int aux_scope_from_message(Manager *m, sd_bus_message *message, Unit **ret_scope, sd_bus_error *error) {
+        _cleanup_(pidref_done) PidRef sender_pidref = PIDREF_NULL;
+        _cleanup_free_ PidRef *pidrefs = NULL;
+        const char *name;
+        Unit *from, *scope;
+        PidRef *main_pid;
+        CGroupContext *cc;
+        size_t n_pids = 0;
+        uint64_t flags;
+        int r;
+
+        assert(ret_scope);
+
+        r = bus_query_sender_pidref(message, &sender_pidref);
+        if (r < 0)
+                return r;
+
+        from = manager_get_unit_by_pidref(m, &sender_pidref);
+        if (!from)
+                return sd_bus_error_set(error, BUS_ERROR_NO_SUCH_UNIT, "Client not member of any unit.");
+
+        if (!IN_SET(from->type, UNIT_SERVICE, UNIT_SCOPE))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Starting auxiliary scope is supported only for service and scope units, refusing.");
+
+        if (!unit_name_is_valid(from->id, UNIT_NAME_PLAIN))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Auxiliary scope can be started only for non-template service units and scope units, refusing.");
+
+        r = sd_bus_message_read(message, "s", &name);
+        if (r < 0)
+                return r;
+
+        if (!unit_name_is_valid(name, UNIT_NAME_PLAIN))
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Invalid name \"%s\" for auxiliary scope.", name);
+
+        if (unit_name_to_type(name) != UNIT_SCOPE)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Name \"%s\" of auxiliary scope doesn't have .scope suffix.", name);
+
+        main_pid = unit_main_pid(from);
+
+        r = sd_bus_message_enter_container(message, 'a', "h");
+        if (r < 0)
+                return r;
+
+        for (;;) {
+                _cleanup_(pidref_done) PidRef p = PIDREF_NULL;
+                Unit *unit;
+                int fd;
+
+                r = sd_bus_message_read(message, "h", &fd);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                r = pidref_set_pidfd(&p, fd);
+                if (r < 0) {
+                        log_unit_warning_errno(from, r, "Failed to create process reference from PIDFD, ignoring: %m");
+                        continue;
+                }
+
+                unit = manager_get_unit_by_pidref(m, &p);
+                if (!unit) {
+                        log_unit_warning(from, "Failed to get unit from PIDFD, ignoring.");
+                        continue;
+                }
+
+                if (!streq(unit->id, from->id)) {
+                        log_unit_warning(from, "PID " PID_FMT " is not running in the same service as the calling process, ignoring.", p.pid);
+                        continue;
+                }
+
+                if (pidref_equal(main_pid, &p)) {
+                        log_unit_warning(from, "Main PID cannot be migrated into auxiliary scope, ignoring.");
+                        continue;
+                }
+
+                if (!GREEDY_REALLOC(pidrefs, n_pids+1))
+                        return -ENOMEM;
+
+                pidrefs[n_pids++] = TAKE_PIDREF(p);
+        }
+
+        if (n_pids == 0)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "No processes can be migrated to auxiliary scope.");
+
+        r = sd_bus_message_exit_container(message);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_read(message, "t", &flags);
+        if (r < 0)
+                return r;
+
+        if (flags != 0)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Flags must be zero.");
+
+        r = manager_load_unit(m, name, NULL, error, &scope);
+        if (r < 0)
+                return r;
+
+        if (!unit_is_pristine(scope))
+                return sd_bus_error_setf(error, BUS_ERROR_UNIT_EXISTS,
+                                         "Unit %s was already loaded or has a fragment file.", name);
+
+        r = unit_set_slice(scope, UNIT_GET_SLICE(from));
+        if (r < 0)
+                return r;
+
+        cc = unit_get_cgroup_context(scope);
+
+        r = cgroup_context_copy(cc, unit_get_cgroup_context(from));
+        if (r < 0)
+                return r;
+
+        r = unit_make_transient(scope);
+        if (r < 0)
+                return r;
+
+        r = bus_unit_set_properties(scope, message, UNIT_RUNTIME, true, error);
+        if (r < 0)
+                return r;
+
+        FOREACH_ARRAY(p, pidrefs, n_pids) {
+                r = unit_pid_attachable(scope, p, error);
+                if (r < 0)
+                        return r;
+
+                r = unit_watch_pidref(scope, p, /* exclusive= */ false);
+                if (r < 0 && r != -EEXIST)
+                        return r;
+        }
+
+        /* Now load the missing bits of the unit we just created */
+        unit_add_to_load_queue(scope);
+        manager_dispatch_load_queue(m);
+
+        *ret_scope = TAKE_PTR(scope);
+
+        return 1;
+}
+
+static int method_start_aux_scope(sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Manager *m = ASSERT_PTR(userdata);
+        Unit *u = NULL; /* avoid false maybe-uninitialized warning */
+        int r;
+
+        assert(message);
+
+        r = mac_selinux_access_check(message, "start", error);
+        if (r < 0)
+                return r;
+
+        r = bus_verify_manage_units_async(m, message, error);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
+
+        r = aux_scope_from_message(m, message, &u, error);
+        if (r < 0)
+                return r;
+
+        return bus_unit_queue_job(message, u, JOB_START, JOB_REPLACE, 0, error);
+}
+
 const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_VTABLE_START(0),
 
@@ -2948,6 +3064,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
         BUS_PROPERTY_DUAL_TIMESTAMP("InitRDTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_INITRD]), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("UserspaceTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_USERSPACE]), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("FinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
+        BUS_PROPERTY_DUAL_TIMESTAMP("ShutdownStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SHUTDOWN_START]), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("SecurityStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_START]), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("SecurityFinishTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_SECURITY_FINISH]), SD_BUS_VTABLE_PROPERTY_CONST),
         BUS_PROPERTY_DUAL_TIMESTAMP("GeneratorsStartTimestamp", offsetof(Manager, timestamps[MANAGER_TIMESTAMP_GENERATORS_START]), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -3045,6 +3162,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, defaults.oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DefaultOOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("CtrlAltDelBurstAction", "s", bus_property_get_emergency_action, offsetof(Manager, cad_burst_action), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("SoftRebootsCount", "u", bus_property_get_unsigned, offsetof(Manager, soft_reboots_count), SD_BUS_VTABLE_PROPERTY_CONST),
 
         SD_BUS_METHOD_WITH_ARGS("GetUnit",
                                 SD_BUS_ARGS("s", name),
@@ -3491,6 +3609,11 @@ const sd_bus_vtable bus_manager_vtable[] = {
                                 SD_BUS_RESULT("a(suuutuusu)", entries),
                                 method_dump_unit_descriptor_store,
                                 SD_BUS_VTABLE_UNPRIVILEGED),
+        SD_BUS_METHOD_WITH_ARGS("StartAuxiliaryScope",
+                                SD_BUS_ARGS("s", name, "ah", pidfds, "t", flags, "a(sv)", properties),
+                                SD_BUS_RESULT("o", job),
+                                method_start_aux_scope,
+                                SD_BUS_VTABLE_UNPRIVILEGED),
 
         SD_BUS_SIGNAL_WITH_ARGS("UnitNew",
                                 SD_BUS_ARGS("s", id, "o", unit),
diff --git a/src/core/dbus-mount.c b/src/core/dbus-mount.c
index 7dbbdd0..f6a9ea9 100644
--- a/src/core/dbus-mount.c
+++ b/src/core/dbus-mount.c
@@ -6,6 +6,7 @@
 #include "dbus-kill.h"
 #include "dbus-mount.h"
 #include "dbus-util.h"
+#include "fstab-util.h"
 #include "mount.h"
 #include "string-util.h"
 #include "unit.h"
@@ -62,7 +63,7 @@ const sd_bus_vtable bus_mount_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Where", "s", NULL, offsetof(Mount, where), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("What", "s", property_get_what, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
-        SD_BUS_PROPERTY("Options","s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("Options", "s", property_get_options, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("Type", "s", property_get_type, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("TimeoutUSec", "t", bus_property_get_usec, offsetof(Mount, timeout_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ControlPID", "u", bus_property_get_pid, offsetof(Mount, control_pid.pid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
@@ -88,6 +89,7 @@ static int bus_mount_set_transient_property(
                 sd_bus_error *error) {
 
         Unit *u = UNIT(m);
+        int r;
 
         assert(m);
         assert(name);
@@ -98,8 +100,31 @@ static int bus_mount_set_transient_property(
         if (streq(name, "Where"))
                 return bus_set_transient_path(u, name, &m->where, message, flags, error);
 
-        if (streq(name, "What"))
-                return bus_set_transient_string(u, name, &m->parameters_fragment.what, message, flags, error);
+        if (streq(name, "What")) {
+                _cleanup_free_ char *path = NULL;
+                const char *v;
+
+                r = sd_bus_message_read(message, "s", &v);
+                if (r < 0)
+                        return r;
+
+                if (!isempty(v)) {
+                        path = fstab_node_to_udev_node(v);
+                        if (!path)
+                                return -ENOMEM;
+
+                        /* path_is_valid is not used - see the comment for config_parse_mount_node */
+                        if (strlen(path) >= PATH_MAX)
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Resolved What=%s too long", path);
+                }
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        free_and_replace(m->parameters_fragment.what, path);
+                        unit_write_settingf(u, flags|UNIT_ESCAPE_SPECIFIERS, name, "What=%s", strempty(m->parameters_fragment.what));
+                }
+
+                return 1;
+        }
 
         if (streq(name, "Options"))
                 return bus_set_transient_string(u, name, &m->parameters_fragment.options, message, flags, error);
diff --git a/src/core/dbus-scope.c b/src/core/dbus-scope.c
index 78196a1..165aa65 100644
--- a/src/core/dbus-scope.c
+++ b/src/core/dbus-scope.c
@@ -3,6 +3,7 @@
 #include "alloc-util.h"
 #include "bus-common-errors.h"
 #include "bus-get-properties.h"
+#include "bus-util.h"
 #include "dbus-cgroup.h"
 #include "dbus-kill.h"
 #include "dbus-manager.h"
@@ -84,7 +85,7 @@ static int bus_scope_set_transient_property(
                 return bus_set_transient_oom_policy(u, name, &s->oom_policy, message, flags, error);
 
         if (streq(name, "PIDs")) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+                _cleanup_(pidref_done) PidRef sender_pidref = PIDREF_NULL;
                 unsigned n = 0;
 
                 r = sd_bus_message_enter_container(message, 'a', "u");
@@ -94,7 +95,7 @@ static int bus_scope_set_transient_property(
                 for (;;) {
                         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
                         uint32_t upid;
-                        pid_t pid;
+                        PidRef *p;
 
                         r = sd_bus_message_read(message, "u", &upid);
                         if (r < 0)
@@ -103,28 +104,27 @@ static int bus_scope_set_transient_property(
                                 break;
 
                         if (upid == 0) {
-                                if (!creds) {
-                                        r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
+                                if (!pidref_is_set(&sender_pidref)) {
+                                        r = bus_query_sender_pidref(message, &sender_pidref);
                                         if (r < 0)
                                                 return r;
                                 }
 
-                                r = sd_bus_creds_get_pid(creds, &pid);
+                                p = &sender_pidref;
+                        } else {
+                                r = pidref_set_pid(&pidref, upid);
                                 if (r < 0)
                                         return r;
-                        } else
-                                pid = (uid_t) upid;
 
-                        r = pidref_set_pid(&pidref, pid);
-                        if (r < 0)
-                                return r;
+                                p = &pidref;
+                        }
 
-                        r = unit_pid_attachable(u, &pidref, error);
+                        r = unit_pid_attachable(u, p, error);
                         if (r < 0)
                                 return r;
 
                         if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                                r = unit_watch_pidref(u, &pidref, /* exclusive= */ false);
+                                r = unit_watch_pidref(u, p, /* exclusive= */ false);
                                 if (r < 0 && r != -EEXIST)
                                         return r;
                         }
diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c
index cc478f4..ff970df 100644
--- a/src/core/dbus-service.c
+++ b/src/core/dbus-service.c
@@ -166,9 +166,7 @@ static int bus_service_method_mount(sd_bus_message *message, void *userdata, sd_
         r = bus_verify_manage_units_async_full(
                         u,
                         is_image ? "mount-image" : "bind-mount",
-                        CAP_SYS_ADMIN,
                         N_("Authentication is required to mount on '$(unit)'."),
-                        true,
                         message,
                         error);
         if (r < 0)
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index e77e9e5..03c5b4a 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -86,6 +86,7 @@ const sd_bus_vtable bus_socket_vtable[] = {
         SD_BUS_PROPERTY("Transparent", "b", bus_property_get_bool, offsetof(Socket, transparent), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Broadcast", "b", bus_property_get_bool, offsetof(Socket, broadcast), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PassCredentials", "b", bus_property_get_bool, offsetof(Socket, pass_cred), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PassFileDescriptorsToExec", "b", bus_property_get_bool, offsetof(Socket, pass_fds_to_exec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PassSecurity", "b", bus_property_get_bool, offsetof(Socket, pass_sec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PassPacketInfo", "b", bus_property_get_bool, offsetof(Socket, pass_pktinfo), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Timestamping", "s", property_get_timestamping, offsetof(Socket, timestamping), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -190,6 +191,9 @@ static int bus_socket_set_transient_property(
         if (streq(name, "PassCredentials"))
                 return bus_set_transient_bool(u, name, &s->pass_cred, message, flags, error);
 
+        if (streq(name, "PassFileDescriptorsToExec"))
+                return bus_set_transient_bool(u, name, &s->pass_fds_to_exec, message, flags, error);
+
         if (streq(name, "PassSecurity"))
                 return bus_set_transient_bool(u, name, &s->pass_sec, message, flags, error);
 
diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c
index 1a037b7..953cd51 100644
--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@@ -7,6 +7,7 @@
 #include "bus-common-errors.h"
 #include "bus-get-properties.h"
 #include "bus-polkit.h"
+#include "bus-util.h"
 #include "cgroup-util.h"
 #include "condition.h"
 #include "dbus-job.h"
@@ -177,7 +178,7 @@ static int property_get_dependencies(
         return sd_bus_message_close_container(reply);
 }
 
-static int property_get_requires_mounts_for(
+static int property_get_mounts_for(
                 sd_bus *bus,
                 const char *path,
                 const char *interface,
@@ -408,9 +409,7 @@ int bus_unit_method_start_generic(
         r = bus_verify_manage_units_async_full(
                         u,
                         verb,
-                        CAP_SYS_ADMIN,
                         polkit_message_for_job[job_type],
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -491,9 +490,7 @@ int bus_unit_method_enqueue_job(sd_bus_message *message, void *userdata, sd_bus_
         r = bus_verify_manage_units_async_full(
                         u,
                         jtype,
-                        CAP_SYS_ADMIN,
                         polkit_message_for_job[type],
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -549,9 +546,7 @@ int bus_unit_method_kill(sd_bus_message *message, void *userdata, sd_bus_error *
         r = bus_verify_manage_units_async_full(
                         u,
                         "kill",
-                        CAP_KILL,
                         N_("Authentication is required to send a UNIX signal to the processes of '$(unit)'."),
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -579,9 +574,7 @@ int bus_unit_method_reset_failed(sd_bus_message *message, void *userdata, sd_bus
         r = bus_verify_manage_units_async_full(
                         u,
                         "reset-failed",
-                        CAP_SYS_ADMIN,
                         N_("Authentication is required to reset the \"failed\" state of '$(unit)'."),
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -611,9 +604,7 @@ int bus_unit_method_set_properties(sd_bus_message *message, void *userdata, sd_b
         r = bus_verify_manage_units_async_full(
                         u,
                         "set-property",
-                        CAP_SYS_ADMIN,
                         N_("Authentication is required to set properties on '$(unit)'."),
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -641,9 +632,7 @@ int bus_unit_method_ref(sd_bus_message *message, void *userdata, sd_bus_error *e
         r = bus_verify_manage_units_async_full(
                         u,
                         "ref",
-                        CAP_SYS_ADMIN,
-                        NULL,
-                        false,
+                        /* polkit_message= */ NULL,
                         message,
                         error);
         if (r < 0)
@@ -712,9 +701,7 @@ int bus_unit_method_clean(sd_bus_message *message, void *userdata, sd_bus_error
         r = bus_verify_manage_units_async_full(
                         u,
                         "clean",
-                        CAP_DAC_OVERRIDE,
                         N_("Authentication is required to delete files and directories associated with '$(unit)'."),
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -736,22 +723,13 @@ int bus_unit_method_clean(sd_bus_message *message, void *userdata, sd_bus_error
 }
 
 static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userdata, sd_bus_error *error, FreezerAction action) {
-        const char* perm;
-        int (*method)(Unit*);
         Unit *u = ASSERT_PTR(userdata);
-        bool reply_no_delay = false;
         int r;
 
         assert(message);
         assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
 
-        if (action == FREEZER_FREEZE) {
-                perm = "stop";
-                method = unit_freeze;
-        } else {
-                perm = "start";
-                method = unit_thaw;
-        }
+        const char *perm = action == FREEZER_FREEZE ? "stop" : "start";
 
         r = mac_selinux_unit_access_check(u, message, perm, error);
         if (r < 0)
@@ -760,9 +738,7 @@ static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userda
         r = bus_verify_manage_units_async_full(
                         u,
                         perm,
-                        CAP_SYS_ADMIN,
                         N_("Authentication is required to freeze or thaw the processes of '$(unit)' unit."),
-                        true,
                         message,
                         error);
         if (r < 0)
@@ -770,19 +746,21 @@ static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userda
         if (r == 0)
                 return 1; /* No authorization for now, but the async polkit stuff will call us again when it has it */
 
-        r = method(u);
+        r = unit_freezer_action(u, action);
         if (r == -EOPNOTSUPP)
-                return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit '%s' does not support freezing.", u->id);
+                return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit does not support freeze/thaw");
         if (r == -EBUSY)
-                return sd_bus_error_set(error, BUS_ERROR_UNIT_BUSY, "Unit has a pending job.");
+                return sd_bus_error_set(error, BUS_ERROR_UNIT_BUSY, "Unit has a pending job");
         if (r == -EHOSTDOWN)
-                return sd_bus_error_set(error, BUS_ERROR_UNIT_INACTIVE, "Unit is inactive.");
+                return sd_bus_error_set(error, BUS_ERROR_UNIT_INACTIVE, "Unit is not active");
         if (r == -EALREADY)
-                return sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Previously requested freezer operation for unit '%s' is still in progress.", u->id);
+                return sd_bus_error_set(error, BUS_ERROR_UNIT_BUSY, "Previously requested freezer operation for unit is still in progress");
+        if (r == -ECHILD)
+                return sd_bus_error_set(error, SD_BUS_ERROR_FAILED, "Unit is frozen by a parent slice");
         if (r < 0)
                 return r;
-        if (r == 0)
-                reply_no_delay = true;
+
+        bool reply_now = r == 0;
 
         if (u->pending_freezer_invocation) {
                 bus_unit_send_pending_freezer_message(u, true);
@@ -791,7 +769,7 @@ static int bus_unit_method_freezer_generic(sd_bus_message *message, void *userda
 
         u->pending_freezer_invocation = sd_bus_message_ref(message);
 
-        if (reply_no_delay) {
+        if (reply_now) {
                 r = bus_unit_send_pending_freezer_message(u, false);
                 if (r < 0)
                         return r;
@@ -879,7 +857,8 @@ const sd_bus_vtable bus_unit_vtable[] = {
         SD_BUS_PROPERTY("StopPropagatedFrom", "as", property_get_dependencies, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("JoinsNamespaceOf", "as", property_get_dependencies, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SliceOf", "as", property_get_dependencies, 0, SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("RequiresMountsFor", "as", property_get_requires_mounts_for, offsetof(Unit, requires_mounts_for), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RequiresMountsFor", "as", property_get_mounts_for, offsetof(Unit, mounts_for[UNIT_MOUNT_REQUIRES]), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("WantsMountsFor", "as", property_get_mounts_for, offsetof(Unit, mounts_for[UNIT_MOUNT_WANTS]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Documentation", "as", NULL, offsetof(Unit, documentation), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Description", "s", property_get_description, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("AccessSELinuxContext", "s", NULL, offsetof(Unit, access_selinux_context), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1235,12 +1214,32 @@ static int property_get_cgroup(
          * indicates the root cgroup, which we report as "/". c) all
          * other cases we report as-is. */
 
-        if (u->cgroup_path)
-                t = empty_to_root(u->cgroup_path);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        if (crt && crt->cgroup_path)
+                t = empty_to_root(crt->cgroup_path);
 
         return sd_bus_message_append(reply, "s", t);
 }
 
+static int property_get_cgroup_id(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+        /* Property getter: appends the unit's cgroup ID to the reply as a "t" value. */
+        Unit *u = ASSERT_PTR(userdata);
+
+        assert(bus);
+        assert(reply);
+        /* A unit without cgroup runtime data (crt == NULL) is reported with ID 0 instead of failing. */
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        return sd_bus_message_append(reply, "t", crt ? crt->cgroup_id : UINT64_C(0));
+}
+
 static int append_process(sd_bus_message *reply, const char *p, PidRef *pid, Set *pids) {
         _cleanup_free_ char *buf = NULL, *cmdline = NULL;
         int r;
@@ -1299,7 +1298,7 @@ static int append_cgroup(sd_bus_message *reply, const char *p, Set *pids) {
                  * threaded domain cgroup contains the PIDs of all processes in the subtree and is not
                  * readable in the subtree proper. */
 
-                r = cg_read_pidref(f, &pidref);
+                r = cg_read_pidref(f, &pidref, /* flags = */ 0);
                 if (IN_SET(r, 0, -EOPNOTSUPP))
                         break;
                 if (r < 0)
@@ -1369,8 +1368,10 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
         if (r < 0)
                 return r;
 
-        if (u->cgroup_path) {
-                r = append_cgroup(reply, u->cgroup_path, pids);
+        CGroupRuntime *crt;
+        crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_path) {
+                r = append_cgroup(reply, crt->cgroup_path, pids);
                 if (r < 0)
                         return r;
         }
@@ -1441,6 +1442,28 @@ static int property_get_io_counter(
         return sd_bus_message_append(reply, "t", value);
 }
 
+static int property_get_effective_limit(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+        /* Property getter for effective limits: the property name itself selects which limit is read. */
+        uint64_t value = CGROUP_LIMIT_MAX;
+        Unit *u = ASSERT_PTR(userdata);
+        ssize_t type;
+
+        assert(bus);
+        assert(reply);
+        assert(property);
+        /* Lookup result is ignored on purpose; presumably 'value' stays CGROUP_LIMIT_MAX on failure (TODO confirm). */
+        assert_se((type = cgroup_effective_limit_type_from_string(property)) >= 0);
+        (void) unit_get_effective_limit(u, type, &value);
+        return sd_bus_message_append(reply, "t", value);
+}
+
 int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd_bus_error *error) {
         _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
         _cleanup_set_free_ Set *pids = NULL;
@@ -1478,7 +1501,7 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd
         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
                 return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not active, refusing.");
 
-        r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID, &creds);
+        r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_EUID|SD_BUS_CREDS_PID|SD_BUS_CREDS_PIDFD, &creds);
         if (r < 0)
                 return r;
 
@@ -1489,7 +1512,6 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd
                 _cleanup_(pidref_freep) PidRef *pidref = NULL;
                 uid_t process_uid, sender_uid;
                 uint32_t upid;
-                pid_t pid;
 
                 r = sd_bus_message_read(message, "u", &upid);
                 if (r < 0)
@@ -1498,13 +1520,14 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd
                         break;
 
                 if (upid == 0) {
-                        r = sd_bus_creds_get_pid(creds, &pid);
+                        _cleanup_(pidref_done) PidRef p = PIDREF_NULL;
+                        r = bus_creds_get_pidref(creds, &p);
                         if (r < 0)
                                 return r;
-                } else
-                        pid = (uid_t) upid;
 
-                r = pidref_new_from_pid(pid, &pidref);
+                        r = pidref_dup(&p, &pidref);
+                } else
+                        r = pidref_new_from_pid(upid, &pidref);
                 if (r < 0)
                         return r;
 
@@ -1530,9 +1553,9 @@ int bus_unit_method_attach_processes(sd_bus_message *message, void *userdata, sd
                                 return sd_bus_error_set_errnof(error, r, "Failed to retrieve process UID: %m");
 
                         if (process_uid != sender_uid)
-                                return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pid);
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by client's UID. Refusing.", pidref->pid);
                         if (process_uid != u->ref_uid)
-                                return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pid);
+                                return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "Process " PID_FMT " not owned by target unit's UID. Refusing.", pidref->pid);
                 }
 
                 r = set_ensure_consume(&pids, &pidref_hash_ops_free, TAKE_PTR(pidref));
@@ -1555,17 +1578,20 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
         SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0),
-        SD_BUS_PROPERTY("ControlGroupId", "t", NULL, offsetof(Unit, cgroup_id), 0),
+        SD_BUS_PROPERTY("ControlGroupId", "t", property_get_cgroup_id, 0, 0),
         SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
         SD_BUS_PROPERTY("MemoryPeak", "t", property_get_memory_accounting, 0, 0),
         SD_BUS_PROPERTY("MemorySwapCurrent", "t", property_get_memory_accounting, 0, 0),
         SD_BUS_PROPERTY("MemorySwapPeak", "t", property_get_memory_accounting, 0, 0),
         SD_BUS_PROPERTY("MemoryZSwapCurrent", "t", property_get_memory_accounting, 0, 0),
         SD_BUS_PROPERTY("MemoryAvailable", "t", property_get_available_memory, 0, 0),
+        SD_BUS_PROPERTY("EffectiveMemoryMax", "t", property_get_effective_limit, 0, 0),
+        SD_BUS_PROPERTY("EffectiveMemoryHigh", "t", property_get_effective_limit, 0, 0),
         SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
         SD_BUS_PROPERTY("EffectiveCPUs", "ay", property_get_cpuset_cpus, 0, 0),
         SD_BUS_PROPERTY("EffectiveMemoryNodes", "ay", property_get_cpuset_mems, 0, 0),
         SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+        SD_BUS_PROPERTY("EffectiveTasksMax", "t", property_get_effective_limit, 0, 0),
         SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
         SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
         SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
@@ -1576,16 +1602,16 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
         SD_BUS_PROPERTY("IOWriteOperations", "t", property_get_io_counter, 0, 0),
 
         SD_BUS_METHOD_WITH_ARGS("GetProcesses",
-                                 SD_BUS_NO_ARGS,
-                                 SD_BUS_ARGS("a(sus)", processes),
-                                 bus_unit_method_get_processes,
-                                 SD_BUS_VTABLE_UNPRIVILEGED),
+                                SD_BUS_NO_ARGS,
+                                SD_BUS_ARGS("a(sus)", processes),
+                                bus_unit_method_get_processes,
+                                SD_BUS_VTABLE_UNPRIVILEGED),
 
         SD_BUS_METHOD_WITH_ARGS("AttachProcesses",
-                                 SD_BUS_ARGS("s", subcgroup, "au", pids),
-                                 SD_BUS_NO_RESULT,
-                                 bus_unit_method_attach_processes,
-                                 SD_BUS_VTABLE_UNPRIVILEGED),
+                                SD_BUS_ARGS("s", subcgroup, "au", pids),
+                                SD_BUS_NO_RESULT,
+                                bus_unit_method_attach_processes,
+                                SD_BUS_VTABLE_UNPRIVILEGED),
 
         SD_BUS_VTABLE_END
 };
@@ -2210,7 +2236,7 @@ static int bus_unit_set_transient_property(
                 return bus_set_transient_emergency_action(u, name, &u->job_timeout_action, message, flags, error);
 
         if (streq(name, "JobTimeoutRebootArgument"))
-                return bus_set_transient_string(u, name, &u->job_timeout_reboot_arg, message, flags, error);
+                return bus_set_transient_reboot_parameter(u, name, &u->job_timeout_reboot_arg, message, flags, error);
 
         if (streq(name, "StartLimitIntervalUSec"))
                 return bus_set_transient_usec(u, name, &u->start_ratelimit.interval, message, flags, error);
@@ -2234,7 +2260,7 @@ static int bus_unit_set_transient_property(
                 return bus_set_transient_exit_status(u, name, &u->success_action_exit_status, message, flags, error);
 
         if (streq(name, "RebootArgument"))
-                return bus_set_transient_string(u, name, &u->reboot_arg, message, flags, error);
+                return bus_set_transient_reboot_parameter(u, name, &u->reboot_arg, message, flags, error);
 
         if (streq(name, "CollectMode"))
                 return bus_set_transient_collect_mode(u, name, &u->collect_mode, message, flags, error);
@@ -2261,7 +2287,9 @@ static int bus_unit_set_transient_property(
                                 u->documentation = strv_free(u->documentation);
                                 unit_write_settingf(u, flags, name, "%s=", name);
                         } else {
-                                strv_extend_strv(&u->documentation, l, false);
+                                r = strv_extend_strv(&u->documentation, l, /* filter_duplicates= */ false);
+                                if (r < 0)
+                                        return r;
 
                                 STRV_FOREACH(p, l)
                                         unit_write_settingf(u, flags, name, "%s=%s", name, *p);
@@ -2308,7 +2336,7 @@ static int bus_unit_set_transient_property(
 
                 return 1;
 
-        } else if (streq(name, "RequiresMountsFor")) {
+        } else if (STR_IN_SET(name, "RequiresMountsFor", "WantsMountsFor")) {
                 _cleanup_strv_free_ char **l = NULL;
 
                 r = sd_bus_message_read_strv(message, &l);
@@ -2328,9 +2356,9 @@ static int bus_unit_set_transient_property(
                                 return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path specified in %s is not normalized: %s", name, *p);
 
                         if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                                r = unit_require_mounts_for(u, *p, UNIT_DEPENDENCY_FILE);
+                                r = unit_add_mounts_for(u, *p, UNIT_DEPENDENCY_FILE, unit_mount_dependency_type_from_string(name));
                                 if (r < 0)
-                                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to add required mount \"%s\": %m", *p);
+                                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Failed to add requested mount \"%s\": %m", *p);
 
                                 unit_write_settingf(u, flags, name, "%s=%s", name, *p);
                         }
diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c
index d680a64..b871d89 100644
--- a/src/core/dbus-util.c
+++ b/src/core/dbus-util.c
@@ -6,6 +6,7 @@
 #include "escape.h"
 #include "parse-util.h"
 #include "path-util.h"
+#include "reboot-util.h"
 #include "unit-printf.h"
 #include "user-util.h"
 #include "unit.h"
@@ -39,6 +40,7 @@ static bool valid_user_group_name_or_id_relaxed(const char *u) {
 
 BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(user_relaxed, valid_user_group_name_or_id_relaxed);
 BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(path, path_is_absolute);
+BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(reboot_parameter, reboot_parameter_is_valid);
 
 int bus_set_transient_string(
                 Unit *u,
@@ -151,9 +153,7 @@ int bus_set_transient_usec_internal(
 int bus_verify_manage_units_async_full(
                 Unit *u,
                 const char *verb,
-                int capability,
                 const char *polkit_message,
-                bool interactive,
                 sd_bus_message *call,
                 sd_bus_error *error) {
 
@@ -171,11 +171,8 @@ int bus_verify_manage_units_async_full(
 
         return bus_verify_polkit_async(
                         call,
-                        capability,
                         "org.freedesktop.systemd1.manage-units",
                         details,
-                        interactive,
-                        UID_INVALID,
                         &u->manager->polkit_registry,
                         error);
 }
diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h
index 9464b25..0fc3a94 100644
--- a/src/core/dbus-util.h
+++ b/src/core/dbus-util.h
@@ -239,6 +239,7 @@ int bus_set_transient_mode_t(Unit *u, const char *name, mode_t *p, sd_bus_messag
 int bus_set_transient_unsigned(Unit *u, const char *name, unsigned *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
 int bus_set_transient_user_relaxed(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
 int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_reboot_parameter(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
 int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
 int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
 int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
@@ -249,7 +250,7 @@ static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, s
 static inline int bus_set_transient_usec_fix_0(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
         return bus_set_transient_usec_internal(u, name, p, true, message, flags, error);
 }
-int bus_verify_manage_units_async_full(Unit *u, const char *verb, int capability, const char *polkit_message, bool interactive, sd_bus_message *call, sd_bus_error *error);
+int bus_verify_manage_units_async_full(Unit *u, const char *verb, const char *polkit_message, sd_bus_message *call, sd_bus_error *error);
 
 int bus_read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator);
 
diff --git a/src/core/dbus.c b/src/core/dbus.c
index ba2cec4..1c6f6fc 100644
--- a/src/core/dbus.c
+++ b/src/core/dbus.c
@@ -232,6 +232,8 @@ static int mac_selinux_filter(sd_bus_message *message, void *userdata, sd_bus_er
                 return 0;
 
         path = sd_bus_message_get_path(message);
+        if (!path)
+                return 0;
 
         if (object_path_startswith("/org/freedesktop/systemd1", path)) {
                 r = mac_selinux_access_check(message, verb, error);
@@ -241,25 +243,20 @@ static int mac_selinux_filter(sd_bus_message *message, void *userdata, sd_bus_er
                 return 0;
         }
 
-        if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
-                pid_t pid;
-
-                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
-                if (r < 0)
-                        return 0;
+        if (streq(path, "/org/freedesktop/systemd1/unit/self")) {
+                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
 
-                r = sd_bus_creds_get_pid(creds, &pid);
+                r = bus_query_sender_pidref(message, &pidref);
                 if (r < 0)
                         return 0;
 
-                u = manager_get_unit_by_pid(m, pid);
+                u = manager_get_unit_by_pidref(m, &pidref);
         } else {
                 r = manager_get_job_from_dbus_path(m, path, &j);
                 if (r >= 0)
                         u = j->unit;
                 else
-                        manager_load_unit_from_dbus_path(m, path, NULL, &u);
+                        (void) manager_load_unit_from_dbus_path(m, path, NULL, &u);
         }
         if (!u)
                 return 0;
@@ -280,24 +277,19 @@ static int find_unit(Manager *m, sd_bus *bus, const char *path, Unit **unit, sd_
         assert(bus);
         assert(path);
 
-        if (streq_ptr(path, "/org/freedesktop/systemd1/unit/self")) {
-                _cleanup_(sd_bus_creds_unrefp) sd_bus_creds *creds = NULL;
+        if (streq(path, "/org/freedesktop/systemd1/unit/self")) {
+                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
                 sd_bus_message *message;
-                pid_t pid;
 
                 message = sd_bus_get_current_message(bus);
                 if (!message)
                         return 0;
 
-                r = sd_bus_query_sender_creds(message, SD_BUS_CREDS_PID, &creds);
-                if (r < 0)
-                        return r;
-
-                r = sd_bus_creds_get_pid(creds, &pid);
+                r = bus_query_sender_pidref(message, &pidref);
                 if (r < 0)
                         return r;
 
-                u = manager_get_unit_by_pid(m, pid);
+                u = manager_get_unit_by_pidref(m, &pidref);
                 if (!u)
                         return 0;
         } else {
@@ -739,7 +731,7 @@ static int bus_on_connection(sd_event_source *s, int fd, uint32_t revents, void
                 log_debug("Accepting direct incoming connection from " PID_FMT " (%s) [%s]", pid, strna(comm), strna(description));
         }
 
-        r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+        r = sd_bus_attach_event(bus, m->event, EVENT_PRIORITY_IPC);
         if (r < 0) {
                 log_warning_errno(r, "Failed to attach new connection bus to event loop: %m");
                 return 0;
@@ -847,7 +839,7 @@ int bus_init_api(Manager *m) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to connect to API bus: %m");
 
-                r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+                r = sd_bus_attach_event(bus, m->event, EVENT_PRIORITY_IPC);
                 if (r < 0)
                         return log_error_errno(r, "Failed to attach API bus to event loop: %m");
 
@@ -904,7 +896,7 @@ int bus_init_system(Manager *m) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to connect to system bus: %m");
 
-                r = sd_bus_attach_event(bus, m->event, SD_EVENT_PRIORITY_NORMAL);
+                r = sd_bus_attach_event(bus, m->event, EVENT_PRIORITY_IPC);
                 if (r < 0)
                         return log_error_errno(r, "Failed to attach system bus to event loop: %m");
 
@@ -1073,7 +1065,7 @@ void bus_done(Manager *m) {
         assert(!m->subscribed);
 
         m->deserialized_subscribed = strv_free(m->deserialized_subscribed);
-        bus_verify_polkit_async_registry_free(m->polkit_registry);
+        m->polkit_registry = hashmap_free(m->polkit_registry);
 }
 
 int bus_fdset_add_all(Manager *m, FDSet *fds) {
@@ -1121,31 +1113,29 @@ int bus_foreach_bus(
                 int (*send_message)(sd_bus *bus, void *userdata),
                 void *userdata) {
 
-        sd_bus *b;
-        int r, ret = 0;
+        int r = 0;
+
+        assert(m);
+        assert(send_message);
 
         /* Send to all direct buses, unconditionally */
+        sd_bus *b;
         SET_FOREACH(b, m->private_buses) {
 
                 /* Don't bother with enqueuing these messages to clients that haven't started yet */
                 if (sd_bus_is_ready(b) <= 0)
                         continue;
 
-                r = send_message(b, userdata);
-                if (r < 0)
-                        ret = r;
+                RET_GATHER(r, send_message(b, userdata));
         }
 
         /* Send to API bus, but only if somebody is subscribed */
         if (m->api_bus &&
             (sd_bus_track_count(m->subscribed) > 0 ||
-             sd_bus_track_count(subscribed2) > 0)) {
-                r = send_message(m->api_bus, userdata);
-                if (r < 0)
-                        ret = r;
-        }
+             sd_bus_track_count(subscribed2) > 0))
+                RET_GATHER(r, send_message(m->api_bus, userdata));
 
-        return ret;
+        return r;
 }
 
 void bus_track_serialize(sd_bus_track *t, FILE *f, const char *prefix) {
@@ -1189,22 +1179,46 @@ int bus_track_coldplug(Manager *m, sd_bus_track **t, bool recursive, char **l) {
 }
 
 int bus_verify_manage_units_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
-        return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-units", NULL, false, UID_INVALID, &m->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        "org.freedesktop.systemd1.manage-units",
+                        /* details= */ NULL,
+                        &m->polkit_registry,
+                        error);
 }
 
 int bus_verify_manage_unit_files_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
-        return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.manage-unit-files", NULL, false, UID_INVALID, &m->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        "org.freedesktop.systemd1.manage-unit-files",
+                        /* details= */ NULL,
+                        &m->polkit_registry,
+                        error);
 }
 
 int bus_verify_reload_daemon_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
-        return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.reload-daemon", NULL, false, UID_INVALID, &m->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        "org.freedesktop.systemd1.reload-daemon",
+                        /* details= */ NULL,
+                        &m->polkit_registry, error);
 }
 
 int bus_verify_set_environment_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
-        return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.set-environment", NULL, false, UID_INVALID, &m->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        "org.freedesktop.systemd1.set-environment",
+                        /* details= */ NULL,
+                        &m->polkit_registry,
+                        error);
 }
 int bus_verify_bypass_dump_ratelimit_async(Manager *m, sd_bus_message *call, sd_bus_error *error) {
-        return bus_verify_polkit_async(call, CAP_SYS_ADMIN, "org.freedesktop.systemd1.bypass-dump-ratelimit", NULL, false, UID_INVALID, &m->polkit_registry, error);
+        return bus_verify_polkit_async(
+                        call,
+                        "org.freedesktop.systemd1.bypass-dump-ratelimit",
+                        /* details= */ NULL,
+                        &m->polkit_registry,
+                        error);
 }
 
 uint64_t manager_bus_n_queued_write(Manager *m) {
diff --git a/src/core/device.c b/src/core/device.c
index 6b2d7c3..d856767 100644
--- a/src/core/device.c
+++ b/src/core/device.c
@@ -119,10 +119,9 @@ static int device_set_sysfs(Device *d, const char *sysfs) {
 }
 
 static void device_init(Unit *u) {
-        Device *d = DEVICE(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
-        assert(d);
-        assert(UNIT(d)->load_state == UNIT_STUB);
+        assert(u->load_state == UNIT_STUB);
 
         /* In contrast to all other unit types we timeout jobs waiting
          * for devices by default. This is because they otherwise wait
@@ -137,9 +136,7 @@ static void device_init(Unit *u) {
 }
 
 static void device_done(Unit *u) {
-        Device *d = DEVICE(u);
-
-        assert(d);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
         device_unset_sysfs(d);
         d->deserialized_sysfs = mfree(d->deserialized_sysfs);
@@ -258,9 +255,8 @@ static void device_update_found_by_name(Manager *m, const char *path, DeviceFoun
 }
 
 static int device_coldplug(Unit *u) {
-        Device *d = DEVICE(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
-        assert(d);
         assert(d->state == DEVICE_DEAD);
 
         /* First, let's put the deserialized state and found mask into effect, if we have it. */
@@ -336,9 +332,7 @@ static int device_coldplug(Unit *u) {
 }
 
 static void device_catchup(Unit *u) {
-        Device *d = DEVICE(u);
-
-        assert(d);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
         /* Second, let's update the state with the enumerated state */
         device_update_found_one(d, d->enumerated_found, DEVICE_FOUND_MASK);
@@ -405,11 +399,9 @@ static int device_found_from_string_many(const char *name, DeviceFound *ret) {
 }
 
 static int device_serialize(Unit *u, FILE *f, FDSet *fds) {
+        Device *d = ASSERT_PTR(DEVICE(u));
         _cleanup_free_ char *s = NULL;
-        Device *d = DEVICE(u);
 
-        assert(d);
-        assert(u);
         assert(f);
         assert(fds);
 
@@ -428,11 +420,9 @@ static int device_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int device_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Device *d = DEVICE(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
         int r;
 
-        assert(d);
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -472,10 +462,11 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
 }
 
 static void device_dump(Unit *u, FILE *f, const char *prefix) {
-        Device *d = DEVICE(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
         _cleanup_free_ char *s = NULL;
 
-        assert(d);
+        assert(f);
+        assert(prefix);
 
         (void) device_found_to_string_many(d->found, &s);
 
@@ -495,15 +486,15 @@ static void device_dump(Unit *u, FILE *f, const char *prefix) {
 }
 
 static UnitActiveState device_active_state(Unit *u) {
-        assert(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
-        return state_translation_table[DEVICE(u)->state];
+        return state_translation_table[d->state];
 }
 
 static const char *device_sub_state_to_string(Unit *u) {
-        assert(u);
+        Device *d = ASSERT_PTR(DEVICE(u));
 
-        return device_state_to_string(DEVICE(u)->state);
+        return device_state_to_string(d->state);
 }
 
 static int device_update_description(Unit *u, sd_device *dev, const char *path) {
@@ -538,12 +529,11 @@ static int device_update_description(Unit *u, sd_device *dev, const char *path)
 }
 
 static int device_add_udev_wants(Unit *u, sd_device *dev) {
+        Device *d = ASSERT_PTR(DEVICE(u));
         _cleanup_strv_free_ char **added = NULL;
         const char *wants, *property;
-        Device *d = DEVICE(u);
         int r;
 
-        assert(d);
         assert(dev);
 
         property = MANAGER_IS_USER(u->manager) ? "SYSTEMD_USER_WANTS" : "SYSTEMD_WANTS";
@@ -646,6 +636,8 @@ static void device_upgrade_mount_deps(Unit *u) {
 
         /* Let's upgrade Requires= to BindsTo= on us. (Used when SYSTEMD_MOUNT_DEVICE_BOUND is set) */
 
+        assert(u);
+
         HASHMAP_FOREACH_KEY(v, other, unit_get_dependencies(u, UNIT_REQUIRED_BY)) {
                 if (other->type != UNIT_MOUNT)
                         continue;
@@ -706,16 +698,18 @@ static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool
                 unit_add_to_load_queue(u);
         }
 
-        if (!DEVICE(u)->path) {
-                DEVICE(u)->path = strdup(path);
-                if (!DEVICE(u)->path)
+        Device *d = ASSERT_PTR(DEVICE(u));
+
+        if (!d->path) {
+                d->path = strdup(path);
+                if (!d->path)
                         return log_oom();
         }
 
         /* If this was created via some dependency and has not actually been seen yet ->sysfs will not be
          * initialized. Hence initialize it if necessary. */
         if (sysfs) {
-                r = device_set_sysfs(DEVICE(u), sysfs);
+                r = device_set_sysfs(d, sysfs);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Failed to set sysfs path %s: %m", sysfs);
 
@@ -730,11 +724,11 @@ static int device_setup_unit(Manager *m, sd_device *dev, const char *path, bool
          * by systemd before the device appears on its radar. In this case the device unit is partially
          * initialized and includes the deps on the mount unit but at that time the "bind mounts" flag wasn't
          * present. Fix this up now. */
-        if (dev && device_is_bound_by_mounts(DEVICE(u), dev))
+        if (dev && device_is_bound_by_mounts(d, dev))
                 device_upgrade_mount_deps(u);
 
         if (units) {
-                r = set_ensure_put(units, NULL, DEVICE(u));
+                r = set_ensure_put(units, NULL, d);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Failed to store unit: %m");
         }
@@ -950,10 +944,7 @@ static int device_setup_units(Manager *m, sd_device *dev, Set **ready_units, Set
 }
 
 static Unit *device_following(Unit *u) {
-        Device *d = DEVICE(u);
-        Device *first = NULL;
-
-        assert(d);
+        Device *d = ASSERT_PTR(DEVICE(u)), *first = NULL;
 
         if (startswith(u->id, "sys-"))
                 return NULL;
@@ -973,16 +964,15 @@ static Unit *device_following(Unit *u) {
         return UNIT(first);
 }
 
-static int device_following_set(Unit *u, Set **_set) {
-        Device *d = DEVICE(u);
+static int device_following_set(Unit *u, Set **ret) {
+        Device *d = ASSERT_PTR(DEVICE(u));
         _cleanup_set_free_ Set *set = NULL;
         int r;
 
-        assert(d);
-        assert(_set);
+        assert(ret);
 
         if (LIST_JUST_US(same_sysfs, d)) {
-                *_set = NULL;
+                *ret = NULL;
                 return 0;
         }
 
@@ -1002,7 +992,7 @@ static int device_following_set(Unit *u, Set **_set) {
                         return r;
         }
 
-        *_set = TAKE_PTR(set);
+        *ret = TAKE_PTR(set);
         return 1;
 }
 
@@ -1061,6 +1051,9 @@ static void device_enumerate(Manager *m) {
                 _cleanup_set_free_ Set *ready_units = NULL, *not_ready_units = NULL;
                 Device *d;
 
+                if (device_is_processed(dev) <= 0)
+                        continue;
+
                 if (device_setup_units(m, dev, &ready_units, &not_ready_units) < 0)
                         continue;
 
diff --git a/src/core/dynamic-user.c b/src/core/dynamic-user.c
index 2bf9094..11de2ba 100644
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@@ -20,7 +20,7 @@
 #include "stdio-util.h"
 #include "string-util.h"
 #include "strv.h"
-#include "uid-alloc-range.h"
+#include "uid-classification.h"
 #include "user-util.h"
 
 /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
@@ -143,7 +143,6 @@ static int dynamic_user_acquire(Manager *m, const char *name, DynamicUser** ret)
 }
 
 static int make_uid_symlinks(uid_t uid, const char *name, bool b) {
-
         char path1[STRLEN("/run/systemd/dynamic-uid/direct:") + DECIMAL_STR_MAX(uid_t) + 1];
         const char *path2;
         int r = 0, k;
@@ -293,8 +292,8 @@ static int pick_uid(char **suggested_paths, const char *name, uid_t *ret_uid) {
                 }
 
                 /* Some superficial check whether this UID/GID might already be taken by some static user */
-                if (getpwuid(candidate) ||
-                    getgrgid((gid_t) candidate) ||
+                if (getpwuid_malloc(candidate, /* ret= */ NULL) >= 0 ||
+                    getgrgid_malloc((gid_t) candidate, /* ret= */ NULL) >= 0 ||
                     search_ipc(candidate, (gid_t) candidate) != 0) {
                         (void) unlink(lock_path);
                         continue;
@@ -419,30 +418,26 @@ static int dynamic_user_realize(
                 /* First, let's parse this as numeric UID */
                 r = parse_uid(d->name, &num);
                 if (r < 0) {
-                        struct passwd *p;
-                        struct group *g;
+                        _cleanup_free_ struct passwd *p = NULL;
+                        _cleanup_free_ struct group *g = NULL;
 
                         if (is_user) {
                                 /* OK, this is not a numeric UID. Let's see if there's a user by this name */
-                                p = getpwnam(d->name);
-                                if (p) {
+                                if (getpwnam_malloc(d->name, &p) >= 0) {
                                         num = p->pw_uid;
                                         gid = p->pw_gid;
                                 } else {
                                         /* if the user does not exist but the group with the same name exists, refuse operation */
-                                        g = getgrnam(d->name);
-                                        if (g)
+                                        if (getgrnam_malloc(d->name, /* ret= */ NULL) >= 0)
                                                 return -EILSEQ;
                                 }
                         } else {
                                 /* Let's see if there's a group by this name */
-                                g = getgrnam(d->name);
-                                if (g)
+                                if (getgrnam_malloc(d->name, &g) >= 0)
                                         num = (uid_t) g->gr_gid;
                                 else {
                                         /* if the group does not exist but the user with the same name exists, refuse operation */
-                                        p = getpwnam(d->name);
-                                        if (p)
+                                        if (getpwnam_malloc(d->name, /* ret= */ NULL) >= 0)
                                                 return -EILSEQ;
                                 }
                         }
@@ -484,13 +479,12 @@ static int dynamic_user_realize(
                         uid_lock_fd = new_uid_lock_fd;
                 }
         } else if (is_user && !uid_is_dynamic(num)) {
-                struct passwd *p;
+                _cleanup_free_ struct passwd *p = NULL;
 
                 /* Statically allocated user may have different uid and gid. So, let's obtain the gid. */
-                errno = 0;
-                p = getpwuid(num);
-                if (!p)
-                        return errno_or_else(ESRCH);
+                r = getpwuid_malloc(num, &p);
+                if (r < 0)
+                        return r;
 
                 gid = p->pw_gid;
         }
@@ -658,7 +652,7 @@ void dynamic_user_deserialize_one(Manager *m, const char *value, FDSet *fds, Dyn
 
         /* Parse the serialization again, after a daemon reload */
 
-        r = extract_many_words(&value, NULL, 0, &name, &s0, &s1, NULL);
+        r = extract_many_words(&value, NULL, 0, &name, &s0, &s1);
         if (r != 3 || !isempty(value)) {
                 log_debug("Unable to parse dynamic user line.");
                 return;
@@ -761,7 +755,6 @@ int dynamic_user_lookup_name(Manager *m, const char *name, uid_t *ret) {
 
 int dynamic_creds_make(Manager *m, const char *user, const char *group, DynamicCreds **ret) {
         _cleanup_(dynamic_creds_unrefp) DynamicCreds *creds = NULL;
-        bool acquired = false;
         int r;
 
         assert(m);
@@ -784,20 +777,14 @@ int dynamic_creds_make(Manager *m, const char *user, const char *group, DynamicC
                 r = dynamic_user_acquire(m, user, &creds->user);
                 if (r < 0)
                         return r;
-
-                acquired = true;
         }
 
-        if (creds->user && (!group || streq_ptr(user, group)))
-                creds->group = dynamic_user_ref(creds->user);
-        else if (group) {
+        if (group && !streq_ptr(user, group)) {
                 r = dynamic_user_acquire(m, group, &creds->group);
-                if (r < 0) {
-                        if (acquired)
-                                creds->user = dynamic_user_unref(creds->user);
+                if (r < 0)
                         return r;
-                }
-        }
+        } else
+                creds->group = ASSERT_PTR(dynamic_user_ref(creds->user));
 
         *ret = TAKE_PTR(creds);
 
diff --git a/src/core/emergency-action.c b/src/core/emergency-action.c
index e2cd931..dbda6e5 100644
--- a/src/core/emergency-action.c
+++ b/src/core/emergency-action.c
@@ -13,22 +13,22 @@
 #include "virt.h"
 
 static const char* const emergency_action_table[_EMERGENCY_ACTION_MAX] = {
-        [EMERGENCY_ACTION_NONE] =               "none",
-        [EMERGENCY_ACTION_REBOOT] =             "reboot",
-        [EMERGENCY_ACTION_REBOOT_FORCE] =       "reboot-force",
-        [EMERGENCY_ACTION_REBOOT_IMMEDIATE] =   "reboot-immediate",
-        [EMERGENCY_ACTION_POWEROFF] =           "poweroff",
-        [EMERGENCY_ACTION_POWEROFF_FORCE] =     "poweroff-force",
+        [EMERGENCY_ACTION_NONE]               = "none",
+        [EMERGENCY_ACTION_EXIT]               = "exit",
+        [EMERGENCY_ACTION_EXIT_FORCE]         = "exit-force",
+        [EMERGENCY_ACTION_REBOOT]             = "reboot",
+        [EMERGENCY_ACTION_REBOOT_FORCE]       = "reboot-force",
+        [EMERGENCY_ACTION_REBOOT_IMMEDIATE]   = "reboot-immediate",
+        [EMERGENCY_ACTION_POWEROFF]           = "poweroff",
+        [EMERGENCY_ACTION_POWEROFF_FORCE]     = "poweroff-force",
         [EMERGENCY_ACTION_POWEROFF_IMMEDIATE] = "poweroff-immediate",
-        [EMERGENCY_ACTION_EXIT] =               "exit",
-        [EMERGENCY_ACTION_EXIT_FORCE] =         "exit-force",
-        [EMERGENCY_ACTION_SOFT_REBOOT] =        "soft-reboot",
-        [EMERGENCY_ACTION_SOFT_REBOOT_FORCE] =  "soft-reboot-force",
-        [EMERGENCY_ACTION_KEXEC] =              "kexec",
-        [EMERGENCY_ACTION_KEXEC_FORCE] =        "kexec-force",
-        [EMERGENCY_ACTION_HALT] =               "halt",
-        [EMERGENCY_ACTION_HALT_FORCE] =         "halt-force",
-        [EMERGENCY_ACTION_HALT_IMMEDIATE] =     "halt-immediate",
+        [EMERGENCY_ACTION_SOFT_REBOOT]        = "soft-reboot",
+        [EMERGENCY_ACTION_SOFT_REBOOT_FORCE]  = "soft-reboot-force",
+        [EMERGENCY_ACTION_KEXEC]              = "kexec",
+        [EMERGENCY_ACTION_KEXEC_FORCE]        = "kexec-force",
+        [EMERGENCY_ACTION_HALT]               = "halt",
+        [EMERGENCY_ACTION_HALT_FORCE]         = "halt-force",
+        [EMERGENCY_ACTION_HALT_IMMEDIATE]     = "halt-immediate",
 };
 
 static void log_and_status(Manager *m, bool warn, const char *message, const char *reason) {
@@ -216,7 +216,7 @@ int parse_emergency_action(
         if (x < 0)
                 return -EINVAL;
 
-        if (runtime_scope != RUNTIME_SCOPE_SYSTEM && x != EMERGENCY_ACTION_NONE && x < _EMERGENCY_ACTION_FIRST_USER_ACTION)
+        if (runtime_scope != RUNTIME_SCOPE_SYSTEM && x > _EMERGENCY_ACTION_LAST_USER_ACTION)
                 return -EOPNOTSUPP;
 
         *ret = x;
diff --git a/src/core/emergency-action.h b/src/core/emergency-action.h
index 33e0ec6..6bec475 100644
--- a/src/core/emergency-action.h
+++ b/src/core/emergency-action.h
@@ -7,15 +7,15 @@
 
 typedef enum EmergencyAction {
         EMERGENCY_ACTION_NONE,
+        EMERGENCY_ACTION_EXIT,
+        EMERGENCY_ACTION_EXIT_FORCE,
+        _EMERGENCY_ACTION_LAST_USER_ACTION = EMERGENCY_ACTION_EXIT_FORCE,
         EMERGENCY_ACTION_REBOOT,
         EMERGENCY_ACTION_REBOOT_FORCE,
         EMERGENCY_ACTION_REBOOT_IMMEDIATE,
         EMERGENCY_ACTION_POWEROFF,
         EMERGENCY_ACTION_POWEROFF_FORCE,
         EMERGENCY_ACTION_POWEROFF_IMMEDIATE,
-        EMERGENCY_ACTION_EXIT,
-        _EMERGENCY_ACTION_FIRST_USER_ACTION = EMERGENCY_ACTION_EXIT,
-        EMERGENCY_ACTION_EXIT_FORCE,
         EMERGENCY_ACTION_SOFT_REBOOT,
         EMERGENCY_ACTION_SOFT_REBOOT_FORCE,
         EMERGENCY_ACTION_KEXEC,
diff --git a/src/core/exec-credential.c b/src/core/exec-credential.c
index 6bcfb68..f4cff57 100644
--- a/src/core/exec-credential.c
+++ b/src/core/exec-credential.c
@@ -9,6 +9,7 @@
 #include "fileio.h"
 #include "glob-util.h"
 #include "io-util.h"
+#include "iovec-util.h"
 #include "label-util.h"
 #include "mkdir-label.h"
 #include "mount-util.h"
@@ -48,6 +49,12 @@ DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
         char, string_hash_func, string_compare_func,
         ExecLoadCredential, exec_load_credential_free);
 
+bool exec_params_need_credentials(const ExecParameters *p) {
+        assert(p);
+
+        return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
+}
+
 bool exec_context_has_credentials(const ExecContext *c) {
         assert(c);
 
@@ -56,16 +63,15 @@ bool exec_context_has_credentials(const ExecContext *c) {
                 !set_isempty(c->import_credentials);
 }
 
-bool exec_context_has_encrypted_credentials(ExecContext *c) {
-        ExecLoadCredential *load_cred;
-        ExecSetCredential *set_cred;
-
+bool exec_context_has_encrypted_credentials(const ExecContext *c) {
         assert(c);
 
+        const ExecLoadCredential *load_cred;
         HASHMAP_FOREACH(load_cred, c->load_credentials)
                 if (load_cred->encrypted)
                         return true;
 
+        const ExecSetCredential *set_cred;
         HASHMAP_FOREACH(set_cred, c->set_credentials)
                 if (set_cred->encrypted)
                         return true;
@@ -106,7 +112,7 @@ int exec_context_get_credential_directory(
         assert(unit);
         assert(ret);
 
-        if (!exec_context_has_credentials(context)) {
+        if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context)) {
                 *ret = NULL;
                 return 0;
         }
@@ -172,6 +178,10 @@ static int write_credential(
         _cleanup_close_ int fd = -EBADF;
         int r;
 
+        assert(dfd >= 0);
+        assert(id);
+        assert(data || size == 0);
+
         r = tempfn_random_child("", "cred", &tmp);
         if (r < 0)
                 return r;
@@ -224,7 +234,6 @@ typedef enum CredentialSearchPath {
 } CredentialSearchPath;
 
 static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
-
         _cleanup_strv_free_ char **l = NULL;
 
         assert(params);
@@ -243,9 +252,8 @@ static char **credential_search_path(const ExecParameters *params, CredentialSea
         }
 
         if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
-                if (params->received_credentials_directory)
-                        if (strv_extend(&l, params->received_credentials_directory) < 0)
-                                return NULL;
+                if (strv_extend(&l, params->received_credentials_directory) < 0)
+                        return NULL;
 
                 if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
                         return NULL;
@@ -271,20 +279,29 @@ static int maybe_decrypt_and_write_credential(
                 size_t size,
                 uint64_t *left) {
 
-        _cleanup_free_ void *plaintext = NULL;
+        _cleanup_(iovec_done_erase) struct iovec plaintext = {};
         size_t add;
         int r;
 
-        if (encrypted) {
-                size_t plaintext_size = 0;
+        assert(dir_fd >= 0);
+        assert(id);
+        assert(left);
 
-                r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
-                                                &plaintext, &plaintext_size);
+        if (encrypted) {
+                r = decrypt_credential_and_warn(
+                                id,
+                                now(CLOCK_REALTIME),
+                                /* tpm2_device= */ NULL,
+                                /* tpm2_signature_path= */ NULL,
+                                getuid(),
+                                &IOVEC_MAKE(data, size),
+                                CREDENTIAL_ANY_SCOPE,
+                                &plaintext);
                 if (r < 0)
                         return r;
 
-                data = plaintext;
-                size = plaintext_size;
+                data = plaintext.iov_base;
+                size = plaintext.iov_len;
         }
 
         add = strlen(id) + size;
@@ -302,7 +319,7 @@ static int maybe_decrypt_and_write_credential(
 static int load_credential_glob(
                 const char *path,
                 bool encrypted,
-                char **search_path,
+                char * const *search_path,
                 ReadFullFileFlags flags,
                 int write_dfd,
                 uid_t uid,
@@ -312,6 +329,11 @@ static int load_credential_glob(
 
         int r;
 
+        assert(path);
+        assert(search_path);
+        assert(write_dfd >= 0);
+        assert(left);
+
         STRV_FOREACH(d, search_path) {
                 _cleanup_globfree_ glob_t pglob = {};
                 _cleanup_free_ char *j = NULL;
@@ -326,38 +348,36 @@ static int load_credential_glob(
                 if (r < 0)
                         return r;
 
-                for (size_t n = 0; n < pglob.gl_pathc; n++) {
+                FOREACH_ARRAY(p, pglob.gl_pathv, pglob.gl_pathc) {
                         _cleanup_free_ char *fn = NULL;
                         _cleanup_(erase_and_freep) char *data = NULL;
                         size_t size;
 
                         /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
                         r = read_full_file_full(
-                                AT_FDCWD,
-                                pglob.gl_pathv[n],
-                                UINT64_MAX,
-                                encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
-                                flags,
-                                NULL,
-                                &data, &size);
+                                        AT_FDCWD,
+                                        *p,
+                                        UINT64_MAX,
+                                        encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
+                                        flags,
+                                        NULL,
+                                        &data, &size);
                         if (r < 0)
-                                return log_debug_errno(r, "Failed to read credential '%s': %m",
-                                                        pglob.gl_pathv[n]);
+                                return log_debug_errno(r, "Failed to read credential '%s': %m", *p);
 
-                        r = path_extract_filename(pglob.gl_pathv[n], &fn);
+                        r = path_extract_filename(*p, &fn);
                         if (r < 0)
-                                return log_debug_errno(r, "Failed to extract filename from '%s': %m",
-                                                        pglob.gl_pathv[n]);
+                                return log_debug_errno(r, "Failed to extract filename from '%s': %m", *p);
 
                         r = maybe_decrypt_and_write_credential(
-                                write_dfd,
-                                fn,
-                                encrypted,
-                                uid,
-                                gid,
-                                ownership_ok,
-                                data, size,
-                                left);
+                                        write_dfd,
+                                        fn,
+                                        encrypted,
+                                        uid,
+                                        gid,
+                                        ownership_ok,
+                                        data, size,
+                                        left);
                         if (r == -EEXIST)
                                 continue;
                         if (r < 0)
@@ -423,7 +443,7 @@ static int load_credential(
 
                 /* Pass some minimal info about the unit and the credential name we are looking to acquire
                  * via the source socket address in case we read off an AF_UNIX socket. */
-                if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
+                if (asprintf(&bindname, "@%" PRIx64 "/unit/%s/%s", random_u64(), unit, id) < 0)
                         return -ENOMEM;
 
                 missing_ok = false;
@@ -447,7 +467,7 @@ static int load_credential(
 
         maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
 
-        if (search_path) {
+        if (search_path)
                 STRV_FOREACH(d, search_path) {
                         _cleanup_free_ char *j = NULL;
 
@@ -465,7 +485,7 @@ static int load_credential(
                         if (r != -ENOENT)
                                 break;
                 }
-        } else if (source)
+        else if (source)
                 r = read_full_file_full(
                                 read_dfd, source,
                                 UINT64_MAX,
@@ -484,7 +504,8 @@ static int load_credential(
                  *
                  * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
                  * we are fine, too. */
-                log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
+                log_full_errno(hashmap_contains(context->set_credentials, id) ? LOG_DEBUG : LOG_INFO,
+                               r, "Couldn't read inherited credential '%s', skipping: %m", path);
                 return 0;
         }
         if (r < 0)
@@ -518,6 +539,9 @@ static int load_cred_recurse_dir_cb(
         _cleanup_free_ char *sub_id = NULL;
         int r;
 
+        assert(path);
+        assert(de);
+
         if (event != RECURSE_DIR_ENTRY)
                 return RECURSE_DIR_CONTINUE;
 
@@ -574,6 +598,8 @@ static int acquire_credentials(
         int r;
 
         assert(context);
+        assert(params);
+        assert(unit);
         assert(p);
 
         dfd = open(p, O_DIRECTORY|O_CLOEXEC);
@@ -618,8 +644,7 @@ static int acquire_credentials(
                                         &left);
                 else
                         /* Directory */
-                        r = recurse_dir(
-                                        sub_fd,
+                        r = recurse_dir(sub_fd,
                                         /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
                                         /* statx_mask= */ 0,
                                         /* n_depth_max= */ UINT_MAX,
@@ -684,7 +709,7 @@ static int acquire_credentials(
         /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
          * add them, so that they can act as a "default" if the same credential is specified multiple times. */
         HASHMAP_FOREACH(sc, context->set_credentials) {
-                _cleanup_(erase_and_freep) void *plaintext = NULL;
+                _cleanup_(iovec_done_erase) struct iovec plaintext = {};
                 const char *data;
                 size_t size, add;
 
@@ -698,11 +723,20 @@ static int acquire_credentials(
                         return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
 
                 if (sc->encrypted) {
-                        r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
+                        r = decrypt_credential_and_warn(
+                                        sc->id,
+                                        now(CLOCK_REALTIME),
+                                        /* tpm2_device= */ NULL,
+                                        /* tpm2_signature_path= */ NULL,
+                                        getuid(),
+                                        &IOVEC_MAKE(sc->data, sc->size),
+                                        CREDENTIAL_ANY_SCOPE,
+                                        &plaintext);
                         if (r < 0)
                                 return r;
 
-                        data = plaintext;
+                        data = plaintext.iov_base;
+                        size = plaintext.iov_len;
                 } else {
                         data = sc->data;
                         size = sc->size;
@@ -754,17 +788,42 @@ static int setup_credentials_internal(
                 uid_t uid,
                 gid_t gid) {
 
+        bool final_mounted;
         int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
                                    * if we mounted something; false if we definitely can't mount anything */
-        bool final_mounted;
-        const char *where;
 
         assert(context);
+        assert(params);
+        assert(unit);
         assert(final);
         assert(workspace);
 
+        r = path_is_mount_point(final);
+        if (r < 0)
+                return r;
+        final_mounted = r > 0;
+
+        if (final_mounted) {
+                if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
+                        r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
+                        if (r < 0)
+                                return r;
+
+                        final_mounted = false;
+                } else {
+                        /* We can reuse the previous credential dir */
+                        r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
+                        if (r < 0)
+                                return r;
+                        if (r == 0) {
+                                log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
+                                return 0;
+                        }
+                }
+        }
+
         if (reuse_workspace) {
-                r = path_is_mount_point(workspace, NULL, 0);
+                r = path_is_mount_point(workspace);
                 if (r < 0)
                         return r;
                 if (r > 0)
@@ -775,40 +834,19 @@ static int setup_credentials_internal(
         } else
                 workspace_mounted = -1; /* ditto */
 
-        r = path_is_mount_point(final, NULL, 0);
-        if (r < 0)
-                return r;
-        if (r > 0) {
-                /* If the final place already has something mounted, we use that. If the workspace also has
-                 * something mounted we assume it's actually the same mount (but with MS_RDONLY
-                 * different). */
-                final_mounted = true;
-
-                if (workspace_mounted < 0) {
-                        /* If the final place is mounted, but the workspace isn't, then let's bind mount
-                         * the final version to the workspace, and make it writable, so that we can make
-                         * changes */
-
-                        r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
-                        if (r < 0)
-                                return r;
-
-                        r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
-                        if (r < 0)
-                                return r;
-
-                        workspace_mounted = true;
-                }
-        } else
-                final_mounted = false;
+        /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
+         * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
+         * If the workspace is not mounted, we just bind the final place over and make it writable. */
+        must_mount = must_mount || final_mounted;
 
         if (workspace_mounted < 0) {
-                /* Nothing is mounted on the workspace yet, let's try to mount something now */
-
-                r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
-                if (r < 0) {
-                        /* If that didn't work, try to make a bind mount from the final to the workspace, so
-                         * that we can make it writable there. */
+                if (!final_mounted)
+                        /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
+                         * not using the final place. */
+                        r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
+                if (final_mounted || r < 0) {
+                        /* If using final place or failed to mount new tmpfs, make a bind mount from
+                         * the final to the workspace, so that we can make it writable there. */
                         r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
                         if (r < 0) {
                                 if (!ERRNO_IS_PRIVILEGE(r))
@@ -821,12 +859,19 @@ static int setup_credentials_internal(
                                         return r;
 
                                 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
-                                 * for compat with container envs, and just use the final dir as is. */
+                                 * for compat with container envs, and just use the final dir as is.
+                                 * Final place must not be mounted in this case (refused by must_mount
+                                 * above) */
 
                                 workspace_mounted = false;
                         } else {
                                 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
-                                r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
+                                r = mount_nofollow_verbose(LOG_DEBUG,
+                                                           NULL,
+                                                           workspace,
+                                                           NULL,
+                                                           MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
+                                                           NULL);
                                 if (r < 0)
                                         return r;
 
@@ -836,34 +881,26 @@ static int setup_credentials_internal(
                         workspace_mounted = true;
         }
 
-        assert(!must_mount || workspace_mounted > 0);
-        where = workspace_mounted ? workspace : final;
+        assert(workspace_mounted >= 0);
+        assert(!must_mount || workspace_mounted);
+
+        const char *where = workspace_mounted ? workspace : final;
 
         (void) label_fix_full(AT_FDCWD, where, final, 0);
 
         r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
-        if (r < 0)
-                return r;
-
-        if (workspace_mounted) {
-                bool install;
-
-                /* Determine if we should actually install the prepared mount in the final location by bind
-                 * mounting it there. We do so only if the mount is not established there already, and if the
-                 * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
-                 * case we are doing all this in a mount namespace, thus no one else will see that we
-                 * allocated a file system we are getting rid of again here. */
+        if (r < 0) {
+                /* If we're using final place as workspace, and failed to acquire credentials, we might
+                 * have left half-written creds there. Let's get rid of the whole mount, so future
+                 * calls won't reuse it. */
                 if (final_mounted)
-                        install = false; /* already installed */
-                else {
-                        r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
-                        if (r < 0)
-                                return r;
+                        (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
 
-                        install = r == 0; /* install only if non-empty */
-                }
+                return r;
+        }
 
-                if (install) {
+        if (workspace_mounted) {
+                if (!final_mounted) {
                         /* Make workspace read-only now, so that any bind mount we make from it defaults to
                          * read-only too */
                         r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
@@ -873,7 +910,7 @@ static int setup_credentials_internal(
                         /* And mount it to the final place, read-only */
                         r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
                 } else
-                        /* Otherwise get rid of it */
+                        /* Otherwise we just get rid of the bind mount of final place */
                         r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
                 if (r < 0)
                         return r;
@@ -905,15 +942,16 @@ int exec_setup_credentials(
 
         assert(context);
         assert(params);
+        assert(unit);
 
-        if (!exec_context_has_credentials(context))
+        if (!exec_params_need_credentials(params) || !exec_context_has_credentials(context))
                 return 0;
 
         if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
                 return -EINVAL;
 
-        /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
-         * and the subdir we mount over with a read-only file system readable by the service's user */
+        /* This is where we'll place stuff when we are done; the main credentials directory is world-readable,
+         * and the subdir we mount over with a read-only file system readable by the service's user. */
         q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
         if (!q)
                 return -ENOMEM;
diff --git a/src/core/exec-credential.h b/src/core/exec-credential.h
index 6f836fb..70bb46b 100644
--- a/src/core/exec-credential.h
+++ b/src/core/exec-credential.h
@@ -34,8 +34,10 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(ExecLoadCredential*, exec_load_credential_free);
 extern const struct hash_ops exec_set_credential_hash_ops;
 extern const struct hash_ops exec_load_credential_hash_ops;
 
-bool exec_context_has_encrypted_credentials(ExecContext *c);
+bool exec_params_need_credentials(const ExecParameters *p);
+
 bool exec_context_has_credentials(const ExecContext *c);
+bool exec_context_has_encrypted_credentials(const ExecContext *c);
 
 int exec_context_get_credential_directory(
                 const ExecContext *context,
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 8e6de15..ee8db04 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -22,7 +22,7 @@
 #include "argv-util.h"
 #include "barrier.h"
 #include "bpf-dlopen.h"
-#include "bpf-lsm.h"
+#include "bpf-restrict-fs.h"
 #include "btrfs-util.h"
 #include "capability-util.h"
 #include "cgroup-setup.h"
@@ -41,6 +41,7 @@
 #include "hexdecoct.h"
 #include "io-util.h"
 #include "iovec-util.h"
+#include "journal-send.h"
 #include "missing_ioprio.h"
 #include "missing_prctl.h"
 #include "missing_securebits.h"
@@ -59,52 +60,13 @@
 #include "strv.h"
 #include "terminal-util.h"
 #include "utmp-wtmp.h"
+#include "vpick.h"
 
 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
 
 #define SNDBUF_SIZE (8*1024*1024)
 
-static int shift_fds(int fds[], size_t n_fds) {
-        if (n_fds <= 0)
-                return 0;
-
-        /* Modifies the fds array! (sorts it) */
-
-        assert(fds);
-
-        for (int start = 0;;) {
-                int restart_from = -1;
-
-                for (int i = start; i < (int) n_fds; i++) {
-                        int nfd;
-
-                        /* Already at right index? */
-                        if (fds[i] == i+3)
-                                continue;
-
-                        nfd = fcntl(fds[i], F_DUPFD, i + 3);
-                        if (nfd < 0)
-                                return -errno;
-
-                        safe_close(fds[i]);
-                        fds[i] = nfd;
-
-                        /* Hmm, the fd we wanted isn't free? Then
-                         * let's remember that and try again from here */
-                        if (nfd != i+3 && restart_from < 0)
-                                restart_from = i;
-                }
-
-                if (restart_from < 0)
-                        break;
-
-                start = restart_from;
-        }
-
-        return 0;
-}
-
 static int flag_fds(
                 const int fds[],
                 size_t n_socket_fds,
@@ -198,9 +160,11 @@ static int connect_journal_socket(
         const char *j;
         int r;
 
-        j = log_namespace ?
-                strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
-                "/run/systemd/journal/stdout";
+        assert(fd >= 0);
+
+        j = journal_stream_path(log_namespace);
+        if (!j)
+                return -EINVAL;
 
         if (gid_is_valid(gid)) {
                 oldgid = getgid();
@@ -449,7 +413,7 @@ static int setup_input(
         case EXEC_INPUT_DATA: {
                 int fd;
 
-                fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
+                fd = acquire_data_fd_full(context->stdin_data, context->stdin_data_size, /* flags = */ 0);
                 if (fd < 0)
                         return fd;
 
@@ -670,12 +634,8 @@ static int chown_terminal(int fd, uid_t uid) {
         assert(fd >= 0);
 
         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
-        if (isatty(fd) < 1) {
-                if (IN_SET(errno, EINVAL, ENOTTY))
-                        return 0; /* not a tty */
-
-                return -errno;
-        }
+        if (!isatty_safe(fd))
+                return 0;
 
         /* This might fail. What matters are the results. */
         r = fchmod_and_chown(fd, TTY_MODE, uid, GID_INVALID);
@@ -1126,7 +1086,8 @@ static int setup_pam(
                 gid_t gid,
                 const char *tty,
                 char ***env, /* updated on success */
-                const int fds[], size_t n_fds) {
+                const int fds[], size_t n_fds,
+                int exec_fd) {
 
 #if HAVE_PAM
 
@@ -1141,7 +1102,7 @@ static int setup_pam(
         sigset_t old_ss;
         int pam_code = PAM_SUCCESS, r;
         bool close_session = false;
-        pid_t pam_pid = 0, parent_pid;
+        pid_t parent_pid;
         int flags = 0;
 
         assert(name);
@@ -1196,7 +1157,7 @@ static int setup_pam(
 
         pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
         if (pam_code != PAM_SUCCESS)
-                log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));
+                log_debug("pam_setcred(PAM_ESTABLISH_CRED) failed, ignoring: %s", pam_strerror(handle, pam_code));
 
         pam_code = pam_open_session(handle, flags);
         if (pam_code != PAM_SUCCESS)
@@ -1212,15 +1173,15 @@ static int setup_pam(
 
         /* Block SIGTERM, so that we know that it won't get lost in the child */
 
-        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
+        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM) >= 0);
 
         parent_pid = getpid_cached();
 
-        r = safe_fork("(sd-pam)", 0, &pam_pid);
+        r = safe_fork("(sd-pam)", 0, NULL);
         if (r < 0)
                 goto fail;
         if (r == 0) {
-                int sig, ret = EXIT_PAM;
+                int ret = EXIT_PAM;
 
                 /* The child's job is to reset the PAM session on termination */
                 barrier_set_role(&barrier, BARRIER_CHILD);
@@ -1229,17 +1190,18 @@ static int setup_pam(
                  * those fds are open here that have been opened by PAM. */
                 (void) close_many(fds, n_fds);
 
+                /* Also close the 'exec_fd' in the child, since the service manager uses the EOF induced
+                 * by the execve() to detect completion, and if we kept the fd open here in the child
+                 * we'd never signal completion. */
+                exec_fd = safe_close(exec_fd);
+
                 /* Drop privileges - we don't need any to pam_close_session and this will make
                  * PR_SET_PDEATHSIG work in most cases.  If this fails, ignore the error - but expect sd-pam
                  * threads to fail to exit normally */
 
-                r = maybe_setgroups(0, NULL);
+                r = fully_set_uid_gid(uid, gid, /* supplementary_gids= */ NULL, /* n_supplementary_gids= */ 0);
                 if (r < 0)
-                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
-                if (setresgid(gid, gid, gid) < 0)
-                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
-                if (setresuid(uid, uid, uid) < 0)
-                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
+                        log_warning_errno(r, "Failed to drop privileges in sd-pam: %m");
 
                 (void) ignore_signals(SIGPIPE);
 
@@ -1258,21 +1220,13 @@ static int setup_pam(
                 /* Check if our parent process might already have died? */
                 if (getppid() == parent_pid) {
                         sigset_t ss;
+                        int sig;
 
                         assert_se(sigemptyset(&ss) >= 0);
                         assert_se(sigaddset(&ss, SIGTERM) >= 0);
 
-                        for (;;) {
-                                if (sigwait(&ss, &sig) < 0) {
-                                        if (errno == EINTR)
-                                                continue;
-
-                                        goto child_finish;
-                                }
-
-                                assert(sig == SIGTERM);
-                                break;
-                        }
+                        assert_se(sigwait(&ss, &sig) == 0);
+                        assert(sig == SIGTERM);
                 }
 
                 /* If our parent died we'll end the session */
@@ -1361,7 +1315,7 @@ static void rename_process_from_path(const char *path) {
         process_name[1+l] = ')';
         process_name[1+l+1] = 0;
 
-        rename_process(process_name);
+        (void) rename_process(process_name);
 }
 
 static bool context_has_address_families(const ExecContext *c) {
@@ -1725,7 +1679,7 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
         if (!exec_context_restrict_filesystems_set(c))
                 return 0;
 
-        if (p->bpf_outer_map_fd < 0) {
+        if (p->bpf_restrict_fs_map_fd < 0) {
                 /* LSM BPF is unsupported or lsm_bpf_setup failed */
                 log_exec_debug(c, p, "LSM BPF not supported, skipping RestrictFileSystems=");
                 return 0;
@@ -1736,7 +1690,7 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
         if (r < 0)
                 return r;
 
-        return lsm_bpf_restrict_filesystems(c->restrict_filesystems, p->cgroup_id, p->bpf_outer_map_fd, c->restrict_filesystems_allow_list);
+        return bpf_restrict_fs_update(c->restrict_filesystems, p->cgroup_id, p->bpf_restrict_fs_map_fd, c->restrict_filesystems_allow_list);
 }
 #endif
 
@@ -1817,10 +1771,10 @@ static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
 /* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
  * the service payload in. */
 static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
-        [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
-        [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
-        [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
-        [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
+        [EXEC_DIRECTORY_RUNTIME]       = "RUNTIME_DIRECTORY",
+        [EXEC_DIRECTORY_STATE]         = "STATE_DIRECTORY",
+        [EXEC_DIRECTORY_CACHE]         = "CACHE_DIRECTORY",
+        [EXEC_DIRECTORY_LOGS]          = "LOGS_DIRECTORY",
         [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
 };
 
@@ -1907,7 +1861,7 @@ static int build_environment(
                                                     "Failed to determine user credentials for root: %m");
         }
 
-        bool set_user_login_env = c->set_login_environment >= 0 ? c->set_login_environment : (c->user || c->dynamic_user);
+        bool set_user_login_env = exec_context_get_set_login_environment(c);
 
         if (username) {
                 x = strjoin("USER=", username);
@@ -1961,7 +1915,7 @@ static int build_environment(
                  * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
                  * container manager passes to PID 1 ends up all the way in the console login shown. */
 
-                if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
+                if (path_equal(tty_path, "/dev/console") && getppid() == 1)
                         term = getenv("TERM");
                 else if (tty_path && in_charset(skip_dev_prefix(tty_path), ALPHANUMERICAL)) {
                         _cleanup_free_ char *key = NULL;
@@ -2315,10 +2269,10 @@ static int setup_exec_directory(
                 int *exit_status) {
 
         static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
-                [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
-                [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
-                [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
-                [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
+                [EXEC_DIRECTORY_RUNTIME]       = EXIT_RUNTIME_DIRECTORY,
+                [EXEC_DIRECTORY_STATE]         = EXIT_STATE_DIRECTORY,
+                [EXEC_DIRECTORY_CACHE]         = EXIT_CACHE_DIRECTORY,
+                [EXEC_DIRECTORY_LOGS]          = EXIT_LOGS_DIRECTORY,
                 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
         };
         int r;
@@ -2338,10 +2292,10 @@ static int setup_exec_directory(
                         gid = 0;
         }
 
-        for (size_t i = 0; i < context->directories[type].n_items; i++) {
+        FOREACH_ARRAY(i, context->directories[type].items, context->directories[type].n_items) {
                 _cleanup_free_ char *p = NULL, *pp = NULL;
 
-                p = path_join(params->prefix[type], context->directories[type].items[i].path);
+                p = path_join(params->prefix[type], i->path);
                 if (!p) {
                         r = -ENOMEM;
                         goto fail;
@@ -2357,7 +2311,7 @@ static int setup_exec_directory(
                          * doesn't exist, then we likely are upgrading from an older systemd version that
                          * didn't know the more recent addition to the xdg-basedir spec: the $XDG_STATE_HOME
                          * directory. In older systemd versions EXEC_DIRECTORY_STATE was aliased to
-                         * EXEC_DIRECTORY_CONFIGURATION, with the advent of $XDG_STATE_HOME is is now
+                         * EXEC_DIRECTORY_CONFIGURATION, with the advent of $XDG_STATE_HOME it is now
                          * separated. If a service has both dirs configured but only the configuration dir
                          * exists and the state dir does not, we assume we are looking at an update
                          * situation. Hence, create a compatibility symlink, so that all expectations are
@@ -2378,9 +2332,9 @@ static int setup_exec_directory(
                                  * under the configuration hierarchy. */
 
                                 if (type == EXEC_DIRECTORY_STATE)
-                                        q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], context->directories[type].items[i].path);
+                                        q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], i->path);
                                 else if (type == EXEC_DIRECTORY_LOGS)
-                                        q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], "log", context->directories[type].items[i].path);
+                                        q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], "log", i->path);
                                 else
                                         assert_not_reached();
                                 if (!q) {
@@ -2443,7 +2397,7 @@ static int setup_exec_directory(
                         if (r < 0)
                                 goto fail;
 
-                        if (!path_extend(&pp, context->directories[type].items[i].path)) {
+                        if (!path_extend(&pp, i->path)) {
                                 r = -ENOMEM;
                                 goto fail;
                         }
@@ -2477,7 +2431,7 @@ static int setup_exec_directory(
                                         goto fail;
                         }
 
-                        if (!context->directories[type].items[i].only_create) {
+                        if (!i->only_create) {
                                 /* And link it up from the original place.
                                  * Notes
                                  * 1) If a mount namespace is going to be used, then this symlink remains on
@@ -2514,7 +2468,7 @@ static int setup_exec_directory(
                                 if (r < 0)
                                         goto fail;
 
-                                q = path_join(params->prefix[type], "private", context->directories[type].items[i].path);
+                                q = path_join(params->prefix[type], "private", i->path);
                                 if (!q) {
                                         r = -ENOMEM;
                                         goto fail;
@@ -2568,7 +2522,7 @@ static int setup_exec_directory(
                                                                  params,
                                                                  "%s \'%s\' already exists but the mode is different. "
                                                                  "(File system: %o %sMode: %o)",
-                                                                 exec_directory_type_to_string(type), context->directories[type].items[i].path,
+                                                                 exec_directory_type_to_string(type), i->path,
                                                                  st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
 
                                         continue;
@@ -2599,10 +2553,8 @@ static int setup_exec_directory(
         /* If we are not going to run in a namespace, set up the symlinks - otherwise
          * they are set up later, to allow configuring empty var/run/etc. */
         if (!needs_mount_namespace)
-                for (size_t i = 0; i < context->directories[type].n_items; i++) {
-                        r = create_many_symlinks(params->prefix[type],
-                                                 context->directories[type].items[i].path,
-                                                 context->directories[type].items[i].symlinks);
+                FOREACH_ARRAY(i, context->directories[type].items, context->directories[type].n_items) {
+                        r = create_many_symlinks(params->prefix[type], i->path, i->symlinks);
                         if (r < 0)
                                 goto fail;
                 }
@@ -2669,8 +2621,8 @@ static int compile_bind_mounts(
                 if (!params->prefix[t])
                         continue;
 
-                for (size_t i = 0; i < context->directories[t].n_items; i++)
-                        n += !context->directories[t].items[i].only_create;
+                FOREACH_ARRAY(i, context->directories[t].items, context->directories[t].n_items)
+                        n += !i->only_create;
         }
 
         if (n <= 0) {
@@ -2684,8 +2636,7 @@ static int compile_bind_mounts(
         if (!bind_mounts)
                 return -ENOMEM;
 
-        for (size_t i = 0; i < context->n_bind_mounts; i++) {
-                BindMount *item = context->bind_mounts + i;
+        FOREACH_ARRAY(item, context->bind_mounts, context->n_bind_mounts) {
                 _cleanup_free_ char *s = NULL, *d = NULL;
 
                 s = strdup(item->source);
@@ -2729,18 +2680,18 @@ static int compile_bind_mounts(
                                 return r;
                 }
 
-                for (size_t i = 0; i < context->directories[t].n_items; i++) {
+                FOREACH_ARRAY(i, context->directories[t].items, context->directories[t].n_items) {
                         _cleanup_free_ char *s = NULL, *d = NULL;
 
                         /* When one of the parent directories is in the list, we cannot create the symlink
                          * for the child directory. See also the comments in setup_exec_directory(). */
-                        if (context->directories[t].items[i].only_create)
+                        if (i->only_create)
                                 continue;
 
                         if (exec_directory_is_private(context, t))
-                                s = path_join(params->prefix[t], "private", context->directories[t].items[i].path);
+                                s = path_join(params->prefix[t], "private", i->path);
                         else
-                                s = path_join(params->prefix[t], context->directories[t].items[i].path);
+                                s = path_join(params->prefix[t], i->path);
                         if (!s)
                                 return -ENOMEM;
 
@@ -2749,7 +2700,7 @@ static int compile_bind_mounts(
                                 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
                                  * directory is not created on the root directory. So, let's bind-mount the directory
                                  * on the 'non-private' place. */
-                                d = path_join(params->prefix[t], context->directories[t].items[i].path);
+                                d = path_join(params->prefix[t], i->path);
                         else
                                 d = strdup(s);
                         if (!d)
@@ -2758,10 +2709,8 @@ static int compile_bind_mounts(
                         bind_mounts[h++] = (BindMount) {
                                 .source = TAKE_PTR(s),
                                 .destination = TAKE_PTR(d),
-                                .read_only = false,
                                 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
                                 .recursive = true,
-                                .ignore_enoent = false,
                         };
                 }
         }
@@ -2791,14 +2740,14 @@ static int compile_symlinks(
         assert(params);
         assert(ret_symlinks);
 
-        for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
-                for (size_t i = 0; i < context->directories[dt].n_items; i++) {
+        for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
+                FOREACH_ARRAY(i, context->directories[dt].items, context->directories[dt].n_items) {
                         _cleanup_free_ char *private_path = NULL, *path = NULL;
 
-                        STRV_FOREACH(symlink, context->directories[dt].items[i].symlinks) {
+                        STRV_FOREACH(symlink, i->symlinks) {
                                 _cleanup_free_ char *src_abs = NULL, *dst_abs = NULL;
 
-                                src_abs = path_join(params->prefix[dt], context->directories[dt].items[i].path);
+                                src_abs = path_join(params->prefix[dt], i->path);
                                 dst_abs = path_join(params->prefix[dt], *symlink);
                                 if (!src_abs || !dst_abs)
                                         return -ENOMEM;
@@ -2810,14 +2759,14 @@ static int compile_symlinks(
 
                         if (!exec_directory_is_private(context, dt) ||
                             exec_context_with_rootfs(context) ||
-                            context->directories[dt].items[i].only_create)
+                            i->only_create)
                                 continue;
 
-                        private_path = path_join(params->prefix[dt], "private", context->directories[dt].items[i].path);
+                        private_path = path_join(params->prefix[dt], "private", i->path);
                         if (!private_path)
                                 return -ENOMEM;
 
-                        path = path_join(params->prefix[dt], context->directories[dt].items[i].path);
+                        path = path_join(params->prefix[dt], i->path);
                         if (!path)
                                 return -ENOMEM;
 
@@ -2825,18 +2774,16 @@ static int compile_symlinks(
                         if (r < 0)
                                 return r;
                 }
-        }
 
         /* We make the host's os-release available via a symlink, so that we can copy it atomically
          * and readers will never get a half-written version. Note that, while the paths specified here are
          * absolute, when they are processed in namespace.c they will be made relative automatically, i.e.:
          * 'os-release -> .os-release-stage/os-release' is what will be created. */
         if (setup_os_release_symlink) {
-                r = strv_extend(&symlinks, "/run/host/.os-release-stage/os-release");
-                if (r < 0)
-                        return r;
-
-                r = strv_extend(&symlinks, "/run/host/os-release");
+                r = strv_extend_many(
+                                &symlinks,
+                                "/run/host/.os-release-stage/os-release",
+                                "/run/host/os-release");
                 if (r < 0)
                         return r;
         }
@@ -2877,8 +2824,8 @@ static bool insist_on_sandboxing(
 
         /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
          * essential. */
-        for (size_t i = 0; i < n_bind_mounts; i++)
-                if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
+        FOREACH_ARRAY(i, bind_mounts, n_bind_mounts)
+                if (!path_equal(i->source, i->destination))
                         return true;
 
         if (context->log_namespace)
@@ -2887,13 +2834,33 @@ static bool insist_on_sandboxing(
         return false;
 }
 
-static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
+static int setup_ephemeral(
+                const ExecContext *context,
+                ExecRuntime *runtime,
+                char **root_image,            /* both input and output! modified if ephemeral logic enabled */
+                char **root_directory) {      /* ditto */
+
         _cleanup_close_ int fd = -EBADF;
+        _cleanup_free_ char *new_root = NULL;
         int r;
 
+        assert(context);
+        assert(root_image);
+        assert(root_directory);
+
+        if (!*root_image && !*root_directory)
+                return 0;
+
         if (!runtime || !runtime->ephemeral_copy)
                 return 0;
 
+        assert(runtime->ephemeral_storage_socket[0] >= 0);
+        assert(runtime->ephemeral_storage_socket[1] >= 0);
+
+        new_root = strdup(runtime->ephemeral_copy);
+        if (!new_root)
+                return log_oom_debug();
+
         r = posix_lock(runtime->ephemeral_storage_socket[0], LOCK_EX);
         if (r < 0)
                 return log_debug_errno(r, "Failed to lock ephemeral storage socket: %m");
@@ -2904,28 +2871,23 @@ static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
         if (fd >= 0)
                 /* We got an fd! That means ephemeral has already been set up, so nothing to do here. */
                 return 0;
-
         if (fd != -EAGAIN)
                 return log_debug_errno(fd, "Failed to receive file descriptor queued on ephemeral storage socket: %m");
 
-        log_debug("Making ephemeral snapshot of %s to %s",
-                  context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+        if (*root_image) {
+                log_debug("Making ephemeral copy of %s to %s", *root_image, new_root);
 
-        if (context->root_image)
-                fd = copy_file(context->root_image, runtime->ephemeral_copy, O_EXCL, 0600,
-                               COPY_LOCK_BSD|COPY_REFLINK|COPY_CRTIME);
-        else
-                fd = btrfs_subvol_snapshot_at(AT_FDCWD, context->root_directory,
-                                              AT_FDCWD, runtime->ephemeral_copy,
-                                              BTRFS_SNAPSHOT_FALLBACK_COPY |
-                                              BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
-                                              BTRFS_SNAPSHOT_RECURSIVE |
-                                              BTRFS_SNAPSHOT_LOCK_BSD);
-        if (fd < 0)
-                return log_debug_errno(fd, "Failed to snapshot %s to %s: %m",
-                                       context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+                fd = copy_file(*root_image,
+                               new_root,
+                               O_EXCL,
+                               0600,
+                               COPY_LOCK_BSD|
+                               COPY_REFLINK|
+                               COPY_CRTIME);
+                if (fd < 0)
+                        return log_debug_errno(fd, "Failed to copy image %s to %s: %m",
+                                               *root_image, new_root);
 
-        if (context->root_image) {
                 /* A root image might be subject to lots of random writes so let's try to disable COW on it
                  * which tends to not perform well in combination with lots of random writes.
                  *
@@ -2934,13 +2896,35 @@ static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
                  */
                 r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
                 if (r < 0)
-                        log_debug_errno(fd, "Failed to disable copy-on-write for %s, ignoring: %m", runtime->ephemeral_copy);
+                        log_debug_errno(r, "Failed to disable copy-on-write for %s, ignoring: %m", new_root);
+        } else {
+                assert(*root_directory);
+
+                log_debug("Making ephemeral snapshot of %s to %s", *root_directory, new_root);
+
+                fd = btrfs_subvol_snapshot_at(
+                                AT_FDCWD, *root_directory,
+                                AT_FDCWD, new_root,
+                                BTRFS_SNAPSHOT_FALLBACK_COPY |
+                                BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+                                BTRFS_SNAPSHOT_RECURSIVE |
+                                BTRFS_SNAPSHOT_LOCK_BSD);
+                if (fd < 0)
+                        return log_debug_errno(fd, "Failed to snapshot directory %s to %s: %m",
+                                               *root_directory, new_root);
         }
 
         r = send_one_fd(runtime->ephemeral_storage_socket[1], fd, MSG_DONTWAIT);
         if (r < 0)
                 return log_debug_errno(r, "Failed to queue file descriptor on ephemeral storage socket: %m");
 
+        if (*root_image)
+                free_and_replace(*root_image, new_root);
+        else {
+                assert(*root_directory);
+                free_and_replace(*root_directory, new_root);
+        }
+
         return 1;
 }
 
@@ -3000,22 +2984,80 @@ static int verity_settings_prepare(
         return 0;
 }
 
+static int pick_versions(
+                const ExecContext *context,
+                const ExecParameters *params,
+                char **ret_root_image,
+                char **ret_root_directory) {
+
+        int r;
+
+        assert(context);
+        assert(params);
+        assert(ret_root_image);
+        assert(ret_root_directory);
+
+        if (context->root_image) {
+                _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
+
+                r = path_pick(/* toplevel_path= */ NULL,
+                              /* toplevel_fd= */ AT_FDCWD,
+                              context->root_image,
+                              &pick_filter_image_raw,
+                              PICK_ARCHITECTURE|PICK_TRIES|PICK_RESOLVE,
+                              &result);
+                if (r < 0)
+                        return r;
+
+                if (!result.path)
+                        return log_exec_debug_errno(context, params, SYNTHETIC_ERRNO(ENOENT), "No matching entry in .v/ directory %s found.", context->root_image);
+
+                *ret_root_image = TAKE_PTR(result.path);
+                *ret_root_directory = NULL;
+                return r;
+        }
+
+        if (context->root_directory) {
+                _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
+
+                r = path_pick(/* toplevel_path= */ NULL,
+                              /* toplevel_fd= */ AT_FDCWD,
+                              context->root_directory,
+                              &pick_filter_image_dir,
+                              PICK_ARCHITECTURE|PICK_TRIES|PICK_RESOLVE,
+                              &result);
+                if (r < 0)
+                        return r;
+
+                if (!result.path)
+                        return log_exec_debug_errno(context, params, SYNTHETIC_ERRNO(ENOENT), "No matching entry in .v/ directory %s found.", context->root_directory);
+
+                *ret_root_image = NULL;
+                *ret_root_directory = TAKE_PTR(result.path);
+                return r;
+        }
+
+        *ret_root_image = *ret_root_directory = NULL;
+        return 0;
+}
+
 static int apply_mount_namespace(
                 ExecCommandFlags command_flags,
                 const ExecContext *context,
                 const ExecParameters *params,
                 ExecRuntime *runtime,
                 const char *memory_pressure_path,
+                bool needs_sandboxing,
                 char **error_path) {
 
         _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
         _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
                         **read_write_paths_cleanup = NULL;
         _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
-                        *extension_dir = NULL, *host_os_release_stage = NULL;
-        const char *root_dir = NULL, *root_image = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
+                *extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
+        const char *tmp_dir = NULL, *var_tmp_dir = NULL;
         char **read_write_paths;
-        bool needs_sandboxing, setup_os_release_symlink;
+        bool setup_os_release_symlink;
         BindMount *bind_mounts = NULL;
         size_t n_bind_mounts = 0;
         int r;
@@ -3025,14 +3067,21 @@ static int apply_mount_namespace(
         CLEANUP_ARRAY(bind_mounts, n_bind_mounts, bind_mount_free_many);
 
         if (params->flags & EXEC_APPLY_CHROOT) {
-                r = setup_ephemeral(context, runtime);
+                r = pick_versions(
+                                context,
+                                params,
+                                &root_image,
+                                &root_dir);
                 if (r < 0)
                         return r;
 
-                if (context->root_image)
-                        root_image = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_image;
-                else
-                        root_dir = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory;
+                r = setup_ephemeral(
+                                context,
+                                runtime,
+                                &root_image,
+                                &root_dir);
+                if (r < 0)
+                        return r;
         }
 
         r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
@@ -3054,7 +3103,6 @@ static int apply_mount_namespace(
         } else
                 read_write_paths = context->read_write_paths;
 
-        needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command_flags & EXEC_COMMAND_FULLY_PRIVILEGED);
         if (needs_sandboxing) {
                 /* The runtime struct only contains the parent of the private /tmp, which is non-accessible
                  * to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
@@ -3084,11 +3132,9 @@ static int apply_mount_namespace(
                                params,
                                "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
 
-        if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
-                r = exec_context_get_credential_directory(context, params, params->unit_id, &creds_path);
-                if (r < 0)
-                        return r;
-        }
+        r = exec_context_get_credential_directory(context, params, params->unit_id, &creds_path);
+        if (r < 0)
+                return r;
 
         if (params->runtime_scope == RUNTIME_SCOPE_SYSTEM) {
                 propagate_dir = path_join("/run/systemd/propagate/", params->unit_id);
@@ -3246,31 +3292,39 @@ static int apply_working_directory(
                 const char *home,
                 int *exit_status) {
 
-        const char *d, *wd;
+        const char *wd;
+        int r;
 
         assert(context);
         assert(exit_status);
 
         if (context->working_directory_home) {
-
                 if (!home) {
                         *exit_status = EXIT_CHDIR;
                         return -ENXIO;
                 }
 
                 wd = home;
-
         } else
                 wd = empty_to_root(context->working_directory);
 
         if (params->flags & EXEC_APPLY_CHROOT)
-                d = wd;
-        else
-                d = prefix_roota((runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory, wd);
+                r = RET_NERRNO(chdir(wd));
+        else {
+                _cleanup_close_ int dfd = -EBADF;
+
+                r = chase(wd,
+                          (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory,
+                          CHASE_PREFIX_ROOT|CHASE_AT_RESOLVE_IN_ROOT,
+                          /* ret_path= */ NULL,
+                          &dfd);
+                if (r >= 0)
+                        r = RET_NERRNO(fchdir(dfd));
+        }
 
-        if (chdir(d) < 0 && !context->working_directory_missing_ok) {
+        if (r < 0 && !context->working_directory_missing_ok) {
                 *exit_status = EXIT_CHDIR;
-                return -errno;
+                return r;
         }
 
         return 0;
@@ -3459,7 +3513,7 @@ static int close_remaining_fds(
                 const int *fds, size_t n_fds) {
 
         size_t n_dont_close = 0;
-        int dont_close[n_fds + 15];
+        int dont_close[n_fds + 16];
 
         assert(params);
 
@@ -3495,6 +3549,9 @@ static int close_remaining_fds(
         if (params->user_lookup_fd >= 0)
                 dont_close[n_dont_close++] = params->user_lookup_fd;
 
+        if (params->handoff_timestamp_fd >= 0)
+                dont_close[n_dont_close++] = params->handoff_timestamp_fd;
+
         assert(n_dont_close <= ELEMENTSOF(dont_close));
 
         return close_all_fds(dont_close, n_dont_close);
@@ -3528,26 +3585,29 @@ static int send_user_lookup(
         return 0;
 }
 
-static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
+static int acquire_home(const ExecContext *c, const char **home, char **ret_buf) {
         int r;
 
         assert(c);
         assert(home);
-        assert(buf);
+        assert(ret_buf);
 
         /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
 
-        if (*home)
+        if (*home) /* Already acquired from get_fixed_user()? */
                 return 0;
 
         if (!c->working_directory_home)
                 return 0;
 
-        r = get_home_dir(buf);
+        if (c->dynamic_user)
+                return -EADDRNOTAVAIL;
+
+        r = get_home_dir(ret_buf);
         if (r < 0)
                 return r;
 
-        *home = *buf;
+        *home = *ret_buf;
         return 1;
 }
 
@@ -3641,11 +3701,12 @@ static int add_shifted_fd(int *fds, size_t fds_size, size_t *n_fds, int *fd) {
 }
 
 static int connect_unix_harder(const ExecContext *c, const ExecParameters *p, const OpenFile *of, int ofd) {
+        static const int socket_types[] = { SOCK_DGRAM, SOCK_STREAM, SOCK_SEQPACKET };
+
         union sockaddr_union addr = {
                 .un.sun_family = AF_UNIX,
         };
         socklen_t sa_len;
-        static const int socket_types[] = { SOCK_DGRAM, SOCK_STREAM, SOCK_SEQPACKET };
         int r;
 
         assert(c);
@@ -3655,43 +3716,35 @@ static int connect_unix_harder(const ExecContext *c, const ExecParameters *p, co
 
         r = sockaddr_un_set_path(&addr.un, FORMAT_PROC_FD_PATH(ofd));
         if (r < 0)
-                return log_exec_error_errno(c, p, r, "Failed to set sockaddr for %s: %m", of->path);
-
+                return log_exec_error_errno(c, p, r, "Failed to set sockaddr for '%s': %m", of->path);
         sa_len = r;
 
-        for (size_t i = 0; i < ELEMENTSOF(socket_types); i++) {
+        FOREACH_ELEMENT(i, socket_types) {
                 _cleanup_close_ int fd = -EBADF;
 
-                fd = socket(AF_UNIX, socket_types[i] | SOCK_CLOEXEC, 0);
+                fd = socket(AF_UNIX, *i|SOCK_CLOEXEC, 0);
                 if (fd < 0)
-                        return log_exec_error_errno(c,
-                                                    p,
-                                                    errno,
-                                                    "Failed to create socket for %s: %m",
+                        return log_exec_error_errno(c, p,
+                                                    errno, "Failed to create socket for '%s': %m",
                                                     of->path);
 
                 r = RET_NERRNO(connect(fd, &addr.sa, sa_len));
-                if (r == -EPROTOTYPE)
-                        continue;
-                if (r < 0)
-                        return log_exec_error_errno(c,
-                                                    p,
-                                                    r,
-                                                    "Failed to connect socket for %s: %m",
+                if (r >= 0)
+                        return TAKE_FD(fd);
+                if (r != -EPROTOTYPE)
+                        return log_exec_error_errno(c, p,
+                                                    r, "Failed to connect to socket for '%s': %m",
                                                     of->path);
-
-                return TAKE_FD(fd);
         }
 
-        return log_exec_error_errno(c,
-                                    p,
-                                    SYNTHETIC_ERRNO(EPROTOTYPE), "Failed to connect socket for \"%s\".",
+        return log_exec_error_errno(c, p,
+                                    SYNTHETIC_ERRNO(EPROTOTYPE), "No suitable socket type to connect to socket '%s'.",
                                     of->path);
 }
 
 static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const OpenFile *of) {
-        struct stat st;
         _cleanup_close_ int fd = -EBADF, ofd = -EBADF;
+        struct stat st;
 
         assert(c);
         assert(p);
@@ -3699,10 +3752,10 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
 
         ofd = open(of->path, O_PATH | O_CLOEXEC);
         if (ofd < 0)
-                return log_exec_error_errno(c, p, errno, "Could not open \"%s\": %m", of->path);
+                return log_exec_error_errno(c, p, errno, "Failed to open '%s' as O_PATH: %m", of->path);
 
         if (fstat(ofd, &st) < 0)
-                return log_exec_error_errno(c, p, errno, "Failed to stat %s: %m", of->path);
+                return log_exec_error_errno(c, p, errno, "Failed to stat '%s': %m", of->path);
 
         if (S_ISSOCK(st.st_mode)) {
                 fd = connect_unix_harder(c, p, of, ofd);
@@ -3710,10 +3763,11 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
                         return fd;
 
                 if (FLAGS_SET(of->flags, OPENFILE_READ_ONLY) && shutdown(fd, SHUT_WR) < 0)
-                        return log_exec_error_errno(c, p, errno, "Failed to shutdown send for socket %s: %m",
+                        return log_exec_error_errno(c, p,
+                                                    errno, "Failed to shutdown send for socket '%s': %m",
                                                     of->path);
 
-                log_exec_debug(c, p, "socket %s opened (fd=%d)", of->path, fd);
+                log_exec_debug(c, p, "Opened socket '%s' as fd %d.", of->path, fd);
         } else {
                 int flags = FLAGS_SET(of->flags, OPENFILE_READ_ONLY) ? O_RDONLY : O_RDWR;
                 if (FLAGS_SET(of->flags, OPENFILE_APPEND))
@@ -3723,9 +3777,9 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
 
                 fd = fd_reopen(ofd, flags | O_CLOEXEC);
                 if (fd < 0)
-                        return log_exec_error_errno(c, p, fd, "Failed to open file %s: %m", of->path);
+                        return log_exec_error_errno(c, p, fd, "Failed to reopen file '%s': %m", of->path);
 
-                log_exec_debug(c, p, "file %s opened (fd=%d)", of->path, fd);
+                log_exec_debug(c, p, "Opened file '%s' as fd %d.", of->path, fd);
         }
 
         return TAKE_FD(fd);
@@ -3744,7 +3798,9 @@ static int collect_open_file_fds(const ExecContext *c, ExecParameters *p, size_t
                 fd = get_open_file_fd(c, p, of);
                 if (fd < 0) {
                         if (FLAGS_SET(of->flags, OPENFILE_GRACEFUL)) {
-                                log_exec_debug_errno(c, p, fd, "Failed to get OpenFile= file descriptor for %s, ignoring: %m", of->path);
+                                log_exec_warning_errno(c, p, fd,
+                                                       "Failed to get OpenFile= file descriptor for '%s', ignoring: %m",
+                                                       of->path);
                                 continue;
                         }
 
@@ -3758,9 +3814,7 @@ static int collect_open_file_fds(const ExecContext *c, ExecParameters *p, size_t
                 if (r < 0)
                         return r;
 
-                p->fds[*n_fds] = TAKE_FD(fd);
-
-                (*n_fds)++;
+                p->fds[(*n_fds)++] = TAKE_FD(fd);
         }
 
         return 0;
@@ -3810,7 +3864,7 @@ static bool exec_context_need_unprivileged_private_users(
                context->private_ipc ||
                context->ipc_namespace_path ||
                context->private_mounts > 0 ||
-               context->mount_apivfs ||
+               context->mount_apivfs > 0 ||
                context->n_bind_mounts > 0 ||
                context->n_temporary_filesystems > 0 ||
                context->root_directory ||
@@ -3920,6 +3974,52 @@ static void exec_params_close(ExecParameters *p) {
         p->stderr_fd = safe_close(p->stderr_fd);
 }
 
+static int exec_fd_mark_hot(
+                const ExecContext *c,
+                ExecParameters *p,
+                bool hot,
+                int *reterr_exit_status) {
+
+        assert(c);
+        assert(p);
+
+        if (p->exec_fd < 0)
+                return 0;
+
+        uint8_t x = hot;
+
+        if (write(p->exec_fd, &x, sizeof(x)) < 0) {
+                if (reterr_exit_status)
+                        *reterr_exit_status = EXIT_EXEC;
+                return log_exec_error_errno(c, p, errno, "Failed to mark exec_fd as %s: %m", hot ? "hot" : "cold");
+        }
+
+        return 1;
+}
+
+static int send_handoff_timestamp(
+                const ExecContext *c,
+                ExecParameters *p,
+                int *reterr_exit_status) {
+
+        assert(c);
+        assert(p);
+
+        if (p->handoff_timestamp_fd < 0)
+                return 0;
+
+        dual_timestamp dt;
+        dual_timestamp_now(&dt);
+
+        if (send(p->handoff_timestamp_fd, (const usec_t[2]) { dt.realtime, dt.monotonic }, sizeof(usec_t) * 2, 0) < 0) {
+                if (reterr_exit_status)
+                        *reterr_exit_status = EXIT_EXEC;
+                return log_exec_error_errno(c, p, errno, "Failed to send handoff timestamp: %m");
+        }
+
+        return 1;
+}
+
 int exec_invoke(
                 const ExecCommand *command,
                 const ExecContext *context,
@@ -3974,6 +4074,8 @@ int exec_invoke(
         assert(params);
         assert(exit_status);
 
+        /* This should be mostly redundant, as the log level is also passed as an argument of the executor,
+         * and is already applied earlier. Just for safety. */
         if (context->log_level_max >= 0)
                 log_set_max_level(context->log_level_max);
 
@@ -4049,7 +4151,7 @@ int exec_invoke(
                 return log_exec_error_errno(context, params, r, "Failed to get OpenFile= file descriptors: %m");
         }
 
-        int keep_fds[n_fds + 3];
+        int keep_fds[n_fds + 4];
         memcpy_safe(keep_fds, params->fds, n_fds * sizeof(int));
         n_keep_fds = n_fds;
 
@@ -4059,8 +4161,14 @@ int exec_invoke(
                 return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
         }
 
+        r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->handoff_timestamp_fd);
+        if (r < 0) {
+                *exit_status = EXIT_FDS;
+                return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
+        }
+
 #if HAVE_LIBBPF
-        r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->bpf_outer_map_fd);
+        r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->bpf_restrict_fs_map_fd);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
                 return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
@@ -4099,7 +4207,7 @@ int exec_invoke(
 
                         *exit_status = EXIT_CONFIRM;
                         return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ECANCELED),
-                                                    "Execution cancelled by the user");
+                                                    "Execution cancelled by the user.");
                 }
         }
 
@@ -4141,12 +4249,12 @@ int exec_invoke(
 
                 if (!uid_is_valid(uid)) {
                         *exit_status = EXIT_USER;
-                        return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "UID validation failed for \""UID_FMT"\"", uid);
+                        return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "UID validation failed for \""UID_FMT"\".", uid);
                 }
 
                 if (!gid_is_valid(gid)) {
                         *exit_status = EXIT_USER;
-                        return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "GID validation failed for \""GID_FMT"\"", gid);
+                        return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "GID validation failed for \""GID_FMT"\".", gid);
                 }
 
                 if (runtime->dynamic_creds->user)
@@ -4186,7 +4294,7 @@ int exec_invoke(
 
         params->user_lookup_fd = safe_close(params->user_lookup_fd);
 
-        r = acquire_home(context, uid, &home, &home_buffer);
+        r = acquire_home(context, &home, &home_buffer);
         if (r < 0) {
                 *exit_status = EXIT_CHDIR;
                 return log_exec_error_errno(context, params, r, "Failed to determine $HOME for user: %m");
@@ -4210,9 +4318,10 @@ int exec_invoke(
                 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
                 if (r == -EUCLEAN) {
                         *exit_status = EXIT_CGROUP;
-                        return log_exec_error_errno(context, params, r, "Failed to attach process to cgroup %s "
+                        return log_exec_error_errno(context, params, r,
+                                                    "Failed to attach process to cgroup '%s', "
                                                     "because the cgroup or one of its parents or "
-                                                    "siblings is in the threaded mode: %m", p);
+                                                    "siblings is in the threaded mode.", p);
                 }
                 if (r < 0) {
                         *exit_status = EXIT_CGROUP;
@@ -4242,13 +4351,20 @@ int exec_invoke(
                 return log_exec_error_errno(context, params, r, "Failed to set up standard input: %m");
         }
 
-        r = setup_output(context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        _cleanup_free_ char *fname = NULL;
+        r = path_extract_filename(command->path, &fname);
+        if (r < 0) {
+                *exit_status = EXIT_STDOUT;
+                return log_exec_error_errno(context, params, r, "Failed to extract filename from path %s: %m", command->path);
+        }
+
+        r = setup_output(context, params, STDOUT_FILENO, socket_fd, named_iofds, fname, uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDOUT;
                 return log_exec_error_errno(context, params, r, "Failed to set up standard output: %m");
         }
 
-        r = setup_output(context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+        r = setup_output(context, params, STDERR_FILENO, socket_fd, named_iofds, fname, uid, gid, &journal_stream_dev, &journal_stream_ino);
         if (r < 0) {
                 *exit_status = EXIT_STDERR;
                 return log_exec_error_errno(context, params, r, "Failed to set up standard error output: %m");
@@ -4445,12 +4561,10 @@ int exec_invoke(
                         return log_exec_error_errno(context, params, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
         }
 
-        if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
-                r = exec_setup_credentials(context, params, params->unit_id, uid, gid);
-                if (r < 0) {
-                        *exit_status = EXIT_CREDENTIALS;
-                        return log_exec_error_errno(context, params, r, "Failed to set up credentials: %m");
-                }
+        r = exec_setup_credentials(context, params, params->unit_id, uid, gid);
+        if (r < 0) {
+                *exit_status = EXIT_CREDENTIALS;
+                return log_exec_error_errno(context, params, r, "Failed to set up credentials: %m");
         }
 
         r = build_environment(
@@ -4567,7 +4681,7 @@ int exec_invoke(
                  * wins here. (See above.) */
 
                 /* All fds passed in the fds array will be closed in the pam child process. */
-                r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, params->fds, n_fds);
+                r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, params->fds, n_fds, params->exec_fd);
                 if (r < 0) {
                         *exit_status = EXIT_PAM;
                         return log_exec_error_errno(context, params, r, "Failed to set up PAM session: %m");
@@ -4639,7 +4753,7 @@ int exec_invoke(
 
                 if (ns_type_supported(NAMESPACE_IPC)) {
                         r = setup_shareable_ns(runtime->shared->ipcns_storage_socket, CLONE_NEWIPC);
-                        if (r == -EPERM)
+                        if (ERRNO_IS_NEG_PRIVILEGE(r))
                                 log_exec_warning_errno(context, params, r,
                                                        "PrivateIPC=yes is configured, but IPC namespace setup failed, ignoring: %m");
                         else if (r < 0) {
@@ -4657,7 +4771,13 @@ int exec_invoke(
         if (needs_mount_namespace) {
                 _cleanup_free_ char *error_path = NULL;
 
-                r = apply_mount_namespace(command->flags, context, params, runtime, memory_pressure_path, &error_path);
+                r = apply_mount_namespace(command->flags,
+                                          context,
+                                          params,
+                                          runtime,
+                                          memory_pressure_path,
+                                          needs_sandboxing,
+                                          &error_path);
                 if (r < 0) {
                         *exit_status = EXIT_NAMESPACE;
                         return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
@@ -4672,7 +4792,7 @@ int exec_invoke(
         }
 
         if (context->memory_ksm >= 0)
-                if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm) < 0) {
+                if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0, 0, 0) < 0) {
                         if (ERRNO_IS_NOT_SUPPORTED(errno))
                                 log_exec_debug_errno(context,
                                                      params,
@@ -4731,26 +4851,16 @@ int exec_invoke(
         _cleanup_close_ int executable_fd = -EBADF;
         r = find_executable_full(command->path, /* root= */ NULL, context->exec_search_path, false, &executable, &executable_fd);
         if (r < 0) {
-                if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
-                        log_exec_struct_errno(context, params, LOG_INFO, r,
-                                              "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                              LOG_EXEC_INVOCATION_ID(params),
-                                              LOG_EXEC_MESSAGE(params,
-                                                               "Executable %s missing, skipping: %m",
-                                                               command->path),
-                                              "EXECUTABLE=%s", command->path);
-                        *exit_status = EXIT_SUCCESS;
-                        return 0;
-                }
-
                 *exit_status = EXIT_EXEC;
-                return log_exec_struct_errno(context, params, LOG_INFO, r,
-                                             "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                             LOG_EXEC_INVOCATION_ID(params),
-                                             LOG_EXEC_MESSAGE(params,
-                                                              "Failed to locate executable %s: %m",
-                                                              command->path),
-                                             "EXECUTABLE=%s", command->path);
+                log_exec_struct_errno(context, params, LOG_NOTICE, r,
+                                      "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+                                      LOG_EXEC_MESSAGE(params,
+                                                       "Unable to locate executable '%s': %m",
+                                                       command->path),
+                                      "EXECUTABLE=%s", command->path);
+                /* If the error will be ignored by manager, tune down the log level here. Missing executable
+                 * is very much expected in this case. */
+                return r != -ENOMEM && FLAGS_SET(command->flags, EXEC_COMMAND_IGNORE_FAILURE) ? 1 : r;
         }
 
         r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &executable_fd);
@@ -4791,15 +4901,16 @@ int exec_invoke(
 
         /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that
          * we are more aggressive this time, since we don't need socket_fd and the netns and ipcns fds any
-         * more. We do keep exec_fd however, if we have it, since we need to keep it open until the final
-         * execve(). But first, close the remaining sockets in the context objects. */
+         * more. We do keep exec_fd and handoff_timestamp_fd however, if we have them, since we need to keep
+         * them open until the final execve(). But first, close the remaining sockets in the context
+         * objects. */
 
         exec_runtime_close(runtime);
         exec_params_close(params);
 
         r = close_all_fds(keep_fds, n_keep_fds);
         if (r >= 0)
-                r = shift_fds(params->fds, n_fds);
+                r = pack_fds(params->fds, n_fds);
         if (r >= 0)
                 r = flag_fds(params->fds, n_socket_fds, n_fds, context->non_blocking);
         if (r < 0) {
@@ -4945,8 +5056,10 @@ int exec_invoke(
                 }
         }
 
-        /* Apply working directory here, because the working directory might be on NFS and only the user running
-         * this service might have the correct privilege to change to the working directory */
+        /* Apply working directory here, because the working directory might be on NFS and only the user
+         * running this service might have the correct privilege to change to the working directory. Also, it
+         * is absolutely 💣 crucial 💣 we applied all mount namespacing rearrangements before this, so that
+         * the cwd cannot be used to pin directories outside of the sandbox. */
         r = apply_working_directory(context, params, runtime, home, exit_status);
         if (r < 0)
                 return log_exec_error_errno(context, params, r, "Changing to the requested working directory failed: %m");
@@ -5206,31 +5319,29 @@ int exec_invoke(
 
         log_command_line(context, params, "Executing", executable, final_argv);
 
-        if (params->exec_fd >= 0) {
-                uint8_t hot = 1;
+        /* We have finished with all our initializations. Let's now let the manager know that. From this
+         * point on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
 
-                /* We have finished with all our initializations. Let's now let the manager know that. From this point
-                 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+        r = exec_fd_mark_hot(context, params, /* hot= */ true, exit_status);
+        if (r < 0)
+                return r;
 
-                if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
-                        *exit_status = EXIT_EXEC;
-                        return log_exec_error_errno(context, params, errno, "Failed to enable exec_fd: %m");
-                }
+        /* As the last thing before the execve(), let's send the handoff timestamp */
+        r = send_handoff_timestamp(context, params, exit_status);
+        if (r < 0) {
+                /* If this handoff timestamp failed, let's undo the marking as hot */
+                (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
+                return r;
         }
 
-        r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
-
-        if (params->exec_fd >= 0) {
-                uint8_t hot = 0;
+        /* NB: we leave executable_fd, exec_fd, handoff_timestamp_fd open here. This is safe, because they
+         * have O_CLOEXEC set, and the execve() below will thus automatically close them. In fact, for
+         * exec_fd this is pretty much the whole raison d'etre. */
 
-                /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
-                 * that POLLHUP on it no longer means execve() succeeded. */
+        r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
 
-                if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
-                        *exit_status = EXIT_EXEC;
-                        return log_exec_error_errno(context, params, errno, "Failed to disable exec_fd: %m");
-                }
-        }
+        /* The execve() failed, let's undo the marking as hot */
+        (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
 
         *exit_status = EXIT_EXEC;
         return log_exec_error_errno(context, params, r, "Failed to execute %s: %m", executable);
diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c
index b1e716e..ecd1e70 100644
--- a/src/core/execute-serialize.c
+++ b/src/core/execute-serialize.c
@@ -230,6 +230,10 @@ static int exec_cgroup_context_serialize(const CGroupContext *c, FILE *f) {
                         return r;
         }
 
+        r = serialize_bool(f, "exec-cgroup-context-memory-zswap-writeback", c->memory_zswap_writeback);
+        if (r < 0)
+                return r;
+
         if (c->memory_limit != CGROUP_LIMIT_MAX) {
                 r = serialize_item_format(f, "exec-cgroup-context-memory-limit", "%" PRIu64, c->memory_limit);
                 if (r < 0)
@@ -373,8 +377,7 @@ static int exec_cgroup_context_serialize(const CGroupContext *c, FILE *f) {
                         if (il->limits[type] == cgroup_io_limit_defaults[type])
                                 continue;
 
-                        key = strjoin("exec-cgroup-context-io-device-limit-",
-                                        cgroup_io_limit_type_to_string(type));
+                        key = strjoin("exec-cgroup-context-io-device-limit-", cgroup_io_limit_type_to_string(type));
                         if (!key)
                                 return -ENOMEM;
 
@@ -678,6 +681,11 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         r = safe_atou64(val, &c->startup_memory_zswap_max);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-cgroup-context-memory-zswap-writeback="))) {
+                        r = parse_boolean(val);
+                        if (r < 0)
+                                return r;
+                        c->memory_zswap_writeback = r;
                 } else if ((val = startswith(l, "exec-cgroup-context-memory-limit="))) {
                         r = safe_atou64(val, &c->memory_limit);
                         if (r < 0)
@@ -789,7 +797,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *rwm = NULL;
                         CGroupDevicePermissions p;
 
-                        r = extract_many_words(&val, " ", 0, &path, &rwm, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &rwm);
                         if (r < 0)
                                 return r;
                         if (r == 0)
@@ -806,7 +814,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *weight = NULL;
                         CGroupIODeviceWeight *a = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &path, &weight, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &weight);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -835,7 +843,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *target = NULL;
                         CGroupIODeviceLatency *a = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &path, &target, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &target);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -865,7 +873,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         CGroupIODeviceLimit *limit = NULL;
                         CGroupIOLimitType t;
 
-                        r = extract_many_words(&val, "= ", 0, &type, &path, &limits, NULL);
+                        r = extract_many_words(&val, "= ", 0, &type, &path, &limits);
                         if (r < 0)
                                 return r;
                         if (r != 3)
@@ -900,7 +908,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *weight = NULL;
                         CGroupBlockIODeviceWeight *a = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &path, &weight, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &weight);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -921,7 +929,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *bw = NULL;
                         CGroupBlockIODeviceBandwidth *a = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &path, &bw, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &bw);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -951,7 +959,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *path = NULL, *bw = NULL;
                         CGroupBlockIODeviceBandwidth *a = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &path, &bw, NULL);
+                        r = extract_many_words(&val, " ", 0, &path, &bw);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -1019,7 +1027,7 @@ static int exec_cgroup_context_deserialize(CGroupContext *c, FILE *f) {
                         _cleanup_free_ char *type = NULL, *path = NULL;
                         uint32_t t;
 
-                        r = extract_many_words(&val, " ", 0, &type, &path, NULL);
+                        r = extract_many_words(&val, " ", 0, &type, &path);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -1365,8 +1373,12 @@ static int exec_parameters_serialize(const ExecParameters *p, const ExecContext
         if (r < 0)
                 return r;
 
+        r = serialize_fd(f, fds, "exec-parameters-handoff-timestamp-fd", p->handoff_timestamp_fd);
+        if (r < 0)
+                return r;
+
         if (c && exec_context_restrict_filesystems_set(c)) {
-                r = serialize_fd(f, fds, "exec-parameters-bpf-outer-map-fd", p->bpf_outer_map_fd);
+                r = serialize_fd(f, fds, "exec-parameters-bpf-outer-map-fd", p->bpf_restrict_fs_map_fd);
                 if (r < 0)
                         return r;
         }
@@ -1479,8 +1491,8 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                                 return log_oom_debug();
 
                         /* Ensure we don't leave any FD uninitialized on error, it makes the fuzzer sad */
-                        for (size_t i = 0; i < p->n_socket_fds + p->n_storage_fds; ++i)
-                                p->fds[i] = -EBADF;
+                        FOREACH_ARRAY(i, p->fds, p->n_socket_fds + p->n_storage_fds)
+                                *i = -EBADF;
 
                         r = deserialize_fd_many(fds, val, p->n_socket_fds + p->n_storage_fds, p->fds);
                         if (r < 0)
@@ -1522,7 +1534,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         _cleanup_free_ char *type = NULL, *prefix = NULL;
                         ExecDirectoryType dt;
 
-                        r = extract_many_words(&val, "= ", 0, &type, &prefix, NULL);
+                        r = extract_many_words(&val, "= ", 0, &type, &prefix);
                         if (r < 0)
                                 return r;
                         if (r == 0)
@@ -1585,7 +1597,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->stdin_fd = fd;
+                        close_and_replace(p->stdin_fd, fd);
 
                 } else if ((val = startswith(l, "exec-parameters-stdout-fd="))) {
                         int fd;
@@ -1594,7 +1606,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->stdout_fd = fd;
+                        close_and_replace(p->stdout_fd, fd);
 
                 } else if ((val = startswith(l, "exec-parameters-stderr-fd="))) {
                         int fd;
@@ -1603,7 +1615,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->stderr_fd = fd;
+                        close_and_replace(p->stderr_fd, fd);
                 } else if ((val = startswith(l, "exec-parameters-exec-fd="))) {
                         int fd;
 
@@ -1611,7 +1623,15 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->exec_fd = fd;
+                        close_and_replace(p->exec_fd, fd);
+                } else if ((val = startswith(l, "exec-parameters-handoff-timestamp-fd="))) {
+                        int fd;
+
+                        fd = deserialize_fd(fds, val);
+                        if (fd < 0)
+                                continue;
+
+                        close_and_replace(p->handoff_timestamp_fd, fd);
                 } else if ((val = startswith(l, "exec-parameters-bpf-outer-map-fd="))) {
                         int fd;
 
@@ -1619,13 +1639,13 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->bpf_outer_map_fd = fd;
+                        close_and_replace(p->bpf_restrict_fs_map_fd, fd);
                 } else if ((val = startswith(l, "exec-parameters-notify-socket="))) {
                         r = free_and_strdup(&p->notify_socket, val);
                         if (r < 0)
                                 return r;
                 } else if ((val = startswith(l, "exec-parameters-open-file="))) {
-                        OpenFile *of = NULL;
+                        OpenFile *of;
 
                         r = open_file_parse(val, &of);
                         if (r < 0)
@@ -1643,7 +1663,7 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
                         if (fd < 0)
                                 continue;
 
-                        p->user_lookup_fd = fd;
+                        close_and_replace(p->user_lookup_fd, fd);
                 } else if ((val = startswith(l, "exec-parameters-files-env="))) {
                         r = deserialize_strv(val, &p->files_env);
                         if (r < 0)
@@ -1812,6 +1832,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
+        r = serialize_item_tristate(f, "exec-context-mount-api-vfs", c->mount_apivfs);
+        if (r < 0)
+                return r;
+
         r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm);
         if (r < 0)
                 return r;
@@ -1868,20 +1892,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
-        if (c->mount_apivfs_set) {
-                r = serialize_bool(f, "exec-context-mount-api-vfs", c->mount_apivfs);
-                if (r < 0)
-                        return r;
-        }
-
         r = serialize_bool_elide(f, "exec-context-same-pgrp", c->same_pgrp);
         if (r < 0)
                 return r;
 
-        r = serialize_bool_elide(f, "exec-context-cpu-sched-reset-on-fork", c->cpu_sched_reset_on_fork);
-        if (r < 0)
-                return r;
-
         r = serialize_bool(f, "exec-context-ignore-sigpipe", c->ignore_sigpipe);
         if (r < 0)
                 return r;
@@ -2154,6 +2168,8 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
+        /* This is also passed to executor as an argument. So, the information should be redundant in general.
+         * But, let's keep this as is for consistency with other elements of ExecContext. See exec_spawn(). */
         r = serialize_item_format(f, "exec-context-log-level-max", "%d", c->log_level_max);
         if (r < 0)
                 return r;
@@ -2538,14 +2554,14 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
                 if (base64mem(sc->data, sc->size, &data) < 0)
                         return log_oom_debug();
 
-                r = serialize_item_format(f, "exec-context-set-credentials", "%s %s %s", sc->id, yes_no(sc->encrypted), data);
+                r = serialize_item_format(f, "exec-context-set-credentials", "%s %s %s", sc->id, data, yes_no(sc->encrypted));
                 if (r < 0)
                         return r;
         }
 
         ExecLoadCredential *lc;
         HASHMAP_FOREACH(lc, c->load_credentials) {
-                r = serialize_item_format(f, "exec-context-load-credentials", "%s %s %s", lc->id, yes_no(lc->encrypted), lc->path);
+                r = serialize_item_format(f, "exec-context-load-credentials", "%s %s %s", lc->id, lc->path, yes_no(lc->encrypted));
                 if (r < 0)
                         return r;
         }
@@ -2636,7 +2652,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                         break;
 
                                 p = word;
-                                r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+                                r = extract_many_words(&p, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options);
                                 if (r < 0)
                                         return r;
                                 if (r == 0)
@@ -2669,12 +2685,12 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                 return r;
                 } else if ((val = startswith(l, "exec-context-root-hash="))) {
                         c->root_hash = mfree(c->root_hash);
-                        r = unhexmem(val, strlen(val), &c->root_hash, &c->root_hash_size);
+                        r = unhexmem(val, &c->root_hash, &c->root_hash_size);
                         if (r < 0)
                                 return r;
                 } else if ((val = startswith(l, "exec-context-root-hash-sig="))) {
                         c->root_hash_sig = mfree(c->root_hash_sig);
-                        r= unbase64mem(val, strlen(val), &c->root_hash_sig, &c->root_hash_sig_size);
+                        r= unbase64mem(val, &c->root_hash_sig, &c->root_hash_sig_size);
                         if (r < 0)
                                 return r;
                 } else if ((val = startswith(l, "exec-context-root-ephemeral="))) {
@@ -2695,6 +2711,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         r = safe_atoi(val, &c->private_mounts);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-context-mount-api-vfs="))) {
+                        r = safe_atoi(val, &c->mount_apivfs);
+                        if (r < 0)
+                                return r;
                 } else if ((val = startswith(l, "exec-context-memory-ksm="))) {
                         r = safe_atoi(val, &c->memory_ksm);
                         if (r < 0)
@@ -2762,22 +2782,11 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         c->protect_system = protect_system_from_string(val);
                         if (c->protect_system < 0)
                                 return -EINVAL;
-                } else if ((val = startswith(l, "exec-context-mount-api-vfs="))) {
-                        r = parse_boolean(val);
-                        if (r < 0)
-                                return r;
-                        c->mount_apivfs = r;
-                        c->mount_apivfs_set = true;
                 } else if ((val = startswith(l, "exec-context-same-pgrp="))) {
                         r = parse_boolean(val);
                         if (r < 0)
                                 return r;
                         c->same_pgrp = r;
-                } else if ((val = startswith(l, "exec-context-cpu-sched-reset-on-fork="))) {
-                        r = parse_boolean(val);
-                        if (r < 0)
-                                return r;
-                        c->cpu_sched_reset_on_fork = r;
                 } else if ((val = startswith(l, "exec-context-non-blocking="))) {
                         r = parse_boolean(val);
                         if (r < 0)
@@ -2828,7 +2837,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         _cleanup_free_ char *type = NULL, *mode = NULL;
                         ExecDirectoryType dt;
 
-                        r = extract_many_words(&val, "= ", 0, &type, &mode, NULL);
+                        r = extract_many_words(&val, "= ", 0, &type, &mode);
                         if (r < 0)
                                 return r;
                         if (r == 0 || !mode)
@@ -2854,7 +2863,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                         break;
 
                                 p = tuple;
-                                r = extract_many_words(&p, ":", EXTRACT_UNESCAPE_SEPARATORS, &path, &only_create, NULL);
+                                r = extract_many_words(&p, ":", EXTRACT_UNESCAPE_SEPARATORS, &path, &only_create);
                                 if (r < 0)
                                         return r;
                                 if (r < 2)
@@ -3054,7 +3063,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         if (c->stdin_data)
                                 return -EINVAL; /* duplicated */
 
-                        r = unbase64mem(val, strlen(val), &c->stdin_data, &c->stdin_data_size);
+                        r = unbase64mem(val, &c->stdin_data, &c->stdin_data_size);
                         if (r < 0)
                                 return r;
                 } else if ((val = startswith(l, "exec-context-tty-path="))) {
@@ -3098,6 +3107,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         if (r < 0)
                                 return r;
                 } else if ((val = startswith(l, "exec-context-log-level-max="))) {
+                        /* See comment in serialization. */
                         r = safe_atoi(val, &c->log_level_max);
                         if (r < 0)
                                 return r;
@@ -3314,7 +3324,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                 } else if ((val = startswith(l, "exec-context-temporary-filesystems="))) {
                         _cleanup_free_ char *path = NULL, *options = NULL;
 
-                        r = extract_many_words(&val, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &path, &options, NULL);
+                        r = extract_many_words(&val, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &path, &options);
                         if (r < 0)
                                 return r;
                         if (r < 1)
@@ -3392,7 +3402,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         _cleanup_free_ char *s_id = NULL, *s_errno_num = NULL;
                         int id, errno_num;
 
-                        r = extract_many_words(&val, NULL, 0, &s_id, &s_errno_num, NULL);
+                        r = extract_many_words(&val, NULL, 0, &s_id, &s_errno_num);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -3432,7 +3442,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         _cleanup_free_ char *s_id = NULL, *s_errno_num = NULL;
                         int id, errno_num;
 
-                        r = extract_many_words(&val, " ", 0, &s_id, &s_errno_num, NULL);
+                        r = extract_many_words(&val, " ", 0, &s_id, &s_errno_num);
                         if (r < 0)
                                 return r;
                         if (r != 2)
@@ -3505,8 +3515,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                                NULL,
                                                EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS,
                                                &source,
-                                               &destination,
-                                               NULL);
+                                               &destination);
                         if (r < 0)
                                 return r;
                         if (r == 0)
@@ -3538,8 +3547,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                                        ":",
                                                        EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS,
                                                        &partition,
-                                                       &opts,
-                                                       NULL);
+                                                       &opts);
                                 if (r < 0)
                                         return r;
                                 if (r == 0)
@@ -3619,8 +3627,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                                        ":",
                                                        EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS,
                                                        &partition,
-                                                       &opts,
-                                                       NULL);
+                                                       &opts);
                                 if (r < 0)
                                         return r;
                                 if (r == 0)
@@ -3669,7 +3676,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         _cleanup_(exec_set_credential_freep) ExecSetCredential *sc = NULL;
                         _cleanup_free_ char *id = NULL, *encrypted = NULL, *data = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &id, &encrypted, &data, NULL);
+                        r = extract_many_words(&val, " ", EXTRACT_DONT_COALESCE_SEPARATORS, &id, &data, &encrypted);
                         if (r < 0)
                                 return r;
                         if (r != 3)
@@ -3688,7 +3695,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                                 .encrypted = r,
                         };
 
-                        r = unbase64mem(data, strlen(data), &sc->data, &sc->size);
+                        r = unbase64mem(data, &sc->data, &sc->size);
                         if (r < 0)
                                 return r;
 
@@ -3701,7 +3708,7 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         _cleanup_(exec_load_credential_freep) ExecLoadCredential *lc = NULL;
                         _cleanup_free_ char *id = NULL, *encrypted = NULL, *path = NULL;
 
-                        r = extract_many_words(&val, " ", 0, &id, &encrypted, &path, NULL);
+                        r = extract_many_words(&val, " ", EXTRACT_DONT_COALESCE_SEPARATORS, &id, &path, &encrypted);
                         if (r < 0)
                                 return r;
                         if (r != 3)
diff --git a/src/core/execute.c b/src/core/execute.c
index 8dbdfcf..513e95e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -147,7 +147,7 @@ void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p)
 
         const char *path = exec_context_tty_path(context);
 
-        if (p && p->stdin_fd >= 0 && isatty(p->stdin_fd))
+        if (p && p->stdin_fd >= 0 && isatty_safe(p->stdin_fd))
                 fd = p->stdin_fd;
         else if (path && (context->tty_path || is_terminal_input(context->std_input) ||
                         is_terminal_output(context->std_output) || is_terminal_output(context->std_error))) {
@@ -162,9 +162,11 @@ void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p)
          * that will be closed automatically, and operate on it for convenience. */
         lock_fd = lock_dev_console();
         if (ERRNO_IS_NEG_PRIVILEGE(lock_fd))
-                log_debug_errno(lock_fd, "No privileges to lock /dev/console, proceeding without: %m");
+                log_debug_errno(lock_fd, "No privileges to lock /dev/console, proceeding without lock: %m");
+        else if (ERRNO_IS_NEG_DEVICE_ABSENT(lock_fd))
+                log_debug_errno(lock_fd, "Device /dev/console does not exist, proceeding without lock: %m");
         else if (lock_fd < 0)
-                return (void) log_debug_errno(lock_fd, "Failed to lock /dev/console: %m");
+                log_warning_errno(lock_fd, "Failed to lock /dev/console, proceeding without lock: %m");
 
         if (context->tty_vhangup)
                 (void) terminal_vhangup_fd(fd);
@@ -351,19 +353,18 @@ static void log_command_line(Unit *unit, const char *msg, const char *executable
 
 static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
 
-int exec_spawn(Unit *unit,
-               ExecCommand *command,
-               const ExecContext *context,
-               ExecParameters *params,
-               ExecRuntime *runtime,
-               const CGroupContext *cgroup_context,
-               pid_t *ret) {
+int exec_spawn(
+                Unit *unit,
+                ExecCommand *command,
+                const ExecContext *context,
+                ExecParameters *params,
+                ExecRuntime *runtime,
+                const CGroupContext *cgroup_context,
+                PidRef *ret) {
 
-        char serialization_fd_number[DECIMAL_STR_MAX(int) + 1];
-        _cleanup_free_ char *subcgroup_path = NULL, *log_level = NULL, *executor_path = NULL;
+        _cleanup_free_ char *subcgroup_path = NULL, *max_log_levels = NULL, *executor_path = NULL;
         _cleanup_fdset_free_ FDSet *fdset = NULL;
         _cleanup_fclose_ FILE *f = NULL;
-        pid_t pid;
         int r;
 
         assert(unit);
@@ -371,10 +372,11 @@ int exec_spawn(Unit *unit,
         assert(unit->manager->executor_fd >= 0);
         assert(command);
         assert(context);
-        assert(ret);
         assert(params);
-        assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
+        assert(!params->fds || FLAGS_SET(params->flags, EXEC_PASS_FDS));
+        assert(params->fds || (params->n_socket_fds + params->n_storage_fds == 0));
         assert(!params->files_env); /* We fill this field, ensure it comes NULL-initialized to us */
+        assert(ret);
 
         LOG_CONTEXT_PUSH_UNIT(unit);
 
@@ -404,8 +406,8 @@ int exec_spawn(Unit *unit,
          * child's memory.max, serialize all the state needed to start the unit, and pass it to the
          * systemd-executor binary. clone() with CLONE_VM + CLONE_VFORK will pause the parent until the exec
          * and ensure all memory is shared. The child immediately execs the new binary so the delay should
-         * be minimal. Once glibc provides a clone3 wrapper we can switch to that, and clone directly in the
-         * target cgroup. */
+         * be minimal. If glibc 2.39 is available pidfd_spawn() is used in order to get a race-free pid fd
+         * and to clone directly into the target cgroup (if we booted with cgroupv2). */
 
         r = open_serialization_file("sd-executor-state", &f);
         if (r < 0)
@@ -430,39 +432,57 @@ int exec_spawn(Unit *unit,
         if (r < 0)
                 return log_unit_error_errno(unit, r, "Failed to set O_CLOEXEC on serialized fds: %m");
 
-        r = log_level_to_string_alloc(log_get_max_level(), &log_level);
+        /* If LogLevelMax= is specified, then let's use the specified log level at the beginning of the
+         * executor process. To achieve that the specified log level is passed as an argument, rather than
+         * the one for the manager process. */
+        r = log_max_levels_to_string(context->log_level_max >= 0 ? context->log_level_max : log_get_max_level(), &max_log_levels);
         if (r < 0)
-                return log_unit_error_errno(unit, r, "Failed to convert log level to string: %m");
+                return log_unit_error_errno(unit, r, "Failed to convert max log levels to string: %m");
 
         r = fd_get_path(unit->manager->executor_fd, &executor_path);
         if (r < 0)
                 return log_unit_error_errno(unit, r, "Failed to get executor path from fd: %m");
 
+        char serialization_fd_number[DECIMAL_STR_MAX(int)];
         xsprintf(serialization_fd_number, "%i", fileno(f));
 
+        _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+        dual_timestamp start_timestamp;
+
+        /* Record the start timestamp before we fork so that it is guaranteed to be earlier than the
+         * handoff timestamp. */
+        dual_timestamp_now(&start_timestamp);
+
         /* The executor binary is pinned, to avoid compatibility problems during upgrades. */
         r = posix_spawn_wrapper(
                         FORMAT_PROC_FD_PATH(unit->manager->executor_fd),
                         STRV_MAKE(executor_path,
                                   "--deserialize", serialization_fd_number,
-                                  "--log-level", log_level,
+                                  "--log-level", max_log_levels,
                                   "--log-target", log_target_to_string(manager_get_executor_log_target(unit->manager))),
                         environ,
-                        &pid);
+                        cg_unified() > 0 ? subcgroup_path : NULL,
+                        &pidref);
+        if (r == -EUCLEAN && subcgroup_path)
+                return log_unit_error_errno(unit, r,
+                                            "Failed to spawn process into cgroup '%s', because the cgroup "
+                                            "or one of its parents or siblings is in the threaded mode.",
+                                            subcgroup_path);
         if (r < 0)
                 return log_unit_error_errno(unit, r, "Failed to spawn executor: %m");
-
-        log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
-
         /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
          * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
          * process will be killed too). */
-        if (subcgroup_path)
-                (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
+        if (r == 0 && subcgroup_path)
+                (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pidref.pid);
+        /* r > 0: Already in the right cgroup thanks to CLONE_INTO_CGROUP */
+
+        log_unit_debug(unit, "Forked %s as " PID_FMT " (%s CLONE_INTO_CGROUP)",
+                       command->path, pidref.pid, r > 0 ? "via" : "without");
 
-        exec_status_start(&command->exec_status, pid);
+        exec_status_start(&command->exec_status, pidref.pid, &start_timestamp);
 
-        *ret = pid;
+        *ret = TAKE_PIDREF(pidref);
         return 0;
 }
 
@@ -491,6 +511,7 @@ void exec_context_init(ExecContext *c) {
                 .tty_rows = UINT_MAX,
                 .tty_cols = UINT_MAX,
                 .private_mounts = -1,
+                .mount_apivfs = -1,
                 .memory_ksm = -1,
                 .set_login_environment = -1,
         };
@@ -664,13 +685,19 @@ void exec_command_done_array(ExecCommand *c, size_t n) {
                 exec_command_done(i);
 }
 
+ExecCommand* exec_command_free(ExecCommand *c) {
+        if (!c)
+                return NULL;
+
+        exec_command_done(c);
+        return mfree(c);
+}
+
 ExecCommand* exec_command_free_list(ExecCommand *c) {
         ExecCommand *i;
 
-        while ((i = LIST_POP(command, c))) {
-                exec_command_done(i);
-                free(i);
-        }
+        while ((i = LIST_POP(command, c)))
+                exec_command_free(i);
 
         return NULL;
 }
@@ -1396,7 +1423,7 @@ bool exec_context_maintains_privileges(const ExecContext *c) {
         if (!c->user)
                 return true;
 
-        if (streq(c->user, "root") || streq(c->user, "0"))
+        if (STR_IN_SET(c->user, "root", "0"))
                 return true;
 
         return false;
@@ -1421,8 +1448,8 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) {
         assert(c);
 
         /* Explicit setting wins */
-        if (c->mount_apivfs_set)
-                return c->mount_apivfs;
+        if (c->mount_apivfs >= 0)
+                return c->mount_apivfs > 0;
 
         /* Default to "yes" if root directory or image are specified */
         if (exec_context_with_rootfs(c))
@@ -1657,6 +1684,15 @@ uint64_t exec_context_get_timer_slack_nsec(const ExecContext *c) {
         return (uint64_t) MAX(r, 0);
 }
 
+bool exec_context_get_set_login_environment(const ExecContext *c) {
+        assert(c);
+
+        if (c->set_login_environment >= 0)
+                return c->set_login_environment;
+
+        return c->user || c->dynamic_user || c->pam_name;
+}
+
 char** exec_context_get_syscall_filter(const ExecContext *c) {
         _cleanup_strv_free_ char **l = NULL;
 
@@ -1787,14 +1823,17 @@ char** exec_context_get_restrict_filesystems(const ExecContext *c) {
         return l ? TAKE_PTR(l) : strv_new(NULL);
 }
 
-void exec_status_start(ExecStatus *s, pid_t pid) {
+void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts) {
         assert(s);
 
         *s = (ExecStatus) {
                 .pid = pid,
         };
 
-        dual_timestamp_now(&s->start_timestamp);
+        if (ts)
+                s->start_timestamp = *ts;
+        else
+                dual_timestamp_now(&s->start_timestamp);
 }
 
 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
@@ -1814,6 +1853,19 @@ void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int
                 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
 }
 
+void exec_status_handoff(ExecStatus *s, const struct ucred *ucred, const dual_timestamp *ts) {
+        assert(s);
+        assert(ucred);
+        assert(ts);
+
+        if (ucred->pid != s->pid)
+                *s = (ExecStatus) {
+                        .pid = ucred->pid,
+                };
+
+        s->handoff_timestamp = *ts;
+}
+
 void exec_status_reset(ExecStatus *s) {
         assert(s);
 
@@ -1836,19 +1888,45 @@ void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
         if (dual_timestamp_is_set(&s->start_timestamp))
                 fprintf(f,
                         "%sStart Timestamp: %s\n",
-                        prefix, FORMAT_TIMESTAMP(s->start_timestamp.realtime));
+                        prefix, FORMAT_TIMESTAMP_STYLE(s->start_timestamp.realtime, TIMESTAMP_US));
+
+        if (dual_timestamp_is_set(&s->handoff_timestamp) && dual_timestamp_is_set(&s->start_timestamp) &&
+            s->handoff_timestamp.monotonic > s->start_timestamp.monotonic)
+                fprintf(f,
+                        "%sHandoff Timestamp: %s since start\n",
+                        prefix,
+                        FORMAT_TIMESPAN(usec_sub_unsigned(s->handoff_timestamp.monotonic, s->start_timestamp.monotonic), 1));
+        else if (dual_timestamp_is_set(&s->handoff_timestamp)) /* no handoff recorded → print no handoff line at all */
+                fprintf(f,
+                        "%sHandoff Timestamp: %s\n",
+                        prefix, FORMAT_TIMESTAMP_STYLE(s->handoff_timestamp.realtime, TIMESTAMP_US));
+
+        if (dual_timestamp_is_set(&s->exit_timestamp)) {
+
+                if (dual_timestamp_is_set(&s->handoff_timestamp) && s->exit_timestamp.monotonic > s->handoff_timestamp.monotonic)
+                        fprintf(f,
+                                "%sExit Timestamp: %s since handoff\n",
+                                prefix,
+                                FORMAT_TIMESPAN(usec_sub_unsigned(s->exit_timestamp.monotonic, s->handoff_timestamp.monotonic), 1));
+                else if (dual_timestamp_is_set(&s->start_timestamp) && s->exit_timestamp.monotonic > s->start_timestamp.monotonic)
+                        fprintf(f,
+                                "%sExit Timestamp: %s since start\n",
+                                prefix,
+                                FORMAT_TIMESPAN(usec_sub_unsigned(s->exit_timestamp.monotonic, s->start_timestamp.monotonic), 1));
+                else
+                        fprintf(f,
+                                "%sExit Timestamp: %s\n",
+                                prefix, FORMAT_TIMESTAMP_STYLE(s->exit_timestamp.realtime, TIMESTAMP_US));
 
-        if (dual_timestamp_is_set(&s->exit_timestamp))
                 fprintf(f,
-                        "%sExit Timestamp: %s\n"
                         "%sExit Code: %s\n"
                         "%sExit Status: %i\n",
-                        prefix, FORMAT_TIMESTAMP(s->exit_timestamp.realtime),
                         prefix, sigchld_code_to_string(s->code),
                         prefix, s->status);
+        }
 }
 
-static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
+void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
         _cleanup_free_ char *cmd = NULL;
         const char *prefix2;
 
@@ -1951,8 +2029,7 @@ static char *destroy_tree(char *path) {
 }
 
 void exec_shared_runtime_done(ExecSharedRuntime *rt) {
-        if (!rt)
-                return;
+        assert(rt);
 
         if (rt->manager)
                 (void) hashmap_remove(rt->manager->exec_shared_runtime_by_id, rt->id);
@@ -1965,8 +2042,10 @@ void exec_shared_runtime_done(ExecSharedRuntime *rt) {
 }
 
 static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
-        exec_shared_runtime_done(rt);
+        if (!rt)
+                return NULL;
 
+        exec_shared_runtime_done(rt);
         return mfree(rt);
 }
 
@@ -2090,15 +2169,13 @@ static int exec_shared_runtime_make(
                         return r;
         }
 
-        if (exec_needs_network_namespace(c)) {
+        if (exec_needs_network_namespace(c))
                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
                         return -errno;
-        }
 
-        if (exec_needs_ipc_namespace(c)) {
+        if (exec_needs_ipc_namespace(c))
                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ipcns_storage_socket) < 0)
                         return -errno;
-        }
 
         r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ipcns_storage_socket, ret);
         if (r < 0)
@@ -2488,7 +2565,7 @@ void exec_params_shallow_clear(ExecParameters *p) {
         p->fds = mfree(p->fds);
         p->exec_fd = safe_close(p->exec_fd);
         p->user_lookup_fd = -EBADF;
-        p->bpf_outer_map_fd = -EBADF;
+        p->bpf_restrict_fs_map_fd = -EBADF;
         p->unit_id = mfree(p->unit_id);
         p->invocation_id = SD_ID128_NULL;
         p->invocation_id_string[0] = '\0';
@@ -2643,46 +2720,46 @@ ExecCleanMask exec_clean_mask_from_string(const char *s) {
 }
 
 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
-        [EXEC_INPUT_NULL] = "null",
-        [EXEC_INPUT_TTY] = "tty",
+        [EXEC_INPUT_NULL]      = "null",
+        [EXEC_INPUT_TTY]       = "tty",
         [EXEC_INPUT_TTY_FORCE] = "tty-force",
-        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
-        [EXEC_INPUT_SOCKET] = "socket",
-        [EXEC_INPUT_NAMED_FD] = "fd",
-        [EXEC_INPUT_DATA] = "data",
-        [EXEC_INPUT_FILE] = "file",
+        [EXEC_INPUT_TTY_FAIL]  = "tty-fail",
+        [EXEC_INPUT_SOCKET]    = "socket",
+        [EXEC_INPUT_NAMED_FD]  = "fd",
+        [EXEC_INPUT_DATA]      = "data",
+        [EXEC_INPUT_FILE]      = "file",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
 
 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
-        [EXEC_OUTPUT_INHERIT] = "inherit",
-        [EXEC_OUTPUT_NULL] = "null",
-        [EXEC_OUTPUT_TTY] = "tty",
-        [EXEC_OUTPUT_KMSG] = "kmsg",
-        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
-        [EXEC_OUTPUT_JOURNAL] = "journal",
+        [EXEC_OUTPUT_INHERIT]             = "inherit",
+        [EXEC_OUTPUT_NULL]                = "null",
+        [EXEC_OUTPUT_TTY]                 = "tty",
+        [EXEC_OUTPUT_KMSG]                = "kmsg",
+        [EXEC_OUTPUT_KMSG_AND_CONSOLE]    = "kmsg+console",
+        [EXEC_OUTPUT_JOURNAL]             = "journal",
         [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
-        [EXEC_OUTPUT_SOCKET] = "socket",
-        [EXEC_OUTPUT_NAMED_FD] = "fd",
-        [EXEC_OUTPUT_FILE] = "file",
-        [EXEC_OUTPUT_FILE_APPEND] = "append",
-        [EXEC_OUTPUT_FILE_TRUNCATE] = "truncate",
+        [EXEC_OUTPUT_SOCKET]              = "socket",
+        [EXEC_OUTPUT_NAMED_FD]            = "fd",
+        [EXEC_OUTPUT_FILE]                = "file",
+        [EXEC_OUTPUT_FILE_APPEND]         = "append",
+        [EXEC_OUTPUT_FILE_TRUNCATE]       = "truncate",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
 
 static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
-        [EXEC_UTMP_INIT] = "init",
+        [EXEC_UTMP_INIT]  = "init",
         [EXEC_UTMP_LOGIN] = "login",
-        [EXEC_UTMP_USER] = "user",
+        [EXEC_UTMP_USER]  = "user",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
 
 static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
-        [EXEC_PRESERVE_NO] = "no",
-        [EXEC_PRESERVE_YES] = "yes",
+        [EXEC_PRESERVE_NO]      = "no",
+        [EXEC_PRESERVE_YES]     = "yes",
         [EXEC_PRESERVE_RESTART] = "restart",
 };
 
@@ -2690,10 +2767,10 @@ DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EX
 
 /* This table maps ExecDirectoryType to the setting it is configured with in the unit */
 static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
-        [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
-        [EXEC_DIRECTORY_STATE] = "StateDirectory",
-        [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
-        [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
+        [EXEC_DIRECTORY_RUNTIME]       = "RuntimeDirectory",
+        [EXEC_DIRECTORY_STATE]         = "StateDirectory",
+        [EXEC_DIRECTORY_CACHE]         = "CacheDirectory",
+        [EXEC_DIRECTORY_LOGS]          = "LogsDirectory",
         [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
 };
 
@@ -2724,10 +2801,10 @@ DEFINE_STRING_TABLE_LOOKUP(exec_directory_type_mode, ExecDirectoryType);
  * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
  * directories, specifically .timer units with their timestamp touch file. */
 static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
-        [EXEC_DIRECTORY_RUNTIME] = "runtime",
-        [EXEC_DIRECTORY_STATE] = "state",
-        [EXEC_DIRECTORY_CACHE] = "cache",
-        [EXEC_DIRECTORY_LOGS] = "logs",
+        [EXEC_DIRECTORY_RUNTIME]       = "runtime",
+        [EXEC_DIRECTORY_STATE]         = "state",
+        [EXEC_DIRECTORY_CACHE]         = "cache",
+        [EXEC_DIRECTORY_LOGS]          = "logs",
         [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
 };
 
@@ -2736,7 +2813,7 @@ DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
 static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
         [EXEC_KEYRING_INHERIT] = "inherit",
         [EXEC_KEYRING_PRIVATE] = "private",
-        [EXEC_KEYRING_SHARED] = "shared",
+        [EXEC_KEYRING_SHARED]  = "shared",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);
diff --git a/src/core/execute.h b/src/core/execute.h
index 5a6927a..107ae25 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -91,6 +91,7 @@ typedef enum ExecKeyringMode {
 struct ExecStatus {
         dual_timestamp start_timestamp;
         dual_timestamp exit_timestamp;
+        dual_timestamp handoff_timestamp;
         pid_t pid;
         int code;     /* as in siginfo_t::si_code */
         int status;   /* as in siginfo_t::si_status */
@@ -199,7 +200,6 @@ struct ExecContext {
         bool nice_set:1;
         bool ioprio_set:1;
         bool cpu_sched_set:1;
-        bool mount_apivfs_set:1;
 
         /* This is not exposed to the user but available internally. We need it to make sure that whenever we
          * spawn /usr/bin/mount it is run in the same process group as us so that the autofs logic detects
@@ -312,6 +312,7 @@ struct ExecContext {
         ProcSubset proc_subset;    /* subset= */
 
         int private_mounts;
+        int mount_apivfs;
         int memory_ksm;
         bool private_tmp;
         bool private_network;
@@ -326,7 +327,6 @@ struct ExecContext {
         ProtectSystem protect_system;
         ProtectHome protect_home;
         bool protect_hostname;
-        bool mount_apivfs;
 
         bool dynamic_user;
         bool remove_ipc;
@@ -390,22 +390,23 @@ static inline bool exec_context_with_rootfs(const ExecContext *c) {
 }
 
 typedef enum ExecFlags {
-        EXEC_APPLY_SANDBOXING      = 1 << 0,
-        EXEC_APPLY_CHROOT          = 1 << 1,
-        EXEC_APPLY_TTY_STDIN       = 1 << 2,
-        EXEC_PASS_LOG_UNIT         = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
-        EXEC_CHOWN_DIRECTORIES     = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
-        EXEC_NSS_DYNAMIC_BYPASS    = 1 << 5, /* Set the SYSTEMD_NSS_DYNAMIC_BYPASS environment variable, to disable nss-systemd blocking on PID 1, for use by dbus-daemon */
-        EXEC_CGROUP_DELEGATE       = 1 << 6,
-        EXEC_IS_CONTROL            = 1 << 7,
-        EXEC_CONTROL_CGROUP        = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
-        EXEC_WRITE_CREDENTIALS     = 1 << 9, /* Set up the credential store logic */
+        EXEC_APPLY_SANDBOXING        = 1 << 0,
+        EXEC_APPLY_CHROOT            = 1 << 1,
+        EXEC_APPLY_TTY_STDIN         = 1 << 2,
+        EXEC_PASS_LOG_UNIT           = 1 << 3,  /* Whether to pass the unit name to the service's journal stream connection */
+        EXEC_CHOWN_DIRECTORIES       = 1 << 4,  /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
+        EXEC_NSS_DYNAMIC_BYPASS      = 1 << 5,  /* Set the SYSTEMD_NSS_DYNAMIC_BYPASS environment variable, to disable nss-systemd blocking on PID 1, for use by dbus-daemon */
+        EXEC_CGROUP_DELEGATE         = 1 << 6,
+        EXEC_IS_CONTROL              = 1 << 7,
+        EXEC_CONTROL_CGROUP          = 1 << 8,  /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
+        EXEC_SETUP_CREDENTIALS       = 1 << 9,  /* Set up the credential store logic */
+        EXEC_SETUP_CREDENTIALS_FRESH = 1 << 10, /* Set up a new credential store (disable reuse) */
 
         /* The following are not used by execute.c, but by consumers internally */
-        EXEC_PASS_FDS              = 1 << 10,
-        EXEC_SETENV_RESULT         = 1 << 11,
-        EXEC_SET_WATCHDOG          = 1 << 12,
-        EXEC_SETENV_MONITOR_RESULT = 1 << 13, /* Pass exit status to OnFailure= and OnSuccess= dependencies. */
+        EXEC_PASS_FDS                = 1 << 11,
+        EXEC_SETENV_RESULT           = 1 << 12,
+        EXEC_SET_WATCHDOG            = 1 << 13,
+        EXEC_SETENV_MONITOR_RESULT   = 1 << 14, /* Pass exit status to OnFailure= and OnSuccess= dependencies. */
 } ExecFlags;
 
 /* Parameters for a specific invocation of a command. This structure is put together right before a command is
@@ -442,7 +443,7 @@ struct ExecParameters {
         int stdout_fd;
         int stderr_fd;
 
-        /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
+        /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done. */
         int exec_fd;
 
         char *notify_socket;
@@ -453,7 +454,9 @@ struct ExecParameters {
 
         char **files_env;
         int user_lookup_fd;
-        int bpf_outer_map_fd;
+        int handoff_timestamp_fd;
+
+        int bpf_restrict_fs_map_fd;
 
         /* Used for logging in the executor functions */
         char *unit_id;
@@ -461,34 +464,40 @@ struct ExecParameters {
         char invocation_id_string[SD_ID128_STRING_MAX];
 };
 
-#define EXEC_PARAMETERS_INIT(_flags)        \
-        (ExecParameters) {                  \
-                .flags = (_flags),          \
-                .stdin_fd         = -EBADF, \
-                .stdout_fd        = -EBADF, \
-                .stderr_fd        = -EBADF, \
-                .exec_fd          = -EBADF, \
-                .bpf_outer_map_fd = -EBADF, \
-                .user_lookup_fd   = -EBADF, \
-        };
+#define EXEC_PARAMETERS_INIT(_flags)              \
+        (ExecParameters) {                        \
+                .flags = (_flags),                \
+                .stdin_fd               = -EBADF, \
+                .stdout_fd              = -EBADF, \
+                .stderr_fd              = -EBADF, \
+                .exec_fd                = -EBADF, \
+                .bpf_restrict_fs_map_fd = -EBADF, \
+                .user_lookup_fd         = -EBADF, \
+                .handoff_timestamp_fd   = -EBADF, \
+        }
 
 #include "unit.h"
 #include "dynamic-user.h"
 
-int exec_spawn(Unit *unit,
-               ExecCommand *command,
-               const ExecContext *context,
-               ExecParameters *exec_params,
-               ExecRuntime *runtime,
-               const CGroupContext *cgroup_context,
-               pid_t *ret);
+int exec_spawn(
+                Unit *unit,
+                ExecCommand *command,
+                const ExecContext *context,
+                ExecParameters *exec_params,
+                ExecRuntime *runtime,
+                const CGroupContext *cgroup_context,
+                PidRef *ret);
 
 void exec_command_done(ExecCommand *c);
 void exec_command_done_array(ExecCommand *c, size_t n);
+ExecCommand* exec_command_free(ExecCommand *c);
+DEFINE_TRIVIAL_CLEANUP_FUNC(ExecCommand*, exec_command_free);
 ExecCommand* exec_command_free_list(ExecCommand *c);
 void exec_command_free_array(ExecCommand **c, size_t n);
 void exec_command_reset_status_array(ExecCommand *c, size_t n);
 void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
+
+void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix);
 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
 void exec_command_append_list(ExecCommand **l, ExecCommand *e);
 int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
@@ -527,14 +536,16 @@ int exec_context_get_nice(const ExecContext *c);
 int exec_context_get_cpu_sched_policy(const ExecContext *c);
 int exec_context_get_cpu_sched_priority(const ExecContext *c);
 uint64_t exec_context_get_timer_slack_nsec(const ExecContext *c);
+bool exec_context_get_set_login_environment(const ExecContext *c);
 char** exec_context_get_syscall_filter(const ExecContext *c);
 char** exec_context_get_syscall_archs(const ExecContext *c);
 char** exec_context_get_syscall_log(const ExecContext *c);
 char** exec_context_get_address_families(const ExecContext *c);
 char** exec_context_get_restrict_filesystems(const ExecContext *c);
 
-void exec_status_start(ExecStatus *s, pid_t pid);
+void exec_status_start(ExecStatus *s, pid_t pid, const dual_timestamp *ts);
 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
+void exec_status_handoff(ExecStatus *s, const struct ucred *ucred, const dual_timestamp *ts);
 void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
 void exec_status_reset(ExecStatus *s);
 
@@ -613,23 +624,23 @@ bool exec_needs_ipc_namespace(const ExecContext *context);
 #define LOG_EXEC_INVOCATION_ID_FIELD_FORMAT(ep) \
         ((ep)->runtime_scope == RUNTIME_SCOPE_USER ? "USER_INVOCATION_ID=%s" : "INVOCATION_ID=%s")
 
-#define log_exec_full_errno_zerook(ec, ep, level, error, ...)             \
-        ({                                                                \
-                const ExecContext *_c = (ec);                             \
-                const ExecParameters *_p = (ep);                          \
-                const int _l = (level);                                   \
-                bool _do_log = !(log_get_max_level() < LOG_PRI(_l) ||     \
-                        !(_c->log_level_max < 0 ||                        \
-                        _c->log_level_max >= LOG_PRI(_l)));               \
-                LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields,                \
-                                     _c->n_log_extra_fields);             \
-                !_do_log ? -ERRNO_VALUE(error) :                          \
-                        log_object_internal(_l, error, PROJECT_FILE,      \
-                        __LINE__, __func__,                               \
-                        LOG_EXEC_ID_FIELD(_p),                            \
-                        _p->unit_id,                                      \
-                        LOG_EXEC_INVOCATION_ID_FIELD(_p),                 \
-                        _p->invocation_id_string, ##__VA_ARGS__);         \
+#define log_exec_full_errno_zerook(ec, ep, level, error, ...)                     \
+        ({                                                                        \
+                const ExecContext *_c = (ec);                                     \
+                const ExecParameters *_p = (ep);                                  \
+                const int _l = (level);                                           \
+                bool _do_log = _c->log_level_max < 0 ||                           \
+                               _c->log_level_max >= LOG_PRI(_l);                  \
+                LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields,                        \
+                                     _c->n_log_extra_fields);                     \
+                !_do_log ? -ERRNO_VALUE(error) :                                  \
+                        log_object_internal(_l, error,                            \
+                                            PROJECT_FILE, __LINE__, __func__,     \
+                                            LOG_EXEC_ID_FIELD(_p),                \
+                                            _p->unit_id,                          \
+                                            LOG_EXEC_INVOCATION_ID_FIELD(_p),     \
+                                            _p->invocation_id_string,             \
+                                            ##__VA_ARGS__);                       \
         })
 
 #define log_exec_full_errno(ec, ep, level, error, ...)                            \
@@ -653,48 +664,34 @@ bool exec_needs_ipc_namespace(const ExecContext *context);
 #define log_exec_warning_errno(ec, ep, error, ...) log_exec_full_errno(ec, ep, LOG_WARNING, error, __VA_ARGS__)
 #define log_exec_error_errno(ec, ep, error, ...)   log_exec_full_errno(ec, ep, LOG_ERR, error, __VA_ARGS__)
 
-#define log_exec_struct_errno(ec, ep, level, error, ...)                                                      \
-        ({                                                                                                    \
-                const ExecContext *_c = (ec);                                                                 \
-                const ExecParameters *_p = (ep);                                                              \
-                const int _l = (level);                                                                       \
-                bool _do_log = !(_c->log_level_max < 0 ||                                                     \
-                                 _c->log_level_max >= LOG_PRI(_l));                                           \
-                LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields,                                                    \
-                                     _c->n_log_extra_fields);                                                 \
-                _do_log ?                                                                                     \
-                        log_struct_errno(_l, error, __VA_ARGS__, LOG_EXEC_ID_FIELD_FORMAT(_p), _p->unit_id) : \
-                        -ERRNO_VALUE(error);                            \
-        })
-
-#define log_exec_struct(ec, ep, level, ...) log_exec_struct_errno(ec, ep, level, 0, __VA_ARGS__)
-
-#define log_exec_struct_iovec_errno(ec, ep, level, error, iovec, n_iovec)   \
-        ({                                                                  \
-                const ExecContext *_c = (ec);                               \
-                const ExecParameters *_p = (ep);                            \
-                const int _l = (level);                                     \
-                bool _do_log = !(_c->log_level_max < 0 ||                   \
-                                 _c->log_level_max >= LOG_PRI(_l));         \
-                LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields,                  \
-                                     _c->n_log_extra_fields);               \
-                _do_log ?                                                   \
-                        log_struct_iovec_errno(_l, error, iovec, n_iovec) : \
-                        -ERRNO_VALUE(error);                                \
-        })
-
-#define log_exec_struct_iovec(ec, ep, level, iovec, n_iovec) log_exec_struct_iovec_errno(ec, ep, level, 0, iovec, n_iovec)
-
 /* Like LOG_MESSAGE(), but with the unit name prefixed. */
 #define LOG_EXEC_MESSAGE(ep, fmt, ...) LOG_MESSAGE("%s: " fmt, (ep)->unit_id, ##__VA_ARGS__)
 #define LOG_EXEC_ID(ep) LOG_EXEC_ID_FIELD_FORMAT(ep), (ep)->unit_id
 #define LOG_EXEC_INVOCATION_ID(ep) LOG_EXEC_INVOCATION_ID_FIELD_FORMAT(ep), (ep)->invocation_id_string
 
-#define _LOG_CONTEXT_PUSH_EXEC(ec, ep, p, c)                                                  \
-        const ExecContext *c = (ec);                                                          \
-        const ExecParameters *p = (ep);                                                       \
+#define log_exec_struct_errno(ec, ep, level, error, ...)                          \
+        ({                                                                        \
+                const ExecContext *_c = (ec);                                     \
+                const ExecParameters *_p = (ep);                                  \
+                const int _l = (level);                                           \
+                bool _do_log = _c->log_level_max < 0 ||                           \
+                               _c->log_level_max >= LOG_PRI(_l);                  \
+                LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields,                        \
+                                     _c->n_log_extra_fields);                     \
+                !_do_log ? -ERRNO_VALUE(error) :                                  \
+                        log_struct_errno(_l, error,                               \
+                                         LOG_EXEC_ID(_p),                         \
+                                         LOG_EXEC_INVOCATION_ID(_p),              \
+                                         __VA_ARGS__);                            \
+        })
+
+#define log_exec_struct(ec, ep, level, ...) log_exec_struct_errno(ec, ep, level, 0, __VA_ARGS__)
+
+#define _LOG_CONTEXT_PUSH_EXEC(ec, ep, p, c)                                                       \
+        const ExecContext *c = (ec);                                                               \
+        const ExecParameters *p = (ep);                                                            \
         LOG_CONTEXT_PUSH_KEY_VALUE(LOG_EXEC_ID_FIELD(p), p->unit_id);                              \
-        LOG_CONTEXT_PUSH_KEY_VALUE(LOG_EXEC_INVOCATION_ID_FIELD(p), p->invocation_id_string); \
+        LOG_CONTEXT_PUSH_KEY_VALUE(LOG_EXEC_INVOCATION_ID_FIELD(p), p->invocation_id_string);      \
         LOG_CONTEXT_PUSH_IOV(c->log_extra_fields, c->n_log_extra_fields)
 
 #define LOG_CONTEXT_PUSH_EXEC(ec, ep) \
diff --git a/src/core/executor.c b/src/core/executor.c
index b2716ef..bd0c742 100644
--- a/src/core/executor.c
+++ b/src/core/executor.c
@@ -245,12 +245,13 @@ static int run(int argc, char *argv[]) {
 
                 log_exec_struct_errno(&context, &params, LOG_ERR, r,
                                       "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
-                                      LOG_EXEC_INVOCATION_ID(&params),
                                       LOG_EXEC_MESSAGE(&params, "Failed at step %s spawning %s: %m",
                                                        status, command.path),
                                       "EXECUTABLE=%s", command.path);
         } else
-                assert(exit_status == EXIT_SUCCESS); /* When 'skip' is chosen in the confirm spawn prompt */
+                /* r == 0: 'skip' is chosen in the confirm spawn prompt
+                 * r > 0:  expected/ignored failure, do not log at error level */
+                assert((r == 0) == (exit_status == EXIT_SUCCESS));
 
         return exit_status;
 }
diff --git a/src/core/fuzz-execute-serialize.c b/src/core/fuzz-execute-serialize.c
index 6069efd..5b2dc95 100644
--- a/src/core/fuzz-execute-serialize.c
+++ b/src/core/fuzz-execute-serialize.c
@@ -56,7 +56,7 @@ static void exec_fuzz_one(FILE *f, FDSet *fdset) {
         params.stderr_fd = -EBADF;
         params.exec_fd = -EBADF;
         params.user_lookup_fd = -EBADF;
-        params.bpf_outer_map_fd = -EBADF;
+        params.bpf_restrict_fs_map_fd = -EBADF;
         if (!params.fds)
                 params.n_socket_fds = params.n_storage_fds = 0;
         for (size_t i = 0; params.fds && i < params.n_socket_fds + params.n_storage_fds; i++)
diff --git a/src/core/generator-setup.c b/src/core/generator-setup.c
index 00d6ad6..b16211e 100644
--- a/src/core/generator-setup.c
+++ b/src/core/generator-setup.c
@@ -8,7 +8,7 @@
 #include "rm-rf.h"
 
 int lookup_paths_mkdir_generator(LookupPaths *p) {
-        int r, q;
+        int r;
 
         assert(p);
 
@@ -16,14 +16,8 @@ int lookup_paths_mkdir_generator(LookupPaths *p) {
                 return -EINVAL;
 
         r = mkdir_p_label(p->generator, 0755);
-
-        q = mkdir_p_label(p->generator_early, 0755);
-        if (q < 0 && r >= 0)
-                r = q;
-
-        q = mkdir_p_label(p->generator_late, 0755);
-        if (q < 0 && r >= 0)
-                r = q;
+        RET_GATHER(r, mkdir_p_label(p->generator_early, 0755));
+        RET_GATHER(r, mkdir_p_label(p->generator_late, 0755));
 
         return r;
 }
diff --git a/src/core/import-creds.c b/src/core/import-creds.c
index 48f3160..f27ffed 100644
--- a/src/core/import-creds.c
+++ b/src/core/import-creds.c
@@ -80,7 +80,7 @@ static int acquire_credential_directory(ImportCredentialContext *c, const char *
         if (c->target_dir_fd >= 0)
                 return c->target_dir_fd;
 
-        r = path_is_mount_point(path, NULL, 0);
+        r = path_is_mount_point(path);
         if (r < 0) {
                 if (r != -ENOENT)
                         return log_error_errno(r, "Failed to determine if %s is a mount point: %m", path);
@@ -314,7 +314,7 @@ static int proc_cmdline_callback(const char *key, const char *value, void *data)
         colon++;
 
         if (base64) {
-                r = unbase64mem(colon, SIZE_MAX, &binary, &l);
+                r = unbase64mem(colon, &binary, &l);
                 if (r < 0) {
                         log_warning_errno(r, "Failed to decode binary credential '%s' data, ignoring: %m", n);
                         return 0;
@@ -519,13 +519,13 @@ static int parse_smbios_strings(ImportCredentialContext *c, const char *data, si
                         return log_oom();
 
                 if (!credential_name_valid(cn)) {
-                        log_warning("SMBIOS credential name '%s' is not valid, ignoring: %m", cn);
+                        log_warning("SMBIOS credential name '%s' is not valid, ignoring.", cn);
                         continue;
                 }
 
                 /* Optionally base64 decode the data, if requested, to allow binary credentials */
                 if (unbase64) {
-                        r = unbase64mem(eq + 1, nul - (eq + 1), &buf, &buflen);
+                        r = unbase64mem_full(eq + 1, nul - (eq + 1), /* secure = */ false, &buf, &buflen);
                         if (r < 0) {
                                 log_warning_errno(r, "Failed to base64 decode credential '%s', ignoring: %m", cn);
                                 continue;
@@ -753,7 +753,7 @@ static int merge_credentials_trusted(const char *creds_dir) {
                 return 0;
 
         /* Do not try to merge initrd credentials into foreign credentials directories */
-        if (!path_equal_ptr(creds_dir, SYSTEM_CREDENTIALS_DIRECTORY)) {
+        if (!path_equal(creds_dir, SYSTEM_CREDENTIALS_DIRECTORY)) {
                 log_debug("Not importing initrd credentials, as foreign $CREDENTIALS_DIRECTORY has been set.");
                 return 0;
         }
@@ -815,7 +815,6 @@ static int setenv_notify_socket(void) {
 
 static int report_credentials_per_func(const char *title, int (*get_directory_func)(const char **ret)) {
         _cleanup_free_ DirectoryEntries *de = NULL;
-        _cleanup_close_ int dir_fd = -EBADF;
         _cleanup_free_ char *ll = NULL;
         const char *d = NULL;
         int r, c = 0;
@@ -831,11 +830,7 @@ static int report_credentials_per_func(const char *title, int (*get_directory_fu
                 return log_warning_errno(r, "Failed to determine %s directory: %m", title);
         }
 
-        dir_fd = open(d, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
-        if (dir_fd < 0)
-                return log_warning_errno(errno, "Failed to open credentials directory %s: %m", d);
-
-        r = readdir_all(dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
+        r = readdir_all_at(AT_FDCWD, d, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
         if (r < 0)
                 return log_warning_errno(r, "Failed to enumerate credentials directory %s: %m", d);
 
diff --git a/src/core/job.c b/src/core/job.c
index e78c2a7..2f19468 100644
--- a/src/core/job.c
+++ b/src/core/job.c
@@ -133,6 +133,7 @@ Job* job_free(Job *j) {
 
 static void job_set_state(Job *j, JobState state) {
         assert(j);
+        assert(j->manager);
         assert(state >= 0);
         assert(state < _JOB_STATE_MAX);
 
@@ -145,15 +146,15 @@ static void job_set_state(Job *j, JobState state) {
                 return;
 
         if (j->state == JOB_RUNNING)
-                j->unit->manager->n_running_jobs++;
+                j->manager->n_running_jobs++;
         else {
                 assert(j->state == JOB_WAITING);
-                assert(j->unit->manager->n_running_jobs > 0);
+                assert(j->manager->n_running_jobs > 0);
 
-                j->unit->manager->n_running_jobs--;
+                j->manager->n_running_jobs--;
 
-                if (j->unit->manager->n_running_jobs <= 0)
-                        j->unit->manager->jobs_in_progress_event_source = sd_event_source_disable_unref(j->unit->manager->jobs_in_progress_event_source);
+                if (j->manager->n_running_jobs <= 0)
+                        j->manager->jobs_in_progress_event_source = sd_event_source_disable_unref(j->manager->jobs_in_progress_event_source);
         }
 }
 
@@ -281,6 +282,8 @@ int job_install_deserialized(Job *j) {
         Job **pj;
         int r;
 
+        assert(j);
+        assert(j->manager);
         assert(!j->installed);
 
         if (j->type < 0 || j->type >= _JOB_TYPE_MAX_IN_TRANSACTION)
@@ -307,7 +310,7 @@ int job_install_deserialized(Job *j) {
         j->installed = true;
 
         if (j->state == JOB_RUNNING)
-                j->unit->manager->n_running_jobs++;
+                j->manager->n_running_jobs++;
 
         log_unit_debug(j->unit,
                        "Reinstalled deserialized job %s/%s as %u",
@@ -633,16 +636,19 @@ static const char* job_done_message_format(Unit *u, JobType t, JobResult result)
                 [JOB_UNSUPPORTED] = "Starting of %s unsupported.",
                 [JOB_COLLECTED]   = "Unnecessary job was removed for %s.",
                 [JOB_ONCE]        = "Unit %s has been started before and cannot be started again.",
+                [JOB_FROZEN]      = "Cannot start frozen unit %s.",
         };
         static const char* const generic_finished_stop_job[_JOB_RESULT_MAX] = {
                 [JOB_DONE]        = "Stopped %s.",
                 [JOB_FAILED]      = "Stopped %s with error.",
                 [JOB_TIMEOUT]     = "Timed out stopping %s.",
+                [JOB_FROZEN]      = "Cannot stop frozen unit %s.",
         };
         static const char* const generic_finished_reload_job[_JOB_RESULT_MAX] = {
                 [JOB_DONE]        = "Reloaded %s.",
                 [JOB_FAILED]      = "Reload failed for %s.",
                 [JOB_TIMEOUT]     = "Timed out reloading %s.",
+                [JOB_FROZEN]      = "Cannot reload frozen unit %s.",
         };
         /* When verify-active detects the unit is inactive, report it.
          * Most likely a DEPEND warning from a requisiting unit will
@@ -704,6 +710,7 @@ static const struct {
         [JOB_UNSUPPORTED] = { LOG_WARNING, ANSI_HIGHLIGHT_YELLOW, "UNSUPP" },
         [JOB_COLLECTED]   = { LOG_INFO,                                    },
         [JOB_ONCE]        = { LOG_ERR,     ANSI_HIGHLIGHT_RED,    " ONCE " },
+        [JOB_FROZEN]      = { LOG_ERR,     ANSI_HIGHLIGHT_RED,    "FROZEN" },
 };
 
 static const char* job_done_mid(JobType type, JobResult result) {
@@ -954,6 +961,8 @@ int job_run_and_invalidate(Job *j) {
                         r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false);
                 else if (r == -ESTALE)
                         r = job_finish_and_invalidate(j, JOB_ONCE, true, false);
+                else if (r == -EDEADLK)
+                        r = job_finish_and_invalidate(j, JOB_FROZEN, true, false);
                 else if (r < 0)
                         r = job_finish_and_invalidate(j, JOB_FAILED, true, false);
         }
@@ -1011,7 +1020,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
                 goto finish;
         }
 
-        if (IN_SET(result, JOB_FAILED, JOB_INVALID))
+        if (IN_SET(result, JOB_FAILED, JOB_INVALID, JOB_FROZEN))
                 j->manager->n_failed_jobs++;
 
         job_uninstall(j);
@@ -1369,6 +1378,7 @@ int job_coldplug(Job *j) {
 
 void job_shutdown_magic(Job *j) {
         assert(j);
+        assert(j->manager);
 
         /* The shutdown target gets some special treatment here: we
          * tell the kernel to begin with flushing its disk caches, to
@@ -1381,16 +1391,19 @@ void job_shutdown_magic(Job *j) {
         if (j->type != JOB_START)
                 return;
 
-        if (!MANAGER_IS_SYSTEM(j->unit->manager))
+        if (!unit_has_name(j->unit, SPECIAL_SHUTDOWN_TARGET))
                 return;
 
-        if (!unit_has_name(j->unit, SPECIAL_SHUTDOWN_TARGET))
+        /* This is the very beginning of the shutdown phase, so take the timestamp here */
+        dual_timestamp_now(j->manager->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START);
+
+        if (!MANAGER_IS_SYSTEM(j->manager))
                 return;
 
         /* In case messages on console has been disabled on boot */
-        j->unit->manager->no_console_output = false;
+        j->manager->no_console_output = false;
 
-        manager_invalidate_startup_units(j->unit->manager);
+        manager_invalidate_startup_units(j->manager);
 
         if (detect_container() > 0)
                 return;
@@ -1430,6 +1443,7 @@ bool job_may_gc(Job *j) {
         Unit *other;
 
         assert(j);
+        assert(j->manager);
 
         /* Checks whether this job should be GC'ed away. We only do this for jobs of units that have no effect on their
          * own and just track external state. For now the only unit type that qualifies for this are .device units.
@@ -1450,7 +1464,7 @@ bool job_may_gc(Job *j) {
          * referenced by one, and reset this whenever we notice that no private bus connections are around. This means
          * the GC is a bit too conservative when it comes to jobs created by private bus connections. */
         if (j->ref_by_private_bus) {
-                if (set_isempty(j->unit->manager->private_buses))
+                if (set_isempty(j->manager->private_buses))
                         j->ref_by_private_bus = false;
                 else
                         return false;
@@ -1473,6 +1487,7 @@ bool job_may_gc(Job *j) {
 
 void job_add_to_gc_queue(Job *j) {
         assert(j);
+        assert(j->manager);
 
         if (j->in_gc_queue)
                 return;
@@ -1480,7 +1495,7 @@ void job_add_to_gc_queue(Job *j) {
         if (!job_may_gc(j))
                 return;
 
-        LIST_PREPEND(gc_queue, j->unit->manager->gc_job_queue, j);
+        LIST_PREPEND(gc_queue, j->manager->gc_job_queue, j);
         j->in_gc_queue = true;
 }
 
@@ -1645,6 +1660,7 @@ static const char* const job_result_table[_JOB_RESULT_MAX] = {
         [JOB_UNSUPPORTED] = "unsupported",
         [JOB_COLLECTED]   = "collected",
         [JOB_ONCE]        = "once",
+        [JOB_FROZEN]      = "frozen",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult);
diff --git a/src/core/job.h b/src/core/job.h
index 891d87a..8318b52 100644
--- a/src/core/job.h
+++ b/src/core/job.h
@@ -96,6 +96,7 @@ enum JobResult {
         JOB_UNSUPPORTED,         /* Couldn't start a unit, because the unit type is not supported on the system */
         JOB_COLLECTED,           /* Job was garbage collected, since nothing needed it anymore */
         JOB_ONCE,                /* Unit was started before, and hence can't be started again */
+        JOB_FROZEN,              /* Unit is currently frozen, so we can't safely operate on it */
         _JOB_RESULT_MAX,
         _JOB_RESULT_INVALID = -EINVAL,
 };
diff --git a/src/core/kmod-setup.c b/src/core/kmod-setup.c
index b8e3f7a..c39b136 100644
--- a/src/core/kmod-setup.c
+++ b/src/core/kmod-setup.c
@@ -9,28 +9,13 @@
 #include "fileio.h"
 #include "kmod-setup.h"
 #include "macro.h"
+#include "module-util.h"
 #include "recurse-dir.h"
 #include "string-util.h"
 #include "strv.h"
 #include "virt.h"
 
 #if HAVE_KMOD
-#include "module-util.h"
-
-static void systemd_kmod_log(
-                void *data,
-                int priority,
-                const char *file, int line,
-                const char *fn,
-                const char *format,
-                va_list args) {
-
-        /* library logging is enabled at debug only */
-        DISABLE_WARNING_FORMAT_NONLITERAL;
-        log_internalv(LOG_DEBUG, 0, file, line, fn, format, args);
-        REENABLE_WARNING;
-}
-
 static int match_modalias_recurse_dir_cb(
                 RecurseDirEvent event,
                 const char *path,
@@ -113,12 +98,11 @@ static bool in_qemu(void) {
 
 int kmod_setup(void) {
 #if HAVE_KMOD
-
         static const struct {
                 const char *module;
                 const char *path;
-                bool warn_if_unavailable:1;
-                bool warn_if_module:1;
+                bool warn_if_unavailable;
+                bool warn_if_module;
                 bool (*condition_fn)(void);
         } kmod_table[] = {
                 /* This one we need to load explicitly, since auto-loading on use doesn't work
@@ -166,34 +150,32 @@ int kmod_setup(void) {
                 { "tpm",                        "/sys/class/tpmrm",          false, false, efi_has_tpm2       },
 #endif
         };
-        _cleanup_(kmod_unrefp) struct kmod_ctx *ctx = NULL;
-        unsigned i;
+
+        int r;
 
         if (have_effective_cap(CAP_SYS_MODULE) <= 0)
                 return 0;
 
-        for (i = 0; i < ELEMENTSOF(kmod_table); i++) {
-                if (kmod_table[i].path && access(kmod_table[i].path, F_OK) >= 0)
+        _cleanup_(sym_kmod_unrefp) struct kmod_ctx *ctx = NULL;
+        FOREACH_ELEMENT(kmod, kmod_table) {
+                if (kmod->path && access(kmod->path, F_OK) >= 0)
                         continue;
 
-                if (kmod_table[i].condition_fn && !kmod_table[i].condition_fn())
+                if (kmod->condition_fn && !kmod->condition_fn())
                         continue;
 
-                if (kmod_table[i].warn_if_module)
+                if (kmod->warn_if_module)
                         log_debug("Your kernel apparently lacks built-in %s support. Might be "
                                   "a good idea to compile it in. We'll now try to work around "
-                                  "this by loading the module...", kmod_table[i].module);
+                                  "this by loading the module...", kmod->module);
 
                 if (!ctx) {
-                        ctx = kmod_new(NULL, NULL);
-                        if (!ctx)
-                                return log_oom();
-
-                        kmod_set_log_fn(ctx, systemd_kmod_log, NULL);
-                        kmod_load_resources(ctx);
+                        r = module_setup_context(&ctx);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to initialize kmod context: %m");
                 }
 
-                (void) module_load_and_warn(ctx, kmod_table[i].module, kmod_table[i].warn_if_unavailable);
+                (void) module_load_and_warn(ctx, kmod->module, kmod->warn_if_unavailable);
         }
 
 #endif
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index 45f9ab0..df219d8 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -136,7 +136,7 @@
 {{type}}.ProtectSystem,                    config_parse_protect_system,                 0,                                  offsetof({{type}}, exec_context.protect_system)
 {{type}}.ProtectHome,                      config_parse_protect_home,                   0,                                  offsetof({{type}}, exec_context.protect_home)
 {{type}}.MountFlags,                       config_parse_exec_mount_propagation_flag,    0,                                  offsetof({{type}}, exec_context.mount_propagation_flag)
-{{type}}.MountAPIVFS,                      config_parse_exec_mount_apivfs,              0,                                  offsetof({{type}}, exec_context)
+{{type}}.MountAPIVFS,                      config_parse_tristate,                       0,                                  offsetof({{type}}, exec_context.mount_apivfs)
 {{type}}.Personality,                      config_parse_personality,                    0,                                  offsetof({{type}}, exec_context.personality)
 {{type}}.RuntimeDirectoryPreserve,         config_parse_exec_preserve_mode,             0,                                  offsetof({{type}}, exec_context.runtime_directory_preserve_mode)
 {{type}}.RuntimeDirectoryMode,             config_parse_mode,                           0,                                  offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)
@@ -220,6 +220,7 @@
 {{type}}.StartupMemorySwapMax,             config_parse_memory_limit,                   0,                                  offsetof({{type}}, cgroup_context)
 {{type}}.MemoryZSwapMax,                   config_parse_memory_limit,                   0,                                  offsetof({{type}}, cgroup_context)
 {{type}}.StartupMemoryZSwapMax,            config_parse_memory_limit,                   0,                                  offsetof({{type}}, cgroup_context)
+{{type}}.MemoryZSwapWriteback,             config_parse_bool,                           0,                                  offsetof({{type}}, cgroup_context.memory_zswap_writeback)
 {{type}}.MemoryLimit,                      config_parse_memory_limit,                   0,                                  offsetof({{type}}, cgroup_context)
 {{type}}.DeviceAllow,                      config_parse_device_allow,                   0,                                  offsetof({{type}}, cgroup_context)
 {{type}}.DevicePolicy,                     config_parse_device_policy,                  0,                                  offsetof({{type}}, cgroup_context.device_policy)
@@ -309,7 +310,8 @@ Unit.PartOf,                             config_parse_unit_deps,
 Unit.JoinsNamespaceOf,                   config_parse_unit_deps,                      UNIT_JOINS_NAMESPACE_OF,            0
 Unit.RequiresOverridable,                config_parse_obsolete_unit_deps,             UNIT_REQUIRES,                      0
 Unit.RequisiteOverridable,               config_parse_obsolete_unit_deps,             UNIT_REQUISITE,                     0
-Unit.RequiresMountsFor,                  config_parse_unit_requires_mounts_for,       0,                                  0
+Unit.RequiresMountsFor,                  config_parse_unit_mounts_for,                0,                                  0
+Unit.WantsMountsFor,                     config_parse_unit_mounts_for,                0,                                  0
 Unit.StopWhenUnneeded,                   config_parse_bool,                           0,                                  offsetof(Unit, stop_when_unneeded)
 Unit.RefuseManualStart,                  config_parse_bool,                           0,                                  offsetof(Unit, refuse_manual_start)
 Unit.RefuseManualStop,                   config_parse_bool,                           0,                                  offsetof(Unit, refuse_manual_stop)
@@ -325,7 +327,7 @@ Unit.IgnoreOnSnapshot,                   config_parse_warn_compat,
 Unit.JobTimeoutSec,                      config_parse_job_timeout_sec,                0,                                  0
 Unit.JobRunningTimeoutSec,               config_parse_job_running_timeout_sec,        0,                                  0
 Unit.JobTimeoutAction,                   config_parse_emergency_action,               0,                                  offsetof(Unit, job_timeout_action)
-Unit.JobTimeoutRebootArgument,           config_parse_unit_string_printf,             0,                                  offsetof(Unit, job_timeout_reboot_arg)
+Unit.JobTimeoutRebootArgument,           config_parse_reboot_parameter,               0,                                  offsetof(Unit, job_timeout_reboot_arg)
 Unit.StartLimitIntervalSec,              config_parse_sec,                            0,                                  offsetof(Unit, start_ratelimit.interval)
 {# The following is a legacy alias name for compatibility #}
 Unit.StartLimitInterval,                 config_parse_sec,                            0,                                  offsetof(Unit, start_ratelimit.interval)
@@ -335,7 +337,7 @@ Unit.FailureAction,                      config_parse_emergency_action,
 Unit.SuccessAction,                      config_parse_emergency_action,               0,                                  offsetof(Unit, success_action)
 Unit.FailureActionExitStatus,            config_parse_exit_status,                    0,                                  offsetof(Unit, failure_action_exit_status)
 Unit.SuccessActionExitStatus,            config_parse_exit_status,                    0,                                  offsetof(Unit, success_action_exit_status)
-Unit.RebootArgument,                     config_parse_unit_string_printf,             0,                                  offsetof(Unit, reboot_arg)
+Unit.RebootArgument,                     config_parse_reboot_parameter,               0,                                  offsetof(Unit, reboot_arg)
 Unit.ConditionPathExists,                config_parse_unit_condition_path,            CONDITION_PATH_EXISTS,              offsetof(Unit, conditions)
 Unit.ConditionPathExistsGlob,            config_parse_unit_condition_path,            CONDITION_PATH_EXISTS_GLOB,         offsetof(Unit, conditions)
 Unit.ConditionPathIsDirectory,           config_parse_unit_condition_path,            CONDITION_PATH_IS_DIRECTORY,        offsetof(Unit, conditions)
@@ -498,6 +500,7 @@ Socket.FreeBind,                         config_parse_bool,
 Socket.Transparent,                      config_parse_bool,                           0,                                  offsetof(Socket, transparent)
 Socket.Broadcast,                        config_parse_bool,                           0,                                  offsetof(Socket, broadcast)
 Socket.PassCredentials,                  config_parse_bool,                           0,                                  offsetof(Socket, pass_cred)
+Socket.PassFileDescriptorsToExec,        config_parse_bool,                           0,                                  offsetof(Socket, pass_fds_to_exec)
 Socket.PassSecurity,                     config_parse_bool,                           0,                                  offsetof(Socket, pass_sec)
 Socket.PassPacketInfo,                   config_parse_bool,                           0,                                  offsetof(Socket, pass_pktinfo)
 Socket.Timestamping,                     config_parse_socket_timestamping,            0,                                  offsetof(Socket, timestamping)
@@ -530,7 +533,7 @@ Socket.SELinuxContextFromNet,            config_parse_warn_compat,
 {{ EXEC_CONTEXT_CONFIG_ITEMS('Socket') }}
 {{ CGROUP_CONTEXT_CONFIG_ITEMS('Socket') }}
 {{ KILL_CONTEXT_CONFIG_ITEMS('Socket') }}
-Mount.What,                              config_parse_unit_string_printf,             0,                                  offsetof(Mount, parameters_fragment.what)
+Mount.What,                              config_parse_mount_node,                     0,                                  offsetof(Mount, parameters_fragment.what)
 Mount.Where,                             config_parse_unit_path_printf,               0,                                  offsetof(Mount, where)
 Mount.Options,                           config_parse_unit_string_printf,             0,                                  offsetof(Mount, parameters_fragment.options)
 Mount.Type,                              config_parse_unit_string_printf,             0,                                  offsetof(Mount, parameters_fragment.fstype)
@@ -547,7 +550,7 @@ Automount.Where,                         config_parse_unit_path_printf,
 Automount.ExtraOptions,                  config_parse_unit_string_printf,             0,                                  offsetof(Automount, extra_options)
 Automount.DirectoryMode,                 config_parse_mode,                           0,                                  offsetof(Automount, directory_mode)
 Automount.TimeoutIdleSec,                config_parse_sec_fix_0,                      0,                                  offsetof(Automount, timeout_idle_usec)
-Swap.What,                               config_parse_unit_path_printf,               0,                                  offsetof(Swap, parameters_fragment.what)
+Swap.What,                               config_parse_mount_node,                     0,                                  offsetof(Swap, parameters_fragment.what)
 Swap.Priority,                           config_parse_swap_priority,                  0,                                  0
 Swap.Options,                            config_parse_unit_string_printf,             0,                                  offsetof(Swap, parameters_fragment.options)
 Swap.TimeoutSec,                         config_parse_sec_fix_0,                      0,                                  offsetof(Swap, timeout_usec)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 0baf08e..5ae6888 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -16,8 +16,8 @@
 #include "all-units.h"
 #include "alloc-util.h"
 #include "bpf-firewall.h"
-#include "bpf-lsm.h"
 #include "bpf-program.h"
+#include "bpf-restrict-fs.h"
 #include "bpf-socket-bind.h"
 #include "bus-error.h"
 #include "bus-internal.h"
@@ -38,6 +38,7 @@
 #include "fileio.h"
 #include "firewall-util.h"
 #include "fs-util.h"
+#include "fstab-util.h"
 #include "hexdecoct.h"
 #include "iovec-util.h"
 #include "ioprio-util.h"
@@ -56,6 +57,7 @@
 #include "pcre2-util.h"
 #include "percent-util.h"
 #include "process-util.h"
+#include "reboot-util.h"
 #include "seccomp-util.h"
 #include "securebits-util.h"
 #include "selinux-util.h"
@@ -248,7 +250,7 @@ int unit_is_likely_recursive_template_dependency(Unit *u, const char *name, cons
 
         /* Fragment paths should also be equal as a custom fragment for a specific template instance
          * wouldn't necessarily lead to infinite recursion. */
-        if (!path_equal_ptr(u->fragment_path, fragment_path))
+        if (!path_equal(u->fragment_path, fragment_path))
                 return false;
 
         if (!contains_instance_specifier_superset(format))
@@ -361,6 +363,40 @@ int config_parse_unit_string_printf(
         return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
 }
 
+int config_parse_reboot_parameter(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+        /* Expands unit specifiers in rvalue, validates the result as a reboot parameter, then stores it as a string. */
+        _cleanup_free_ char *k = NULL; /* receives the specifier-expanded copy of rvalue */
+        const Unit *u = ASSERT_PTR(userdata); /* userdata must be the unit the specifiers are resolved against */
+        int r;
+
+        assert(filename);
+        assert(line);
+        assert(rvalue);
+
+        r = unit_full_printf(u, rvalue, &k);
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+                return 0; /* only warn and skip this assignment; no error is returned to the caller */
+        }
+
+        if (!reboot_parameter_is_valid(k)) { /* the check runs on the expanded string, not on the raw rvalue */
+                log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid reboot parameter '%s', ignoring.", k);
+                return 0; /* invalid values are likewise skipped with a warning */
+        }
+        /* Hand the expanded string k (in place of rvalue) to the generic string parser, which stores it via data. */
+        return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, k, data, userdata);
+}
+
 int config_parse_unit_strv_printf(
                 const char *unit,
                 const char *filename,
@@ -433,8 +469,9 @@ int config_parse_colon_separated_paths(
                 const char *rvalue,
                 void *data,
                 void *userdata) {
+
         char ***sv = ASSERT_PTR(data);
-        const Unit *u = userdata;
+        const Unit *u = ASSERT_PTR(userdata);
         int r;
 
         assert(filename);
@@ -574,17 +611,13 @@ int config_parse_socket_listen(
                 void *data,
                 void *userdata) {
 
+        Socket *s = ASSERT_PTR(SOCKET(data));
         _cleanup_free_ SocketPort *p = NULL;
-        SocketPort *tail;
-        Socket *s;
         int r;
 
         assert(filename);
         assert(lvalue);
         assert(rvalue);
-        assert(data);
-
-        s = SOCKET(data);
 
         if (isempty(rvalue)) {
                 /* An empty assignment removes all ports */
@@ -592,10 +625,15 @@ int config_parse_socket_listen(
                 return 0;
         }
 
-        p = new0(SocketPort, 1);
+        p = new(SocketPort, 1);
         if (!p)
                 return log_oom();
 
+        *p = (SocketPort) {
+                .socket = s,
+                .fd = -EBADF,
+        };
+
         if (ltype != SOCKET_SOCKET) {
                 _cleanup_free_ char *k = NULL;
 
@@ -605,7 +643,11 @@ int config_parse_socket_listen(
                         return 0;
                 }
 
-                r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+                PathSimplifyWarnFlags flags = PATH_CHECK_ABSOLUTE;
+                if (ltype != SOCKET_SPECIAL)
+                        flags |= PATH_CHECK_NON_API_VFS;
+
+                r = path_simplify_and_warn(k, flags, unit, filename, line, lvalue);
                 if (r < 0)
                         return 0;
 
@@ -619,7 +661,7 @@ int config_parse_socket_listen(
                 p->type = ltype;
 
         } else if (streq(lvalue, "ListenNetlink")) {
-                _cleanup_free_ char  *k = NULL;
+                _cleanup_free_ char *k = NULL;
 
                 r = unit_path_printf(UNIT(s), rvalue, &k);
                 if (r < 0) {
@@ -644,7 +686,7 @@ int config_parse_socket_listen(
                         return 0;
                 }
 
-                if (k[0] == '/') { /* Only for AF_UNIX file system sockets… */
+                if (path_is_absolute(k)) { /* Only for AF_UNIX file system sockets… */
                         r = patch_var_run(unit, filename, line, lvalue, &k);
                         if (r < 0)
                                 return r;
@@ -674,16 +716,7 @@ int config_parse_socket_listen(
                 p->type = SOCKET_SOCKET;
         }
 
-        p->fd = -EBADF;
-        p->auxiliary_fds = NULL;
-        p->n_auxiliary_fds = 0;
-        p->socket = s;
-
-        tail = LIST_FIND_TAIL(port, s->ports);
-        LIST_INSERT_AFTER(port, s->ports, tail, p);
-
-        p = NULL;
-
+        LIST_APPEND(port, s->ports, TAKE_PTR(p));
         return 0;
 }
 
@@ -858,9 +891,7 @@ int config_parse_exec(
                 void *userdata) {
 
         ExecCommand **e = ASSERT_PTR(data);
-        const Unit *u = userdata;
-        const char *p;
-        bool semicolon;
+        const Unit *u = ASSERT_PTR(userdata);
         int r;
 
         assert(filename);
@@ -875,15 +906,11 @@ int config_parse_exec(
                 return 0;
         }
 
-        p = rvalue;
+        const char *p = rvalue;
+        bool semicolon;
+
         do {
                 _cleanup_free_ char *path = NULL, *firstword = NULL;
-                ExecCommandFlags flags = 0;
-                bool ignore = false, separate_argv0 = false;
-                _cleanup_free_ ExecCommand *nce = NULL;
-                _cleanup_strv_free_ char **n = NULL;
-                size_t nlen = 0;
-                const char *f;
 
                 semicolon = false;
 
@@ -897,25 +924,30 @@ int config_parse_exec(
                         continue;
                 }
 
-                f = firstword;
-                for (;;) {
-                        /* We accept an absolute path as first argument.  If it's prefixed with - and the path doesn't
-                         * exist, we ignore it instead of erroring out; if it's prefixed with @, we allow overriding of
-                         * argv[0]; if it's prefixed with :, we will not do environment variable substitution;
-                         * if it's prefixed with +, it will be run with full privileges and no sandboxing; if
-                         * it's prefixed with '!' we apply sandboxing, but do not change user/group credentials; if
-                         * it's prefixed with '!!', then we apply user/group credentials if the kernel supports ambient
-                         * capabilities -- if it doesn't we don't apply the credentials themselves, but do apply most
-                         * other sandboxing, with some special exceptions for changing UID.
+                const char *f = firstword;
+                bool ignore, separate_argv0 = false;
+                ExecCommandFlags flags = 0;
+
+                for (;; f++) {
+                        /* We accept an absolute path as first argument. Valid prefixes and their effect:
+                         *
+                         * "-":  Ignore if the path doesn't exist
+                         * "@":  Allow overriding argv[0] (supplied as a separate argument)
+                         * ":":  Disable environment variable substitution
+                         * "+":  Run with full privileges and no sandboxing
+                         * "!":  Apply sandboxing except for user/group credentials
+                         * "!!": Apply user/group credentials if the kernel supports ambient capabilities -
+                         *       if it doesn't we don't apply the credentials themselves, but do apply
+                         *       most other sandboxing, with some special exceptions for changing UID.
                          *
-                         * The idea is that '!!' may be used to write services that can take benefit of systemd's
-                         * UID/GID dropping if the kernel supports ambient creds, but provide an automatic fallback to
-                         * privilege dropping within the daemon if the kernel does not offer that. */
+                         * The idea is that '!!' may be used to write services that can take benefit of
+                         * systemd's UID/GID dropping if the kernel supports ambient creds, but provide
+                         * an automatic fallback to privilege dropping within the daemon if the kernel
+                         * does not offer that. */
 
-                        if (*f == '-' && !(flags & EXEC_COMMAND_IGNORE_FAILURE)) {
+                        if (*f == '-' && !(flags & EXEC_COMMAND_IGNORE_FAILURE))
                                 flags |= EXEC_COMMAND_IGNORE_FAILURE;
-                                ignore = true;
-                        } else if (*f == '@' && !separate_argv0)
+                        else if (*f == '@' && !separate_argv0)
                                 separate_argv0 = true;
                         else if (*f == ':' && !(flags & EXEC_COMMAND_NO_ENV_EXPAND))
                                 flags |= EXEC_COMMAND_NO_ENV_EXPAND;
@@ -928,9 +960,10 @@ int config_parse_exec(
                                 flags |= EXEC_COMMAND_AMBIENT_MAGIC;
                         } else
                                 break;
-                        f++;
                 }
 
+                ignore = FLAGS_SET(flags, EXEC_COMMAND_IGNORE_FAILURE);
+
                 r = unit_path_printf(u, f, &path);
                 if (r < 0) {
                         log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
@@ -940,19 +973,18 @@ int config_parse_exec(
                 }
 
                 if (isempty(path)) {
-                        /* First word is either "-" or "@" with no command. */
                         log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
-                                   "Empty path in command line%s: '%s'",
+                                   "Empty path in command line%s: %s",
                                    ignore ? ", ignoring" : "", rvalue);
                         return ignore ? 0 : -ENOEXEC;
                 }
                 if (!string_is_safe(path)) {
                         log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
-                                   "Executable name contains special characters%s: %s",
+                                   "Executable path contains special characters%s: %s",
                                    ignore ? ", ignoring" : "", path);
                         return ignore ? 0 : -ENOEXEC;
                 }
-                if (endswith(path, "/")) {
+                if (path_implies_directory(path)) {
                         log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
                                    "Executable path specifies a directory%s: %s",
                                    ignore ? ", ignoring" : "", path);
@@ -966,92 +998,71 @@ int config_parse_exec(
                         return ignore ? 0 : -ENOEXEC;
                 }
 
-                if (!separate_argv0) {
-                        char *w = NULL;
-
-                        if (!GREEDY_REALLOC0(n, nlen + 2))
-                                return log_oom();
+                _cleanup_strv_free_ char **args = NULL;
 
-                        w = strdup(path);
-                        if (!w)
+                if (!separate_argv0)
+                        if (strv_extend(&args, path) < 0)
                                 return log_oom();
-                        n[nlen++] = w;
-                        n[nlen] = NULL;
-                }
-
-                path_simplify(path);
 
                 while (!isempty(p)) {
                         _cleanup_free_ char *word = NULL, *resolved = NULL;
 
-                        /* Check explicitly for an unquoted semicolon as
-                         * command separator token.  */
+                        /* Check explicitly for an unquoted semicolon as command separator token. */
                         if (p[0] == ';' && (!p[1] || strchr(WHITESPACE, p[1]))) {
                                 p++;
-                                p += strspn(p, WHITESPACE);
+                                p = skip_leading_chars(p, /* bad = */ NULL);
                                 semicolon = true;
                                 break;
                         }
 
                         /* Check for \; explicitly, to not confuse it with \\; or "\;" or "\\;" etc.
-                         * extract_first_word() would return the same for all of those.  */
+                         * extract_first_word() would return the same for all of those. */
                         if (p[0] == '\\' && p[1] == ';' && (!p[2] || strchr(WHITESPACE, p[2]))) {
-                                char *w;
-
                                 p += 2;
-                                p += strspn(p, WHITESPACE);
+                                p = skip_leading_chars(p, /* bad = */ NULL);
 
-                                if (!GREEDY_REALLOC0(n, nlen + 2))
+                                if (strv_extend(&args, ";") < 0)
                                         return log_oom();
 
-                                w = strdup(";");
-                                if (!w)
-                                        return log_oom();
-                                n[nlen++] = w;
-                                n[nlen] = NULL;
                                 continue;
                         }
 
                         r = extract_first_word_and_warn(&p, &word, NULL, EXTRACT_UNQUOTE|EXTRACT_CUNESCAPE, unit, filename, line, rvalue);
-                        if (r == 0)
-                                break;
                         if (r < 0)
                                 return ignore ? 0 : -ENOEXEC;
+                        if (r == 0)
+                                break;
 
                         r = unit_full_printf(u, word, &resolved);
                         if (r < 0) {
                                 log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, r,
-                                           "Failed to resolve unit specifiers in %s%s: %m",
+                                           "Failed to resolve unit specifiers in '%s'%s: %m",
                                            word, ignore ? ", ignoring" : "");
                                 return ignore ? 0 : -ENOEXEC;
                         }
 
-                        if (!GREEDY_REALLOC(n, nlen + 2))
+                        if (strv_consume(&args, TAKE_PTR(resolved)) < 0)
                                 return log_oom();
-
-                        n[nlen++] = TAKE_PTR(resolved);
-                        n[nlen] = NULL;
                 }
 
-                if (!n || !n[0]) {
+                if (strv_isempty(args)) {
                         log_syntax(unit, ignore ? LOG_WARNING : LOG_ERR, filename, line, 0,
                                    "Empty executable name or zeroeth argument%s: %s",
                                    ignore ? ", ignoring" : "", rvalue);
                         return ignore ? 0 : -ENOEXEC;
                 }
 
-                nce = new0(ExecCommand, 1);
-                if (!nce)
+                ExecCommand *nec = new(ExecCommand, 1);
+                if (!nec)
                         return log_oom();
 
-                nce->argv = TAKE_PTR(n);
-                nce->path = TAKE_PTR(path);
-                nce->flags = flags;
-
-                exec_command_append_list(e, nce);
+                *nec = (ExecCommand) {
+                        .path = path_simplify(TAKE_PTR(path)),
+                        .argv = TAKE_PTR(args),
+                        .flags = flags,
+                };
 
-                /* Do not _cleanup_free_ these. */
-                nce = NULL;
+                exec_command_append_list(e, nec);
 
                 rvalue = p;
         } while (semicolon);
@@ -1254,7 +1265,7 @@ int config_parse_exec_input_data(
                 return 0;
         }
 
-        r = unbase64mem(rvalue, SIZE_MAX, &p, &sz);
+        r = unbase64mem(rvalue, &p, &sz);
         if (r < 0) {
                 log_syntax(unit, LOG_WARNING, filename, line, r,
                            "Failed to decode base64 data, ignoring: %s", rvalue);
@@ -1520,43 +1531,6 @@ int config_parse_exec_cpu_sched_policy(const char *unit,
         return 0;
 }
 
-int config_parse_exec_mount_apivfs(const char *unit,
-                                   const char *filename,
-                                   unsigned line,
-                                   const char *section,
-                                   unsigned section_line,
-                                   const char *lvalue,
-                                   int ltype,
-                                   const char *rvalue,
-                                   void *data,
-                                   void *userdata) {
-
-        ExecContext *c = ASSERT_PTR(data);
-        int k;
-
-        assert(filename);
-        assert(lvalue);
-        assert(rvalue);
-
-        if (isempty(rvalue)) {
-                c->mount_apivfs_set = false;
-                c->mount_apivfs = false;
-                return 0;
-        }
-
-        k = parse_boolean(rvalue);
-        if (k < 0) {
-                log_syntax(unit, LOG_WARNING, filename, line, k,
-                           "Failed to parse boolean value, ignoring: %s",
-                           rvalue);
-                return 0;
-        }
-
-        c->mount_apivfs_set = true;
-        c->mount_apivfs = k;
-        return 0;
-}
-
 int config_parse_numa_mask(const char *unit,
                            const char *filename,
                            unsigned line,
@@ -1748,7 +1722,7 @@ int config_parse_exec_root_hash(
         }
 
         /* We have a roothash to decode, eg: RootHash=012345789abcdef */
-        r = unhexmem(rvalue, strlen(rvalue), &roothash_decoded, &roothash_decoded_size);
+        r = unhexmem(rvalue, &roothash_decoded, &roothash_decoded_size);
         if (r < 0) {
                 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to decode RootHash=, ignoring: %s", rvalue);
                 return 0;
@@ -1816,7 +1790,7 @@ int config_parse_exec_root_hash_sig(
         }
 
         /* We have a roothash signature to decode, eg: RootHashSignature=base64:012345789abcdef */
-        r = unbase64mem(value, strlen(value), &roothash_sig_decoded, &roothash_sig_decoded_size);
+        r = unbase64mem(value, &roothash_sig_decoded, &roothash_sig_decoded_size);
         if (r < 0) {
                 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to decode RootHashSignature=, ignoring: %s", rvalue);
                 return 0;
@@ -2634,6 +2608,7 @@ int config_parse_working_directory(
         assert(rvalue);
 
         if (isempty(rvalue)) {
+                c->working_directory_missing_ok = false;
                 c->working_directory_home = false;
                 c->working_directory = mfree(c->working_directory);
                 return 0;
@@ -2659,7 +2634,7 @@ int config_parse_working_directory(
                         return missing_ok ? 0 : -ENOEXEC;
                 }
 
-                r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE | (missing_ok ? 0 : PATH_CHECK_FATAL), unit, filename, line, lvalue);
+                r = path_simplify_and_warn(k, PATH_CHECK_ABSOLUTE|PATH_CHECK_NON_API_VFS|(missing_ok ? 0 : PATH_CHECK_FATAL), unit, filename, line, lvalue);
                 if (r < 0)
                         return missing_ok ? 0 : -ENOEXEC;
 
@@ -2697,7 +2672,7 @@ int config_parse_unit_env_file(const char *unit,
                 return 0;
         }
 
-        r = unit_full_printf_full(u, rvalue, PATH_MAX, &n);
+        r = unit_path_printf(u, rvalue, &n);
         if (r < 0) {
                 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
                 return 0;
@@ -3152,7 +3127,7 @@ int config_parse_unit_condition_string(
         return 0;
 }
 
-int config_parse_unit_requires_mounts_for(
+int config_parse_unit_mounts_for(
                 const char *unit,
                 const char *filename,
                 unsigned line,
@@ -3171,6 +3146,7 @@ int config_parse_unit_requires_mounts_for(
         assert(lvalue);
         assert(rvalue);
         assert(data);
+        assert(STR_IN_SET(lvalue, "RequiresMountsFor", "WantsMountsFor"));
 
         for (const char *p = rvalue;;) {
                 _cleanup_free_ char *word = NULL, *resolved = NULL;
@@ -3196,9 +3172,9 @@ int config_parse_unit_requires_mounts_for(
                 if (r < 0)
                         continue;
 
-                r = unit_require_mounts_for(u, resolved, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(u, resolved, UNIT_DEPENDENCY_FILE, unit_mount_dependency_type_from_string(lvalue));
                 if (r < 0) {
-                        log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add required mount '%s', ignoring: %m", resolved);
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to add requested mount '%s', ignoring: %m", resolved);
                         continue;
                 }
         }
@@ -3695,7 +3671,7 @@ int config_parse_restrict_filesystems(
                         break;
                 }
 
-                r = lsm_bpf_parse_filesystem(
+                r = bpf_restrict_fs_parse_filesystem(
                               word,
                               &c->restrict_filesystems,
                               FILESYSTEM_PARSE_LOG|
@@ -4693,7 +4669,7 @@ int config_parse_exec_directories(
 
                 _cleanup_free_ char *src = NULL, *dest = NULL;
                 const char *q = tuple;
-                r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &src, &dest, NULL);
+                r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &src, &dest);
                 if (r == -ENOMEM)
                         return log_oom();
                 if (r <= 0) {
@@ -4908,11 +4884,8 @@ int config_parse_load_credential(
                 void *data,
                 void *userdata) {
 
-        _cleanup_free_ char *word = NULL, *k = NULL, *q = NULL;
         ExecContext *context = ASSERT_PTR(data);
-        bool encrypted = ltype;
-        Unit *u = userdata;
-        const char *p;
+        const Unit *u = ASSERT_PTR(userdata);
         int r;
 
         assert(filename);
@@ -4925,7 +4898,10 @@ int config_parse_load_credential(
                 return 0;
         }
 
-        p = rvalue;
+        _cleanup_free_ char *word = NULL, *id = NULL, *path = NULL;
+        const char *p = rvalue;
+        bool encrypted = ltype;
+
         r = extract_first_word(&p, &word, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
         if (r == -ENOMEM)
                 return log_oom();
@@ -4934,35 +4910,35 @@ int config_parse_load_credential(
                 return 0;
         }
 
-        r = unit_cred_printf(u, word, &k);
+        r = unit_cred_printf(u, word, &id);
         if (r < 0) {
                 log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in \"%s\", ignoring: %m", word);
                 return 0;
         }
-        if (!credential_name_valid(k)) {
-                log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential name \"%s\" not valid, ignoring.", k);
+        if (!credential_name_valid(id)) {
+                log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential name \"%s\" not valid, ignoring.", id);
                 return 0;
         }
 
         if (isempty(p)) {
                 /* If only one field is specified take it as shortcut for inheriting a credential named
                  * the same way from our parent */
-                q = strdup(k);
-                if (!q)
+                path = strdup(id);
+                if (!path)
                         return log_oom();
         } else {
-                r = unit_path_printf(u, p, &q);
+                r = unit_path_printf(u, p, &path);
                 if (r < 0) {
                         log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in \"%s\", ignoring: %m", p);
                         return 0;
                 }
-                if (path_is_absolute(q) ? !path_is_normalized(q) : !credential_name_valid(q)) {
-                        log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential source \"%s\" not valid, ignoring.", q);
+                if (path_is_absolute(path) ? !path_is_normalized(path) : !credential_name_valid(path)) {
+                        log_syntax(unit, LOG_WARNING, filename, line, 0, "Credential source \"%s\" not valid, ignoring.", path);
                         return 0;
                 }
         }
 
-        r = hashmap_put_credential(&context->load_credentials, k, q, encrypted);
+        r = hashmap_put_credential(&context->load_credentials, id, path, encrypted);
         if (r < 0)
                 return log_error_errno(r, "Failed to store load credential '%s': %m", rvalue);
 
@@ -5236,7 +5212,7 @@ int config_parse_bind_paths(
                 void *userdata) {
 
         ExecContext *c = ASSERT_PTR(data);
-        const Unit *u = userdata;
+        const Unit *u = ASSERT_PTR(userdata);
         int r;
 
         assert(filename);
@@ -5267,7 +5243,7 @@ int config_parse_bind_paths(
                 if (r == 0)
                         break;
 
-                r = unit_full_printf_full(u, source, PATH_MAX, &sresolved);
+                r = unit_path_printf(u, source, &sresolved);
                 if (r < 0) {
                         log_syntax(unit, LOG_WARNING, filename, line, r,
                                    "Failed to resolve unit specifiers in \"%s\", ignoring: %m", source);
@@ -5396,7 +5372,7 @@ int config_parse_mount_images(
                         return 0;
 
                 q = tuple;
-                r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second, NULL);
+                r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &first, &second);
                 if (r == -ENOMEM)
                         return log_oom();
                 if (r < 0) {
@@ -5420,7 +5396,7 @@ int config_parse_mount_images(
                         continue;
                 }
 
-                r = path_simplify_and_warn(sresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+                r = path_simplify_and_warn(sresolved, PATH_CHECK_ABSOLUTE|PATH_CHECK_NON_API_VFS, unit, filename, line, lvalue);
                 if (r < 0)
                         continue;
 
@@ -5436,7 +5412,7 @@ int config_parse_mount_images(
                         continue;
                 }
 
-                r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+                r = path_simplify_and_warn(dresolved, PATH_CHECK_ABSOLUTE|PATH_CHECK_NON_API_VFS, unit, filename, line, lvalue);
                 if (r < 0)
                         continue;
 
@@ -5445,7 +5421,7 @@ int config_parse_mount_images(
                         MountOptions *o = NULL;
                         PartitionDesignator partition_designator;
 
-                        r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+                        r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options);
                         if (r == -ENOMEM)
                                 return log_oom();
                         if (r < 0) {
@@ -5578,7 +5554,7 @@ int config_parse_extension_images(
                         continue;
                 }
 
-                r = path_simplify_and_warn(sresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+                r = path_simplify_and_warn(sresolved, PATH_CHECK_ABSOLUTE|PATH_CHECK_NON_API_VFS, unit, filename, line, lvalue);
                 if (r < 0)
                         continue;
 
@@ -5587,7 +5563,7 @@ int config_parse_extension_images(
                         MountOptions *o = NULL;
                         PartitionDesignator partition_designator;
 
-                        r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL);
+                        r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options);
                         if (r == -ENOMEM)
                                 return log_oom();
                         if (r < 0) {
@@ -5799,7 +5775,7 @@ int config_parse_pid_file(
                 return log_oom();
 
         /* Check that the result is a sensible path */
-        r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue);
+        r = path_simplify_and_warn(n, PATH_CHECK_ABSOLUTE|PATH_CHECK_NON_API_VFS, unit, filename, line, lvalue);
         if (r < 0)
                 return r;
 
@@ -6095,7 +6071,7 @@ int config_parse_restrict_network_interfaces(
                         break;
                 }
 
-                if (!ifname_valid(word)) {
+                if (!ifname_valid_full(word, IFNAME_VALID_ALTERNATIVE)) {
                         log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid interface name, ignoring: %s", word);
                         continue;
                 }
@@ -6112,6 +6088,47 @@ int config_parse_restrict_network_interfaces(
         return 0;
 }
 
+int config_parse_mount_node( /* config parser for a mount/swap source node; stores the processed value as a string */
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        const Unit *u = ASSERT_PTR(userdata); /* unit whose specifiers are expanded below */
+        _cleanup_free_ char *resolved = NULL, *path = NULL;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        r = unit_full_printf(u, rvalue, &resolved); /* expand unit specifiers in the raw value */
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue);
+                return 0; /* not fatal: the assignment is skipped with a warning */
+        }
+
+        path = fstab_node_to_udev_node(resolved); /* presumably maps fstab-style tags (LABEL=, UUID=, ...) to device node paths - TODO confirm */
+        if (!path)
+                return log_oom();
+
+        /* The source passed is not necessarily something we understand, and we pass it as-is to mount/swapon,
+         * so path_is_valid is not used. But let's check for basic sanity, i.e. if the source is longer than
+         * PATH_MAX, you're likely doing something wrong. */
+        if (strlen(path) >= PATH_MAX) {
+                log_syntax(unit, LOG_WARNING, filename, line, 0, "Resolved mount path '%s' too long, ignoring.", path);
+                return 0; /* over-long value: warn and skip the assignment */
+        }
+
+        return config_parse_string(unit, filename, line, section, section_line, lvalue, ltype, path, data, userdata); /* delegate storing, with the processed path in place of rvalue */
+}
+
 static int merge_by_names(Unit *u, Set *names, const char *id) {
         char *k;
         int r;
@@ -6316,8 +6333,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_nsec,                  "NANOSECONDS" },
                 { config_parse_namespace_path_strv,   "PATH [...]" },
                 { config_parse_bind_paths,            "PATH[:PATH[:OPTIONS]] [...]" },
-                { config_parse_unit_requires_mounts_for,
-                                                      "PATH [...]" },
+                { config_parse_unit_mounts_for,       "PATH [...]" },
                 { config_parse_exec_mount_propagation_flag,
                                                       "MOUNTFLAG" },
                 { config_parse_unit_string_printf,    "STRING" },
@@ -6365,6 +6381,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_job_mode_isolate,      "BOOLEAN" },
                 { config_parse_personality,           "PERSONALITY" },
                 { config_parse_log_filter_patterns,   "REGEX" },
+                { config_parse_mount_node,            "NODE" },
         };
 
         const char *prev = NULL;
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 6919805..005b915 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -23,6 +23,7 @@ void unit_dump_config_items(FILE *f);
 CONFIG_PARSER_PROTOTYPE(config_parse_unit_deps);
 CONFIG_PARSER_PROTOTYPE(config_parse_obsolete_unit_deps);
 CONFIG_PARSER_PROTOTYPE(config_parse_unit_string_printf);
+CONFIG_PARSER_PROTOTYPE(config_parse_reboot_parameter);
 CONFIG_PARSER_PROTOTYPE(config_parse_unit_strv_printf);
 CONFIG_PARSER_PROTOTYPE(config_parse_unit_path_printf);
 CONFIG_PARSER_PROTOTYPE(config_parse_colon_separated_paths);
@@ -71,7 +72,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_unit_condition_string);
 CONFIG_PARSER_PROTOTYPE(config_parse_kill_mode);
 CONFIG_PARSER_PROTOTYPE(config_parse_notify_access);
 CONFIG_PARSER_PROTOTYPE(config_parse_emergency_action);
-CONFIG_PARSER_PROTOTYPE(config_parse_unit_requires_mounts_for);
+CONFIG_PARSER_PROTOTYPE(config_parse_unit_mounts_for);
 CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
 CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs);
 CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno);
@@ -159,6 +160,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_log_filter_patterns);
 CONFIG_PARSER_PROTOTYPE(config_parse_open_file);
 CONFIG_PARSER_PROTOTYPE(config_parse_memory_pressure_watch);
 CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_nft_set);
+CONFIG_PARSER_PROTOTYPE(config_parse_mount_node);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
diff --git a/src/core/main.c b/src/core/main.c
index 1ed968d..4b8a315 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -21,7 +21,7 @@
 #include "architecture.h"
 #include "argv-util.h"
 #if HAVE_LIBBPF
-#include "bpf-lsm.h"
+#include "bpf-restrict-fs.h"
 #endif
 #include "build.h"
 #include "bus-error.h"
@@ -68,6 +68,7 @@
 #include "manager-serialize.h"
 #include "mkdir-label.h"
 #include "mount-setup.h"
+#include "mount-util.h"
 #include "os-util.h"
 #include "pager.h"
 #include "parse-argument.h"
@@ -87,6 +88,7 @@
 #include "special.h"
 #include "stat-util.h"
 #include "stdio-util.h"
+#include "string-table.h"
 #include "strv.h"
 #include "switch-root.h"
 #include "sysctl-util.h"
@@ -121,7 +123,7 @@ static RuntimeScope arg_runtime_scope;
 bool arg_dump_core;
 int arg_crash_chvt;
 bool arg_crash_shell;
-bool arg_crash_reboot;
+CrashAction arg_crash_action;
 static char *arg_confirm_spawn;
 static ShowStatus arg_show_status;
 static StatusUnitFormat arg_status_unit_format;
@@ -140,6 +142,7 @@ static char **arg_default_environment;
 static char **arg_manager_environment;
 static uint64_t arg_capability_bounding_set;
 static bool arg_no_new_privs;
+static int arg_protect_system;
 static nsec_t arg_timer_slack_nsec;
 static Set* arg_syscall_archs;
 static FILE* arg_serialization;
@@ -159,6 +162,16 @@ static char **saved_env = NULL;
 static int parse_configuration(const struct rlimit *saved_rlimit_nofile,
                                const struct rlimit *saved_rlimit_memlock);
 
+static const char* const crash_action_table[_CRASH_ACTION_MAX] = { /* textual names of the CrashAction values */
+        [CRASH_FREEZE]   = "freeze",
+        [CRASH_REBOOT]   = "reboot",
+        [CRASH_POWEROFF] = "poweroff",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(crash_action, CrashAction); /* presumably defines crash_action_to_string()/crash_action_from_string() as declared in main.h - TODO confirm */
+
+static DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_crash_action, crash_action, CrashAction, CRASH_FREEZE, "Invalid crash action"); /* CrashAction= parser; CRASH_FREEZE is passed as the default value */
+
 static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) {
         _cleanup_free_ char *base = NULL;
         _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
@@ -206,13 +219,17 @@ static int console_setup(void) {
 
         r = proc_cmdline_tty_size("/dev/console", &rows, &cols);
         if (r < 0)
-                log_warning_errno(r, "Failed to get terminal size, ignoring: %m");
+                log_warning_errno(r, "Failed to get /dev/console size, ignoring: %m");
         else {
                 r = terminal_set_size_fd(tty_fd, NULL, rows, cols);
                 if (r < 0)
-                        log_warning_errno(r, "Failed to set terminal size, ignoring: %m");
+                        log_warning_errno(r, "Failed to set /dev/console size, ignoring: %m");
         }
 
+        r = terminal_reset_ansi_seq(tty_fd);
+        if (r < 0)
+                log_warning_errno(r, "Failed to reset /dev/console using ANSI sequences, ignoring: %m");
+
         return 0;
 }
 
@@ -273,7 +290,18 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 if (r < 0)
                         log_warning_errno(r, "Failed to parse crash reboot switch %s, ignoring: %m", value);
                 else
-                        arg_crash_reboot = r;
+                        arg_crash_action = r ? CRASH_REBOOT : CRASH_FREEZE;
+
+        } else if (proc_cmdline_key_streq(key, "systemd.crash_action")) {
+
+                if (proc_cmdline_value_missing(key, value))
+                        return 0;
+
+                r = crash_action_from_string(value);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to parse crash action switch %s, ignoring: %m", value);
+                else
+                        arg_crash_action = r;
 
         } else if (proc_cmdline_key_streq(key, "systemd.confirm_spawn")) {
                 char *s;
@@ -462,7 +490,7 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
                 if (proc_cmdline_value_missing(key, value))
                         return 0;
 
-                r = unbase64mem(value, SIZE_MAX, &p, &sz);
+                r = unbase64mem(value, &p, &sz);
                 if (r < 0)
                         log_warning_errno(r, "Failed to parse systemd.random_seed= argument, ignoring: %s", value);
 
@@ -610,6 +638,73 @@ static int config_parse_oom_score_adjust(
         return 0;
 }
 
+static int config_parse_protect_system_pid1(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        int *v = ASSERT_PTR(data), r;
+
+        /* Parses the manager's own ProtectSystem= into the tristate at 'data': -1 (auto), 0 or 1. Modelled
+         * after the per-service ProtectSystem= setting, but more restricted on the one hand (only booleans
+         * and "auto", no "strict"/"full") and more automatic on the other (on by default in the initrd).
+         *
+         * We might extend this later to match more closely what the per-service ProtectSystem= can do, but
+         * this is not trivial, due to ordering constraints: besides /usr/ we don't really have much mounted
+         * at the moment we enable this logic. */
+
+        if (isempty(rvalue) || streq(rvalue, "auto")) {
+                *v = -1; /* negative means "auto"; an empty assignment is treated the same way */
+                return 0;
+        }
+
+        r = parse_boolean(rvalue);
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse ProtectSystem= argument '%s', ignoring: %m", rvalue);
+                return 0; /* invalid value: warn and leave *v untouched */
+        }
+
+        *v = r;
+        return 0;
+}
+
+static int config_parse_crash_reboot( /* parses the boolean CrashReboot= setting into the CrashAction stored at 'data' */
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        CrashAction *v = ASSERT_PTR(data);
+        int r;
+
+        if (isempty(rvalue)) { /* NOTE(review): an empty value selects CRASH_REBOOT instead of resetting to a default - confirm this is intended */
+                *v = CRASH_REBOOT;
+                return 0;
+        }
+
+        r = parse_boolean(rvalue);
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse CrashReboot= argument '%s', ignoring: %m", rvalue);
+                return 0; /* unparsable value: warn and leave *v untouched */
+        }
+
+        *v = r > 0 ? CRASH_REBOOT : CRASH_FREEZE; /* true selects reboot, false selects freeze */
+        return 0;
+}
+
 static int parse_config_file(void) {
         const ConfigTableItem items[] = {
                 { "Manager", "LogLevel",                     config_parse_level2,                0,                        NULL                              },
@@ -621,7 +716,8 @@ static int parse_config_file(void) {
                 { "Manager", "CrashChVT", /* legacy */       config_parse_crash_chvt,            0,                        &arg_crash_chvt                   },
                 { "Manager", "CrashChangeVT",                config_parse_crash_chvt,            0,                        &arg_crash_chvt                   },
                 { "Manager", "CrashShell",                   config_parse_bool,                  0,                        &arg_crash_shell                  },
-                { "Manager", "CrashReboot",                  config_parse_bool,                  0,                        &arg_crash_reboot                 },
+                { "Manager", "CrashReboot",                  config_parse_crash_reboot,          0,                        &arg_crash_action                 },
+                { "Manager", "CrashAction",                  config_parse_crash_action,          0,                        &arg_crash_action                 },
                 { "Manager", "ShowStatus",                   config_parse_show_status,           0,                        &arg_show_status                  },
                 { "Manager", "StatusUnitFormat",             config_parse_status_unit_format,    0,                        &arg_status_unit_format           },
                 { "Manager", "CPUAffinity",                  config_parse_cpu_affinity2,         0,                        &arg_cpu_affinity                 },
@@ -637,6 +733,7 @@ static int parse_config_file(void) {
                 { "Manager", "RuntimeWatchdogPreGovernor",   config_parse_string,                CONFIG_PARSE_STRING_SAFE, &arg_watchdog_pretimeout_governor },
                 { "Manager", "CapabilityBoundingSet",        config_parse_capability_set,        0,                        &arg_capability_bounding_set      },
                 { "Manager", "NoNewPrivileges",              config_parse_bool,                  0,                        &arg_no_new_privs                 },
+                { "Manager", "ProtectSystem",                config_parse_protect_system_pid1,   0,                        &arg_protect_system               },
 #if HAVE_SECCOMP
                 { "Manager", "SystemCallArchitectures",      config_parse_syscall_archs,         0,                        &arg_syscall_archs                },
 #else
@@ -696,11 +793,12 @@ static int parse_config_file(void) {
         };
 
         if (arg_runtime_scope == RUNTIME_SCOPE_SYSTEM)
-                (void) config_parse_config_file("system.conf",
-                                                "Manager\0",
-                                                config_item_table_lookup, items,
-                                                CONFIG_PARSE_WARN,
-                                                NULL);
+                (void) config_parse_standard_file_with_dropins(
+                                "systemd/system.conf",
+                                "Manager\0",
+                                config_item_table_lookup, items,
+                                CONFIG_PARSE_WARN,
+                                /* userdata= */ NULL);
         else {
                 _cleanup_strv_free_ char **files = NULL, **dirs = NULL;
                 int r;
@@ -769,8 +867,8 @@ static void set_manager_settings(Manager *m) {
         m->cad_burst_action = arg_cad_burst_action;
         /* Note that we don't do structured initialization here, otherwise it will reset the rate limit
          * counter on every daemon-reload. */
-        m->reload_ratelimit.interval = arg_reload_limit_interval_sec;
-        m->reload_ratelimit.burst = arg_reload_limit_burst;
+        m->reload_reexec_ratelimit.interval = arg_reload_limit_interval_sec;
+        m->reload_reexec_ratelimit.burst = arg_reload_limit_burst;
 
         manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog);
         manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog);
@@ -935,9 +1033,17 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_CRASH_REBOOT:
-                        r = parse_boolean_argument("--crash-reboot", optarg, &arg_crash_reboot);
+                        r = parse_boolean_argument("--crash-reboot", optarg, NULL);
                         if (r < 0)
                                 return r;
+                        arg_crash_action = r > 0 ? CRASH_REBOOT : CRASH_FREEZE;
+                        break;
+
+                case ARG_CRASH_ACTION:
+                        r = crash_action_from_string(optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse crash action \"%s\": %m", optarg);
+                        arg_crash_action = r;
                         break;
 
                 case ARG_CONFIRM_SPAWN:
@@ -1053,7 +1159,7 @@ static int help(void) {
                "     --unit=UNIT                 Set default unit\n"
                "     --dump-core[=BOOL]          Dump core on crash\n"
                "     --crash-vt=NR               Change to specified VT on crash\n"
-               "     --crash-reboot[=BOOL]       Reboot on crash\n"
+               "     --crash-action=ACTION       Specify what to do on crash\n"
                "     --crash-shell[=BOOL]        Run shell on crash\n"
                "     --confirm-spawn[=BOOL]      Ask for confirmation when spawning processes\n"
                "     --show-status[=BOOL]        Show status updates on the console during boot\n"
@@ -1265,7 +1371,7 @@ static void test_usr(void) {
 
         log_warning("/usr appears to be on its own filesystem and is not already mounted. This is not a supported setup. "
                     "Some things will probably break (sometimes even silently) in mysterious ways. "
-                    "Consult https://www.freedesktop.org/wiki/Software/systemd/separate-usr-is-broken for more information.");
+                    "Consult https://systemd.io/SEPARATE_USR_IS_BROKEN for more information.");
 }
 
 static int enforce_syscall_archs(Set *archs) {
@@ -1277,7 +1383,7 @@ static int enforce_syscall_archs(Set *archs) {
 
         r = seccomp_restrict_archs(arg_syscall_archs);
         if (r < 0)
-                return log_error_errno(r, "Failed to enforce system call architecture restrication: %m");
+                return log_error_errno(r, "Failed to enforce system call architecture restriction: %m");
 #endif
         return 0;
 }
@@ -1435,7 +1541,7 @@ static int fixup_environment(void) {
                 return -errno;
 
         /* The kernels sets HOME=/ for init. Let's undo this. */
-        if (path_equal_ptr(getenv("HOME"), "/"))
+        if (path_equal(getenv("HOME"), "/"))
                 assert_se(unsetenv("HOME") == 0);
 
         return 0;
@@ -1467,32 +1573,37 @@ static int become_shutdown(int objective, int retval) {
                 [MANAGER_KEXEC]    = "kexec",
         };
 
-        char log_level[STRLEN("--log-level=") + DECIMAL_STR_MAX(int)],
-             timeout[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t) + STRLEN("us")],
+        char timeout[STRLEN("--timeout=") + DECIMAL_STR_MAX(usec_t) + STRLEN("us")],
              exit_code[STRLEN("--exit-code=") + DECIMAL_STR_MAX(uint8_t)];
 
         _cleanup_strv_free_ char **env_block = NULL;
+        _cleanup_free_ char *max_log_levels = NULL;
         usec_t watchdog_timer = 0;
         int r;
 
         assert(objective >= 0 && objective < _MANAGER_OBJECTIVE_MAX);
         assert(table[objective]);
 
-        xsprintf(log_level, "--log-level=%d", log_get_max_level());
         xsprintf(timeout, "--timeout=%" PRI_USEC "us", arg_defaults.timeout_stop_usec);
 
-        const char* command_line[10] = {
+        const char* command_line[11] = {
                 SYSTEMD_SHUTDOWN_BINARY_PATH,
                 table[objective],
-                log_level,
                 timeout,
                 /* Note that the last position is a terminator and must contain NULL. */
         };
-        size_t pos = 4;
+        size_t pos = 3;
 
         assert(command_line[pos-1]);
         assert(!command_line[pos]);
 
+        (void) log_max_levels_to_string(log_get_max_level(), &max_log_levels);
+
+        if (max_log_levels) {
+                command_line[pos++] = "--log-level";
+                command_line[pos++] = max_log_levels;
+        }
+
         switch (log_get_target()) {
 
         case LOG_TARGET_KMSG:
@@ -1538,7 +1649,7 @@ static int become_shutdown(int objective, int retval) {
         (void) watchdog_setup_pretimeout(0);
         (void) watchdog_setup_pretimeout_governor(NULL);
         r = watchdog_setup(watchdog_timer);
-        watchdog_close(r < 0);
+        watchdog_close(/* disarm= */ r < 0);
 
         /* The environment block: */
 
@@ -1684,6 +1795,35 @@ static void initialize_core_pattern(bool skip_setup) {
                                   arg_early_core_pattern);
 }
 
+static void apply_protect_system(bool skip_setup) { /* applies the manager's ProtectSystem= setting by making /usr read-only; failures are only logged */
+        int r;
+
+        if (skip_setup || getpid_cached() != 1 || arg_protect_system == 0) /* nothing to do when setup is skipped, when not PID 1, or when explicitly disabled */
+                return;
+
+        if (arg_protect_system < 0 && !in_initrd()) { /* negative means "auto", which only takes effect in the initrd */
+                log_debug("ProtectSystem=auto selected, but not running in an initrd, skipping.");
+                return;
+        }
+
+        r = make_mount_point("/usr"); /* presumably turns /usr into a mount point of its own if it is not one yet - TODO confirm */
+        if (r < 0) {
+                log_warning_errno(r, "Failed to make /usr/ a mount point, ignoring: %m");
+                return;
+        }
+
+        if (mount_nofollow_verbose(
+                        LOG_WARNING,
+                        /* what= */ NULL,
+                        "/usr",
+                        /* fstype= */ NULL,
+                        MS_BIND|MS_REMOUNT|MS_RDONLY, /* read-only bind remount of the existing /usr mount */
+                        /* options= */ NULL) < 0)
+                return; /* presumably already logged at LOG_WARNING by the helper - TODO confirm */
+
+        log_info("Successfully made /usr/ read-only.");
+}
+
 static void update_cpu_affinity(bool skip_setup) {
         _cleanup_free_ char *mask = NULL;
 
@@ -1966,6 +2106,16 @@ static int invoke_main_loop(
                                                 "MESSAGE_ID=" SD_MESSAGE_CORE_MAINLOOP_FAILED_STR);
                 }
 
+                /* Ensure shutdown timestamp is taken even when bypassing the job engine */
+                if (IN_SET(objective,
+                           MANAGER_SOFT_REBOOT,
+                           MANAGER_REBOOT,
+                           MANAGER_KEXEC,
+                           MANAGER_HALT,
+                           MANAGER_POWEROFF) &&
+                    !dual_timestamp_is_set(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START))
+                        dual_timestamp_now(m->timestamps + MANAGER_TIMESTAMP_SHUTDOWN_START);
+
                 switch (objective) {
 
                 case MANAGER_RELOAD: {
@@ -2133,9 +2283,9 @@ static void log_execution_mode(bool *ret_first_boot) {
                         /* Let's check whether we are in first boot. First, check if an override was
                          * specified on the kernel command line. If yes, we honour that. */
 
-                        r = proc_cmdline_get_bool("systemd.condition-first-boot", /* flags = */ 0, &first_boot);
+                        r = proc_cmdline_get_bool("systemd.condition_first_boot", /* flags = */ 0, &first_boot);
                         if (r < 0)
-                                log_debug_errno(r, "Failed to parse systemd.condition-first-boot= kernel command line argument, ignoring: %m");
+                                log_debug_errno(r, "Failed to parse systemd.condition_first_boot= kernel command line argument, ignoring: %m");
 
                         if (r > 0)
                                 log_full(first_boot ? LOG_INFO : LOG_DEBUG,
@@ -2221,12 +2371,6 @@ static int initialize_runtime(
                 install_crash_handler();
 
                 if (!skip_setup) {
-                        r = mount_cgroup_controllers();
-                        if (r < 0) {
-                                *ret_error_message = "Failed to mount cgroup hierarchies";
-                                return r;
-                        }
-
                         /* Pull credentials from various sources into a common credential directory (we do
                          * this here, before setting up the machine ID, so that we can use credential info
                          * for setting up the machine ID) */
@@ -2493,7 +2637,7 @@ static void setenv_manager_environment(void) {
 
                 r = putenv_dup(*p, true);
                 if (r < 0)
-                        log_warning_errno(errno, "Failed to setenv \"%s\", ignoring: %m", *p);
+                        log_warning_errno(r, "Failed to setenv \"%s\", ignoring: %m", *p);
         }
 }
 
@@ -2507,7 +2651,7 @@ static void reset_arguments(void) {
         arg_dump_core = true;
         arg_crash_chvt = -1;
         arg_crash_shell = false;
-        arg_crash_reboot = false;
+        arg_crash_action = CRASH_FREEZE;
         arg_confirm_spawn = mfree(arg_confirm_spawn);
         arg_show_status = _SHOW_STATUS_INVALID;
         arg_status_unit_format = STATUS_UNIT_FORMAT_DEFAULT;
@@ -2531,6 +2675,7 @@ static void reset_arguments(void) {
 
         arg_capability_bounding_set = CAP_MASK_UNSET;
         arg_no_new_privs = false;
+        arg_protect_system = -1;
         arg_timer_slack_nsec = NSEC_INFINITY;
 
         arg_syscall_archs = set_free(arg_syscall_archs);
@@ -2952,6 +3097,24 @@ int main(int argc, char *argv[]) {
                         goto finish;
                 }
 
+                if (!skip_setup) {
+                        /* Before we actually start deleting cgroup v1 code, make it harder to boot
+                         * in cgroupv1 mode first. See also #30852. */
+
+                        r = mount_cgroup_legacy_controllers(loaded_policy);
+                        if (r < 0) {
+                                if (r == -ERFKILL)
+                                        error_message = "Refusing to run under cgroup v1, SYSTEMD_CGROUP_ENABLE_LEGACY_FORCE=1 not specified on kernel command line";
+                                else
+                                        error_message = "Failed to mount cgroup v1 hierarchy";
+                                goto finish;
+                        }
+                        if (r > 0) {
+                                log_full(LOG_CRIT, "Legacy cgroup v1 support selected. This is no longer supported. Will proceed anyway after 30s.");
+                                (void) usleep_safe(30 * USEC_PER_SEC);
+                        }
+                }
+
                 /* The efivarfs is now mounted, let's lock down the system token. */
                 lock_down_efi_variables();
 
@@ -3038,9 +3201,12 @@ int main(int argc, char *argv[]) {
                         cmdline_take_random_seed();
                 }
 
-                /* A core pattern might have been specified via the cmdline.  */
+                /* A core pattern might have been specified via the cmdline. */
                 initialize_core_pattern(skip_setup);
 
+                /* Make /usr/ read-only */
+                apply_protect_system(skip_setup);
+
                 /* Close logging fds, in order not to confuse collecting passed fds and terminal logic below */
                 log_close();
 
@@ -3196,7 +3362,8 @@ finish:
 #endif
 
         if (r < 0)
-                (void) sd_notifyf(0, "ERRNO=%i", -r);
+                (void) sd_notifyf(/* unset_environment= */ false,
+                                  "ERRNO=%i", -r);
 
         /* Try to invoke the shutdown binary unless we already failed.
          * If we failed above, we want to freeze after finishing cleanup. */
@@ -3209,7 +3376,8 @@ finish:
 
         /* This is primarily useful when running systemd in a VM, as it provides the user running the VM with
          * a mechanism to pick up systemd's exit status in the VM. */
-        (void) sd_notifyf(0, "EXIT_STATUS=%i", retval);
+        (void) sd_notifyf(/* unset_environment= */ false,
+                          "EXIT_STATUS=%i", retval);
 
         watchdog_free_device();
         arg_watchdog_device = mfree(arg_watchdog_device);
diff --git a/src/core/main.h b/src/core/main.h
index b12a1cc..1949a08 100644
--- a/src/core/main.h
+++ b/src/core/main.h
@@ -1,9 +1,21 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 #pragma once
 
+#include <errno.h>
 #include <stdbool.h>
 
+typedef enum CrashAction { /* value type of arg_crash_action, declared below */
+        CRASH_FREEZE, /* first enumerator, i.e. 0 */
+        CRASH_REBOOT,
+        CRASH_POWEROFF,
+        _CRASH_ACTION_MAX, /* number of valid actions, not an action itself */
+        _CRASH_ACTION_INVALID = -EINVAL, /* negative errno-style sentinel, so callers can test "< 0" */
+} CrashAction;
+
+const char* crash_action_to_string(CrashAction action); /* presumably returns NULL for values without a name - TODO confirm */
+CrashAction crash_action_from_string(const char *action); /* presumably returns a negative value for unknown names - TODO confirm */
+
 extern bool arg_dump_core;
 extern int arg_crash_chvt;
 extern bool arg_crash_shell;
-extern bool arg_crash_reboot;
+extern CrashAction arg_crash_action;
diff --git a/src/core/manager-dump.c b/src/core/manager-dump.c
index 6c32d78..a12d50c 100644
--- a/src/core/manager-dump.c
+++ b/src/core/manager-dump.c
@@ -64,7 +64,7 @@ static void manager_dump_header(Manager *m, FILE *f, const char *prefix) {
          * stable between versions. We take the liberty to restructure it entirely between versions and
          * add/remove fields at will. */
 
-        fprintf(f, "%sManager: systemd " STRINGIFY(PROJECT_VERSION) " (" GIT_VERSION ")\n", strempty(prefix));
+        fprintf(f, "%sManager: systemd " PROJECT_VERSION_FULL " (" GIT_VERSION ")\n", strempty(prefix));
         fprintf(f, "%sFeatures: %s\n", strempty(prefix), systemd_features);
 
         for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c
index 1ac2636..b4af82b 100644
--- a/src/core/manager-serialize.c
+++ b/src/core/manager-serialize.c
@@ -23,11 +23,12 @@ int manager_open_serialization(Manager *m, FILE **ret_f) {
         return open_serialization_file("systemd-state", ret_f);
 }
 
-static bool manager_timestamp_shall_serialize(ManagerTimestamp t) {
-        if (!in_initrd())
+static bool manager_timestamp_shall_serialize(ManagerObjective o, ManagerTimestamp t) {
+        if (!in_initrd() && o != MANAGER_SOFT_REBOOT)
                 return true;
 
-        /* The following timestamps only apply to the host system, hence only serialize them there */
+        /* The following timestamps only apply to the host system (or first boot in case of soft-reboot),
+         * hence only serialize them there. */
         return !IN_SET(t,
                        MANAGER_TIMESTAMP_USERSPACE, MANAGER_TIMESTAMP_FINISH,
                        MANAGER_TIMESTAMP_SECURITY_START, MANAGER_TIMESTAMP_SECURITY_FINISH,
@@ -108,10 +109,13 @@ int manager_serialize(
         (void) serialize_usec(f, "pretimeout-watchdog-overridden", m->watchdog_overridden[WATCHDOG_PRETIMEOUT]);
         (void) serialize_item(f, "pretimeout-watchdog-governor-overridden", m->watchdog_pretimeout_governor_overridden);
 
+        (void) serialize_item(f, "previous-objective", manager_objective_to_string(m->objective));
+        (void) serialize_item_format(f, "soft-reboots-count", "%u", m->soft_reboots_count);
+
         for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
                 _cleanup_free_ char *joined = NULL;
 
-                if (!manager_timestamp_shall_serialize(q))
+                if (!manager_timestamp_shall_serialize(m->objective, q))
                         continue;
 
                 joined = strjoin(manager_timestamp_to_string(q), "-timestamp");
@@ -139,21 +143,19 @@ int manager_serialize(
         }
 
         if (m->user_lookup_fds[0] >= 0) {
-                int copy0, copy1;
-
-                copy0 = fdset_put_dup(fds, m->user_lookup_fds[0]);
-                if (copy0 < 0)
-                        return log_error_errno(copy0, "Failed to add user lookup fd to serialization: %m");
-
-                copy1 = fdset_put_dup(fds, m->user_lookup_fds[1]);
-                if (copy1 < 0)
-                        return log_error_errno(copy1, "Failed to add user lookup fd to serialization: %m");
+                r = serialize_fd_many(f, fds, "user-lookup", m->user_lookup_fds, 2);
+                if (r < 0)
+                        return r;
+        }
 
-                (void) serialize_item_format(f, "user-lookup", "%i %i", copy0, copy1);
+        if (m->handoff_timestamp_fds[0] >= 0) {
+                r = serialize_fd_many(f, fds, "handoff-timestamp-fds", m->handoff_timestamp_fds, 2);
+                if (r < 0)
+                        return r;
         }
 
         (void) serialize_ratelimit(f, "dump-ratelimit", &m->dump_ratelimit);
-        (void) serialize_ratelimit(f, "reload-ratelimit", &m->reload_ratelimit);
+        (void) serialize_ratelimit(f, "reload-reexec-ratelimit", &m->reload_reexec_ratelimit);
 
         bus_track_serialize(m->subscribed, f, "subscribed");
 
@@ -443,10 +445,10 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         if (r < 0)
                                 return r;
 
-                } else if (startswith(l, "env=")) {
-                        r = deserialize_environment(l + 4, &m->client_environment);
+                } else if ((val = startswith(l, "env="))) {
+                        r = deserialize_environment(val, &m->client_environment);
                         if (r < 0)
-                                log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", l);
+                                log_notice_errno(r, "Failed to parse environment entry: \"%s\", ignoring: %m", val);
 
                 } else if ((val = startswith(l, "notify-fd="))) {
                         int fd;
@@ -454,8 +456,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         fd = deserialize_fd(fds, val);
                         if (fd >= 0) {
                                 m->notify_event_source = sd_event_source_disable_unref(m->notify_event_source);
-                                safe_close(m->notify_fd);
-                                m->notify_fd = fd;
+                                close_and_replace(m->notify_fd, fd);
                         }
 
                 } else if ((val = startswith(l, "notify-socket="))) {
@@ -469,21 +470,26 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         fd = deserialize_fd(fds, val);
                         if (fd >= 0) {
                                 m->cgroups_agent_event_source = sd_event_source_disable_unref(m->cgroups_agent_event_source);
-                                safe_close(m->cgroups_agent_fd);
-                                m->cgroups_agent_fd = fd;
+                                close_and_replace(m->cgroups_agent_fd, fd);
                         }
 
                 } else if ((val = startswith(l, "user-lookup="))) {
-                        int fd0, fd1;
-
-                        if (sscanf(val, "%i %i", &fd0, &fd1) != 2 || fd0 < 0 || fd1 < 0 || fd0 == fd1 || !fdset_contains(fds, fd0) || !fdset_contains(fds, fd1))
-                                log_notice("Failed to parse user lookup fd, ignoring: %s", val);
-                        else {
-                                m->user_lookup_event_source = sd_event_source_disable_unref(m->user_lookup_event_source);
-                                safe_close_pair(m->user_lookup_fds);
-                                m->user_lookup_fds[0] = fdset_remove(fds, fd0);
-                                m->user_lookup_fds[1] = fdset_remove(fds, fd1);
-                        }
+
+                        m->user_lookup_event_source = sd_event_source_disable_unref(m->user_lookup_event_source);
+                        safe_close_pair(m->user_lookup_fds);
+
+                        r = deserialize_fd_many(fds, val, 2, m->user_lookup_fds);
+                        if (r < 0)
+                                log_warning_errno(r, "Failed to parse user-lookup fds: \"%s\", ignoring: %m", val);
+
+                } else if ((val = startswith(l, "handoff-timestamp-fds="))) {
+
+                        m->handoff_timestamp_event_source = sd_event_source_disable_unref(m->handoff_timestamp_event_source);
+                        safe_close_pair(m->handoff_timestamp_fds);
+
+                        r = deserialize_fd_many(fds, val, 2, m->handoff_timestamp_fds);
+                        if (r < 0)
+                                log_warning_errno(r, "Failed to parse handoff-timestamp fds: \"%s\", ignoring: %m", val);
 
                 } else if ((val = startswith(l, "dynamic-user=")))
                         dynamic_user_deserialize_one(m, val, fds, NULL);
@@ -495,8 +501,9 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                         (void) exec_shared_runtime_deserialize_one(m, val, fds);
                 else if ((val = startswith(l, "subscribed="))) {
 
-                        if (strv_extend(&m->deserialized_subscribed, val) < 0)
-                                return -ENOMEM;
+                        r = strv_extend(&m->deserialized_subscribed, val);
+                        if (r < 0)
+                                return r;
                 } else if ((val = startswith(l, "varlink-server-socket-address="))) {
                         if (!m->varlink_server && MANAGER_IS_SYSTEM(m)) {
                                 r = manager_varlink_init(m);
@@ -516,9 +523,25 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) {
                                 (void) varlink_server_deserialize_one(m->varlink_server, val, fds);
                 } else if ((val = startswith(l, "dump-ratelimit=")))
                         deserialize_ratelimit(&m->dump_ratelimit, "dump-ratelimit", val);
-                else if ((val = startswith(l, "reload-ratelimit=")))
-                        deserialize_ratelimit(&m->reload_ratelimit, "reload-ratelimit", val);
-                else {
+                else if ((val = startswith(l, "reload-reexec-ratelimit=")))
+                        deserialize_ratelimit(&m->reload_reexec_ratelimit, "reload-reexec-ratelimit", val);
+                else if ((val = startswith(l, "soft-reboots-count="))) {
+                        unsigned n;
+
+                        if (safe_atou(val, &n) < 0)
+                                log_notice("Failed to parse soft reboots counter '%s', ignoring.", val);
+                        else
+                                m->soft_reboots_count = n;
+                } else if ((val = startswith(l, "previous-objective="))) {
+                        ManagerObjective objective;
+
+                        objective = manager_objective_from_string(val);
+                        if (objective < 0)
+                                log_notice("Failed to parse previous objective '%s', ignoring.", val);
+                        else
+                                m->previous_objective = objective;
+
+                } else {
                         ManagerTimestamp q;
 
                         for (q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) {
diff --git a/src/core/manager.c b/src/core/manager.c
index 88eebfc..90e72b0 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -9,7 +9,6 @@
 #include <sys/mount.h>
 #include <sys/reboot.h>
 #include <sys/timerfd.h>
-#include <sys/utsname.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
@@ -25,6 +24,7 @@
 #include "alloc-util.h"
 #include "audit-fd.h"
 #include "boot-timestamps.h"
+#include "build-path.h"
 #include "bus-common-errors.h"
 #include "bus-error.h"
 #include "bus-kernel.h"
@@ -36,6 +36,7 @@
 #include "constants.h"
 #include "core-varlink.h"
 #include "creds-util.h"
+#include "daemon-util.h"
 #include "dbus-job.h"
 #include "dbus-manager.h"
 #include "dbus-unit.h"
@@ -55,6 +56,7 @@
 #include "inotify-util.h"
 #include "install.h"
 #include "io-util.h"
+#include "iovec-util.h"
 #include "label-util.h"
 #include "load-fragment.h"
 #include "locale-setup.h"
@@ -88,6 +90,7 @@
 #include "strxcpyx.h"
 #include "sysctl-util.h"
 #include "syslog-util.h"
+#include "taint.h"
 #include "terminal-util.h"
 #include "time-util.h"
 #include "transaction.h"
@@ -122,6 +125,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
 static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
 static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
@@ -263,12 +267,11 @@ static void manager_print_jobs_in_progress(Manager *m) {
                                       strempty(status_text));
         }
 
-        sd_notifyf(false,
-                   "STATUS=%sUser job %s/%s running (%s / %s)...",
-                   job_of_n,
-                   ident,
-                   job_type_to_string(j->type),
-                   time, limit);
+        (void) sd_notifyf(/* unset_environment= */ false,
+                          "STATUS=%sUser job %s/%s running (%s / %s)...",
+                          job_of_n,
+                          ident, job_type_to_string(j->type),
+                          time, limit);
         m->status_ready = false;
 }
 
@@ -397,7 +400,7 @@ static int manager_setup_time_change(Manager *m) {
                 return log_error_errno(r, "Failed to create time change event source: %m");
 
         /* Schedule this slightly earlier than the .timer event sources */
-        r = sd_event_source_set_priority(m->time_change_event_source, SD_EVENT_PRIORITY_NORMAL-1);
+        r = sd_event_source_set_priority(m->time_change_event_source, EVENT_PRIORITY_TIME_CHANGE);
         if (r < 0)
                 return log_error_errno(r, "Failed to set priority of time change event sources: %m");
 
@@ -464,7 +467,7 @@ static int manager_setup_timezone_change(Manager *m) {
                 return log_error_errno(r, "Failed to create timezone change event source: %m");
 
         /* Schedule this slightly earlier than the .timer event sources */
-        r = sd_event_source_set_priority(new_event, SD_EVENT_PRIORITY_NORMAL-1);
+        r = sd_event_source_set_priority(new_event, EVENT_PRIORITY_TIME_ZONE);
         if (r < 0)
                 return log_error_errno(r, "Failed to set priority of timezone change event sources: %m");
 
@@ -482,21 +485,19 @@ static int enable_special_signals(Manager *m) {
         if (MANAGER_IS_TEST_RUN(m))
                 return 0;
 
-        /* Enable that we get SIGINT on control-alt-del. In containers
-         * this will fail with EPERM (older) or EINVAL (newer), so
-         * ignore that. */
+        /* Enable that we get SIGINT on control-alt-del. In containers this will fail with EPERM (older) or
+         * EINVAL (newer), so ignore that. */
         if (reboot(RB_DISABLE_CAD) < 0 && !IN_SET(errno, EPERM, EINVAL))
-                log_warning_errno(errno, "Failed to enable ctrl-alt-del handling: %m");
+                log_warning_errno(errno, "Failed to enable ctrl-alt-del handling, ignoring: %m");
 
         fd = open_terminal("/dev/tty0", O_RDWR|O_NOCTTY|O_CLOEXEC);
-        if (fd < 0) {
-                /* Support systems without virtual console */
-                if (fd != -ENOENT)
-                        log_warning_errno(errno, "Failed to open /dev/tty0: %m");
-        } else {
+        if (fd < 0)
+                /* Support systems without virtual console (ENOENT) gracefully */
+                log_full_errno(fd == -ENOENT ? LOG_DEBUG : LOG_WARNING, fd, "Failed to open /dev/tty0, ignoring: %m");
+        else {
                 /* Enable that we get SIGWINCH on kbrequest */
                 if (ioctl(fd, KDSIGACCEPT, SIGWINCH) < 0)
-                        log_warning_errno(errno, "Failed to enable kbrequest handling: %m");
+                        log_warning_errno(errno, "Failed to enable kbrequest handling, ignoring: %m");
         }
 
         return 0;
@@ -592,10 +593,21 @@ static int manager_setup_signals(Manager *m) {
          * notify processing can still figure out to which process/service a message belongs, before we reap the
          * process. Also, process this before handling cgroup notifications, so that we always collect child exit
          * status information before detecting that there's no process in a cgroup. */
-        r = sd_event_source_set_priority(m->signal_event_source, SD_EVENT_PRIORITY_NORMAL-6);
+        r = sd_event_source_set_priority(m->signal_event_source, EVENT_PRIORITY_SIGNALS);
         if (r < 0)
                 return r;
 
+        /* Report to supervisor that we now process the above signals. We report this as level "2", to
+         * indicate that we support more than sysvinit's signals (of course, sysvinit never sent this
+         * message, but conceptually it makes sense to consider level "1" to be equivalent to sysvinit's
+         * signal handling). Also, by setting this to "2" people looking for this hopefully won't
+         * misunderstand this as a boolean concept. Signal level 2 shall refer to the signals PID 1
+         * understands at the time of release of systemd v256, i.e. including basic SIGRTMIN+18 handling for
+         * memory pressure and stuff. When more signals are hooked up (or more SIGRTMIN+18 multiplex
+         * operations added), this level should be increased. */
+        (void) sd_notify(/* unset_environment= */ false,
+                         "X_SYSTEMD_SIGNALS_LEVEL=2");
+
         if (MANAGER_IS_SYSTEM(m))
                 return enable_special_signals(m);
 
@@ -641,16 +653,13 @@ static char** sanitize_environment(char **l) {
                         "TRIGGER_TIMER_REALTIME_USEC",
                         "TRIGGER_UNIT",
                         "WATCHDOG_PID",
-                        "WATCHDOG_USEC",
-                        NULL);
+                        "WATCHDOG_USEC");
 
         /* Let's order the environment alphabetically, just to make it pretty */
         return strv_sort(l);
 }
 
 int manager_default_environment(Manager *m) {
-        int r;
-
         assert(m);
 
         m->transient_environment = strv_free(m->transient_environment);
@@ -661,21 +670,39 @@ int manager_default_environment(Manager *m) {
                  *
                  * The initial passed environment is untouched to keep /proc/self/environ valid; it is used
                  * for tagging the init process inside containers. */
-                m->transient_environment = strv_new("PATH=" DEFAULT_PATH);
-                if (!m->transient_environment)
+                char *path = strjoin("PATH=", default_PATH());
+                if (!path)
+                        return log_oom();
+
+                if (strv_consume(&m->transient_environment, path) < 0)
                         return log_oom();
 
                 /* Import locale variables LC_*= from configuration */
                 (void) locale_setup(&m->transient_environment);
         } else {
-                /* The user manager passes its own environment along to its children, except for $PATH. */
+                /* The user manager passes its own environment along to its children, except for $PATH and
+                 * session envs. */
+
                 m->transient_environment = strv_copy(environ);
                 if (!m->transient_environment)
                         return log_oom();
 
-                r = strv_env_replace_strdup(&m->transient_environment, "PATH=" DEFAULT_USER_PATH);
-                if (r < 0)
+                char *path = strjoin("PATH=", default_user_PATH());
+                if (!path)
+                        return log_oom();
+
+                if (strv_env_replace_consume(&m->transient_environment, path) < 0)
                         return log_oom();
+
+                /* Envvars set for our 'manager' class session are private and should not be propagated
+                 * to children. Also it's likely that the graphical session will set these on their own. */
+                strv_env_unset_many(m->transient_environment,
+                                    "XDG_SESSION_ID",
+                                    "XDG_SESSION_CLASS",
+                                    "XDG_SESSION_TYPE",
+                                    "XDG_SESSION_DESKTOP",
+                                    "XDG_SEAT",
+                                    "XDG_VTNR");
         }
 
         sanitize_environment(m->transient_environment);
@@ -689,18 +716,18 @@ static int manager_setup_prefix(Manager *m) {
         };
 
         static const struct table_entry paths_system[_EXEC_DIRECTORY_TYPE_MAX] = {
-                [EXEC_DIRECTORY_RUNTIME] =       { SD_PATH_SYSTEM_RUNTIME,       NULL },
-                [EXEC_DIRECTORY_STATE] =         { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
-                [EXEC_DIRECTORY_CACHE] =         { SD_PATH_SYSTEM_STATE_CACHE,   NULL },
-                [EXEC_DIRECTORY_LOGS] =          { SD_PATH_SYSTEM_STATE_LOGS,    NULL },
+                [EXEC_DIRECTORY_RUNTIME]       = { SD_PATH_SYSTEM_RUNTIME,       NULL },
+                [EXEC_DIRECTORY_STATE]         = { SD_PATH_SYSTEM_STATE_PRIVATE, NULL },
+                [EXEC_DIRECTORY_CACHE]         = { SD_PATH_SYSTEM_STATE_CACHE,   NULL },
+                [EXEC_DIRECTORY_LOGS]          = { SD_PATH_SYSTEM_STATE_LOGS,    NULL },
                 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_SYSTEM_CONFIGURATION, NULL },
         };
 
         static const struct table_entry paths_user[_EXEC_DIRECTORY_TYPE_MAX] = {
-                [EXEC_DIRECTORY_RUNTIME] =       { SD_PATH_USER_RUNTIME,       NULL  },
-                [EXEC_DIRECTORY_STATE] =         { SD_PATH_USER_STATE_PRIVATE, NULL  },
-                [EXEC_DIRECTORY_CACHE] =         { SD_PATH_USER_STATE_CACHE,   NULL  },
-                [EXEC_DIRECTORY_LOGS] =          { SD_PATH_USER_STATE_PRIVATE, "log" },
+                [EXEC_DIRECTORY_RUNTIME]       = { SD_PATH_USER_RUNTIME,       NULL  },
+                [EXEC_DIRECTORY_STATE]         = { SD_PATH_USER_STATE_PRIVATE, NULL  },
+                [EXEC_DIRECTORY_CACHE]         = { SD_PATH_USER_STATE_CACHE,   NULL  },
+                [EXEC_DIRECTORY_LOGS]          = { SD_PATH_USER_STATE_PRIVATE, "log" },
                 [EXEC_DIRECTORY_CONFIGURATION] = { SD_PATH_USER_CONFIGURATION, NULL  },
         };
 
@@ -736,7 +763,7 @@ static int manager_setup_run_queue(Manager *m) {
         if (r < 0)
                 return r;
 
-        r = sd_event_source_set_priority(m->run_queue_event_source, SD_EVENT_PRIORITY_IDLE);
+        r = sd_event_source_set_priority(m->run_queue_event_source, EVENT_PRIORITY_RUN_QUEUE);
         if (r < 0)
                 return r;
 
@@ -759,7 +786,7 @@ static int manager_setup_sigchld_event_source(Manager *m) {
         if (r < 0)
                 return r;
 
-        r = sd_event_source_set_priority(m->sigchld_event_source, SD_EVENT_PRIORITY_NORMAL-7);
+        r = sd_event_source_set_priority(m->sigchld_event_source, EVENT_PRIORITY_SIGCHLD);
         if (r < 0)
                 return r;
 
@@ -861,6 +888,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
         *m = (Manager) {
                 .runtime_scope = runtime_scope,
                 .objective = _MANAGER_OBJECTIVE_INVALID,
+                .previous_objective = _MANAGER_OBJECTIVE_INVALID,
 
                 .status_unit_format = STATUS_UNIT_FORMAT_DEFAULT,
 
@@ -878,6 +906,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
                 .cgroups_agent_fd = -EBADF,
                 .signal_fd = -EBADF,
                 .user_lookup_fds = EBADF_PAIR,
+                .handoff_timestamp_fds = EBADF_PAIR,
                 .private_listen_fd = -EBADF,
                 .dev_autofs_fd = -EBADF,
                 .cgroup_inotify_fd = -EBADF,
@@ -992,8 +1021,8 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
                         return r;
 
 #if HAVE_LIBBPF
-                if (MANAGER_IS_SYSTEM(m) && lsm_bpf_supported(/* initialize = */ true)) {
-                        r = lsm_bpf_setup(m);
+                if (MANAGER_IS_SYSTEM(m) && bpf_restrict_fs_supported(/* initialize = */ true)) {
+                        r = bpf_restrict_fs_setup(m);
                         if (r < 0)
                                 log_warning_errno(r, "Failed to setup LSM BPF, ignoring: %m");
                 }
@@ -1013,42 +1042,19 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
 
                 if (r < 0 && r != -EEXIST)
                         return r;
+        }
 
-                m->executor_fd = open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH);
-                if (m->executor_fd < 0)
-                        return log_emergency_errno(errno,
-                                                   "Failed to open executor binary '%s': %m",
-                                                   SYSTEMD_EXECUTOR_BINARY_PATH);
-        } else if (!FLAGS_SET(test_run_flags, MANAGER_TEST_DONT_OPEN_EXECUTOR)) {
-                _cleanup_free_ char *self_exe = NULL, *executor_path = NULL;
-                _cleanup_close_ int self_dir_fd = -EBADF;
-                int level = LOG_DEBUG;
-
-                /* Prefer sd-executor from the same directory as the test, e.g.: when running unit tests from the
-                * build directory. Fallback to working directory and then the installation path. */
-                r = readlink_and_make_absolute("/proc/self/exe", &self_exe);
-                if (r < 0)
-                        return r;
-
-                self_dir_fd = open_parent(self_exe, O_CLOEXEC|O_PATH|O_DIRECTORY, 0);
-                if (self_dir_fd < 0)
-                        return self_dir_fd;
-
-                m->executor_fd = RET_NERRNO(openat(self_dir_fd, "systemd-executor", O_CLOEXEC|O_PATH));
-                if (m->executor_fd == -ENOENT)
-                        m->executor_fd = RET_NERRNO(openat(AT_FDCWD, "systemd-executor", O_CLOEXEC|O_PATH));
-                if (m->executor_fd == -ENOENT) {
-                        m->executor_fd = RET_NERRNO(open(SYSTEMD_EXECUTOR_BINARY_PATH, O_CLOEXEC|O_PATH));
-                        level = LOG_WARNING; /* Tests should normally use local builds */
-                }
+        if (!FLAGS_SET(test_run_flags, MANAGER_TEST_DONT_OPEN_EXECUTOR)) {
+                m->executor_fd = pin_callout_binary(SYSTEMD_EXECUTOR_BINARY_PATH);
                 if (m->executor_fd < 0)
-                        return m->executor_fd;
+                        return log_debug_errno(m->executor_fd, "Failed to pin executor binary: %m");
 
+                _cleanup_free_ char *executor_path = NULL;
                 r = fd_get_path(m->executor_fd, &executor_path);
                 if (r < 0)
                         return r;
 
-                log_full(level, "Using systemd-executor binary from '%s'.", executor_path);
+                log_debug("Using systemd-executor binary from '%s'.", executor_path);
         }
 
         /* Note that we do not set up the notify fd here. We do that after deserialization,
@@ -1113,7 +1119,7 @@ static int manager_setup_notify(Manager *m) {
 
                 /* Process notification messages a bit earlier than SIGCHLD, so that we can still identify to which
                  * service an exit message belongs. */
-                r = sd_event_source_set_priority(m->notify_event_source, SD_EVENT_PRIORITY_NORMAL-8);
+                r = sd_event_source_set_priority(m->notify_event_source, EVENT_PRIORITY_NOTIFY);
                 if (r < 0)
                         return log_error_errno(r, "Failed to set priority of notify event source: %m");
 
@@ -1187,7 +1193,7 @@ static int manager_setup_cgroups_agent(Manager *m) {
                 /* Process cgroups notifications early. Note that when the agent notification is received
                  * we'll just enqueue the unit in the cgroup empty queue, hence pick a high priority than
                  * that. Also see handling of cgroup inotify for the unified cgroup stuff. */
-                r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-9);
+                r = sd_event_source_set_priority(m->cgroups_agent_event_source, EVENT_PRIORITY_CGROUP_AGENT);
                 if (r < 0)
                         return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m");
 
@@ -1236,13 +1242,13 @@ static int manager_setup_user_lookup_fd(Manager *m) {
         if (!m->user_lookup_event_source) {
                 r = sd_event_add_io(m->event, &m->user_lookup_event_source, m->user_lookup_fds[0], EPOLLIN, manager_dispatch_user_lookup_fd, m);
                 if (r < 0)
-                        return log_error_errno(errno, "Failed to allocate user lookup event source: %m");
+                        return log_error_errno(r, "Failed to allocate user lookup event source: %m");
 
                 /* Process even earlier than the notify event source, so that we always know first about valid UID/GID
                  * resolutions */
-                r = sd_event_source_set_priority(m->user_lookup_event_source, SD_EVENT_PRIORITY_NORMAL-11);
+                r = sd_event_source_set_priority(m->user_lookup_event_source, EVENT_PRIORITY_USER_LOOKUP);
                 if (r < 0)
-                        return log_error_errno(errno, "Failed to set priority of user lookup event source: %m");
+                        return log_error_errno(r, "Failed to set priority of user lookup event source: %m");
 
                 (void) sd_event_source_set_description(m->user_lookup_event_source, "user-lookup");
         }
@@ -1250,6 +1256,49 @@ static int manager_setup_user_lookup_fd(Manager *m) {
         return 0;
 }
 
+static int manager_setup_handoff_timestamp_fd(Manager *m) {
+        int r;
+
+        assert(m);
+
+        /* Set up the socket pair used for passing timestamps back when the executor processes we fork
+         * off invoke execve(), i.e. when we hand off control to our payload processes. */
+
+        if (m->handoff_timestamp_fds[0] < 0) {
+                m->handoff_timestamp_event_source = sd_event_source_disable_unref(m->handoff_timestamp_event_source);
+                safe_close_pair(m->handoff_timestamp_fds);
+
+                if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->handoff_timestamp_fds) < 0)
+                        return log_error_errno(errno, "Failed to allocate handoff timestamp socket: %m");
+
+                /* Make sure children never have to block (best effort: the result of fd_increase_rxbuf() is deliberately discarded) */
+                (void) fd_increase_rxbuf(m->handoff_timestamp_fds[0], NOTIFY_RCVBUF_SIZE);
+
+                r = setsockopt_int(m->handoff_timestamp_fds[0], SOL_SOCKET, SO_PASSCRED, true);
+                if (r < 0)
+                        return log_error_errno(r, "SO_PASSCRED failed: %m");
+
+                /* Mark the receiving socket as O_NONBLOCK (but leave sending side as-is) */
+                r = fd_nonblock(m->handoff_timestamp_fds[0], true);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to make handoff timestamp socket O_NONBLOCK: %m");
+        }
+
+        if (!m->handoff_timestamp_event_source) {
+                r = sd_event_add_io(m->event, &m->handoff_timestamp_event_source, m->handoff_timestamp_fds[0], EPOLLIN, manager_dispatch_handoff_timestamp_fd, m);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to allocate handoff timestamp event source: %m");
+
+                r = sd_event_source_set_priority(m->handoff_timestamp_event_source, EVENT_PRIORITY_HANDOFF_TIMESTAMP);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to set priority of handoff timestamp event source: %m");
+
+                (void) sd_event_source_set_description(m->handoff_timestamp_event_source, "handoff-timestamp");
+        }
+
+        return 0;
+}
+
 static unsigned manager_dispatch_cleanup_queue(Manager *m) {
         Unit *u;
         unsigned n = 0;
@@ -1664,12 +1713,14 @@ Manager* manager_free(Manager *m) {
         sd_event_source_unref(m->jobs_in_progress_event_source);
         sd_event_source_unref(m->run_queue_event_source);
         sd_event_source_unref(m->user_lookup_event_source);
+        sd_event_source_unref(m->handoff_timestamp_event_source);
         sd_event_source_unref(m->memory_pressure_event_source);
 
         safe_close(m->signal_fd);
         safe_close(m->notify_fd);
         safe_close(m->cgroups_agent_fd);
         safe_close_pair(m->user_lookup_fds);
+        safe_close_pair(m->handoff_timestamp_fds);
 
         manager_close_ask_password(m);
 
@@ -1679,7 +1730,7 @@ Manager* manager_free(Manager *m) {
 
         free(m->notify_socket);
 
-        lookup_paths_free(&m->lookup_paths);
+        lookup_paths_done(&m->lookup_paths);
         strv_free(m->transient_environment);
         strv_free(m->client_environment);
 
@@ -1691,8 +1742,10 @@ Manager* manager_free(Manager *m) {
 
         unit_defaults_done(&m->defaults);
 
-        assert(hashmap_isempty(m->units_requiring_mounts_for));
-        hashmap_free(m->units_requiring_mounts_for);
+        FOREACH_ARRAY(map, m->units_needing_mounts_for, _UNIT_MOUNT_DEPENDENCY_TYPE_MAX) {
+                assert(hashmap_isempty(*map));
+                hashmap_free(*map);
+        }
 
         hashmap_free(m->uid_refs);
         hashmap_free(m->gid_refs);
@@ -1708,7 +1761,7 @@ Manager* manager_free(Manager *m) {
         m->fw_ctx = fw_ctx_free(m->fw_ctx);
 
 #if BPF_FRAMEWORK
-        lsm_bpf_destroy(m->restrict_fs);
+        bpf_restrict_fs_destroy(m->restrict_fs);
 #endif
 
         safe_close(m->executor_fd);
@@ -1802,7 +1855,7 @@ static void manager_distribute_fds(Manager *m, FDSet *fds) {
 
         HASHMAP_FOREACH(u, m->units) {
 
-                if (fdset_size(fds) <= 0)
+                if (fdset_isempty(fds))
                         break;
 
                 if (!UNIT_VTABLE(u)->distribute_fds)
@@ -1973,6 +2026,20 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo
                                 return log_error_errno(r, "Deserialization failed: %m");
                 }
 
+                if (m->previous_objective >= 0) {
+                        if (IN_SET(m->previous_objective, MANAGER_REEXECUTE, MANAGER_SOFT_REBOOT, MANAGER_SWITCH_ROOT))
+                                log_debug("Launching as effect of a '%s' operation.",
+                                          manager_objective_to_string(m->previous_objective));
+                        else
+                                log_warning("Got unexpected previous objective '%s', ignoring.",
+                                            manager_objective_to_string(m->previous_objective));
+                }
+
+                /* If we are in a new soft-reboot iteration bump the counter now before starting units, so
+                 * that they can reliably read it. We get the previous objective from serialized state. */
+                if (m->previous_objective == MANAGER_SOFT_REBOOT)
+                        m->soft_reboots_count++;
+
                 /* Any fds left? Find some unit which wants them. This is useful to allow container managers to pass
                  * some file descriptors to us pre-initialized. This enables socket-based activation of entire
                  * containers. */
@@ -1994,6 +2061,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo
                         /* This shouldn't fail, except if things are really broken. */
                         return r;
 
+                r = manager_setup_handoff_timestamp_fd(m);
+                if (r < 0)
+                        /* This shouldn't fail, except if things are really broken. */
+                        return r;
+
                 /* Connect to the bus if we are good for it */
                 manager_setup_bus(m);
 
@@ -2203,8 +2275,8 @@ static int manager_dispatch_target_deps_queue(Manager *m) {
                 if (n_targets < 0)
                         return n_targets;
 
-                for (int i = 0; i < n_targets; i++) {
-                        r = unit_add_default_target_dependency(u, targets[i]);
+                FOREACH_ARRAY(i, targets, n_targets) {
+                        r = unit_add_default_target_dependency(u, *i);
                         if (r < 0)
                                 return r;
                 }
@@ -2303,7 +2375,7 @@ int manager_load_unit_prepare(
 
         Unit *unit = manager_get_unit(m, name);
         if (unit) {
-                /* The time-based cache allows to start new units without daemon-reload,
+                /* The time-based cache allows new units to be started without daemon-reload,
                  * but if they are already referenced (because of dependencies or ordering)
                  * then we have to force a load of the fragment. As an optimization, check
                  * first if anything in the usual paths was modified since the last time
@@ -2403,7 +2475,7 @@ void manager_clear_jobs(Manager *m) {
                 job_finish_and_invalidate(j, JOB_CANCELED, false, false);
 }
 
-void manager_unwatch_pidref(Manager *m, PidRef *pid) {
+void manager_unwatch_pidref(Manager *m, const PidRef *pid) {
         assert(m);
 
         for (;;) {
@@ -2586,22 +2658,70 @@ static void manager_invoke_notify_message(
                 UNIT_VTABLE(u)->notify_message(u, ucred, tags, fds);
 
         else if (DEBUG_LOGGING) {
-                _cleanup_free_ char *buf = NULL, *x = NULL, *y = NULL;
+                _cleanup_free_ char *joined = strv_join(tags, ", ");
+                char buf[CELLESCAPE_DEFAULT_LENGTH];
+
+                log_unit_debug(u, "Got notification message from unexpected unit type, ignoring: %s",
+                               joined ? cellescape(buf, sizeof(buf), joined) : "(null)");
+        }
+}
+
+static int manager_get_units_for_pidref(Manager *m, const PidRef *pidref, Unit ***ret_units) {
+        /* Determine array of every unit that is interested in the specified process */
+
+        assert(m);
+        assert(pidref_is_set(pidref));
 
-                buf = strv_join(tags, ", ");
-                if (buf)
-                        x = ellipsize(buf, 20, 90);
-                if (x)
-                        y = cescape(x);
+        Unit *u1, *u2, **array;
+        u1 = manager_get_unit_by_pidref_cgroup(m, pidref);
+        u2 = hashmap_get(m->watch_pids, pidref);
+        array = hashmap_get(m->watch_pids_more, pidref);
+
+        size_t n = 0;
+        if (u1)
+                n++;
+        if (u2)
+                n++;
+        if (array)
+                for (size_t j = 0; array[j]; j++)
+                        n++;
+
+        assert(n <= INT_MAX); /* Make sure we can reasonably return the counter as "int" */
+
+        if (ret_units) {
+                _cleanup_free_ Unit **units = NULL;
+
+                if (n > 0) {
+                        units = new(Unit*, n + 1);
+                        if (!units)
+                                return -ENOMEM;
+
+                        /* We return a dense array, and put the "main" unit first, i.e. unit in whose cgroup
+                         * the process currently is. Note that we do not bother with filtering duplicates
+                         * here. */
+
+                        size_t i = 0;
+                        if (u1)
+                                units[i++] = u1;
+                        if (u2)
+                                units[i++] = u2;
+                        if (array)
+                                for (size_t j = 0; array[j]; j++)
+                                        units[i++] = array[j];
+                        assert(i == n);
+
+                        units[i] = NULL; /* end array in an extra NULL */
+                }
 
-                log_unit_debug(u, "Got notification message \"%s\", ignoring.", strnull(y));
+                *ret_units = TAKE_PTR(units);
         }
+
+        return (int) n;
 }
 
 static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
-
-        _cleanup_fdset_free_ FDSet *fds = NULL;
         Manager *m = ASSERT_PTR(userdata);
+        _cleanup_fdset_free_ FDSet *fds = NULL;
         char buf[NOTIFY_BUFFER_MAX+1];
         struct iovec iovec = {
                 .iov_base = buf,
@@ -2618,12 +2738,9 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
 
         struct cmsghdr *cmsg;
         struct ucred *ucred = NULL;
-        _cleanup_free_ Unit **array_copy = NULL;
         _cleanup_strv_free_ char **tags = NULL;
-        Unit *u1, *u2, **array;
         int r, *fd_array = NULL;
         size_t n_fds = 0;
-        bool found = false;
         ssize_t n;
 
         assert(m->notify_fd == fd);
@@ -2711,39 +2828,22 @@ static int manager_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t
         PidRef pidref = PIDREF_MAKE_FROM_PID(ucred->pid);
 
         /* Notify every unit that might be interested, which might be multiple. */
-        u1 = manager_get_unit_by_pidref_cgroup(m, &pidref);
-        u2 = hashmap_get(m->watch_pids, &pidref);
-        array = hashmap_get(m->watch_pids_more, &pidref);
-        if (array) {
-                size_t k = 0;
+        _cleanup_free_ Unit **array = NULL;
 
-                while (array[k])
-                        k++;
-
-                array_copy = newdup(Unit*, array, k+1);
-                if (!array_copy)
-                        log_oom();
-        }
-        /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle
-         * duplicate units make sure we only invoke each unit's handler once. */
-        if (u1) {
-                manager_invoke_notify_message(m, u1, ucred, tags, fds);
-                found = true;
-        }
-        if (u2) {
-                manager_invoke_notify_message(m, u2, ucred, tags, fds);
-                found = true;
+        int n_array = manager_get_units_for_pidref(m, &pidref, &array);
+        if (n_array < 0) {
+                log_warning_errno(n_array, "Failed to determine units for PID " PID_FMT ", ignoring: %m", ucred->pid);
+                return 0;
         }
-        if (array_copy)
-                for (size_t i = 0; array_copy[i]; i++) {
-                        manager_invoke_notify_message(m, array_copy[i], ucred, tags, fds);
-                        found = true;
-                }
-
-        if (!found)
-                log_warning("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid);
+        if (n_array == 0)
+                log_debug("Cannot find unit for notify message of PID "PID_FMT", ignoring.", ucred->pid);
+        else
+                /* And now invoke the per-unit callbacks. Note that manager_invoke_notify_message() will handle
+                 * duplicate units – making sure we only invoke each unit's handler once. */
+                FOREACH_ARRAY(u, array, n_array)
+                        manager_invoke_notify_message(m, *u, ucred, tags, fds);
 
-        if (fdset_size(fds) > 0)
+        if (!fdset_isempty(fds))
                 log_warning("Got extra auxiliary fds with notification message, closing them.");
 
         return 0;
@@ -2792,10 +2892,7 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
                 goto turn_off;
 
         if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) {
-                _cleanup_free_ Unit **array_copy = NULL;
                 _cleanup_free_ char *name = NULL;
-                Unit *u1, *u2, **array;
-
                 (void) pid_get_comm(si.si_pid, &name);
 
                 log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)",
@@ -2813,41 +2910,27 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
                  * pidfd here any more even if we wanted (since the process just exited). */
                 PidRef pidref = PIDREF_MAKE_FROM_PID(si.si_pid);
 
-                /* And now figure out the unit this belongs to, it might be multiple... */
-                u1 = manager_get_unit_by_pidref_cgroup(m, &pidref);
-                u2 = hashmap_get(m->watch_pids, &pidref);
-                array = hashmap_get(m->watch_pids_more, &pidref);
-                if (array) {
-                        size_t n = 0;
-
-                        /* Count how many entries the array has */
-                        while (array[n])
-                                n++;
-
-                        /* Make a copy of the array so that we don't trip up on the array changing beneath us */
-                        array_copy = newdup(Unit*, array, n+1);
-                        if (!array_copy)
-                                log_oom();
-                }
-
-                /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but
-                 * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for
-                 * each iteration. */
-                if (u1) {
-                        /* We check for oom condition, in case we got SIGCHLD before the oom notification.
-                         * We only do this for the cgroup the PID belonged to. */
-                        (void) unit_check_oom(u1);
+                /* And now figure out the units this belongs to, there might be multiple... */
+                _cleanup_free_ Unit **array = NULL;
+                int n_array = manager_get_units_for_pidref(m, &pidref, &array);
+                if (n_array < 0)
+                        log_warning_errno(n_array, "Failed to get units for process " PID_FMT ", ignoring: %m", si.si_pid);
+                else if (n_array == 0)
+                        log_debug("Got SIGCHLD for process " PID_FMT " we weren't interested in, ignoring.", si.si_pid);
+                else {
+                        /* We check for an OOM condition, in case we got SIGCHLD before the OOM notification.
+                         * We only do this for the cgroup the PID belonged to, which comes first in the array if known. */
+                        (void) unit_check_oom(array[0]);
 
                         /* We check if systemd-oomd performed a kill so that we log and notify appropriately */
-                        (void) unit_check_oomd_kill(u1);
+                        (void) unit_check_oomd_kill(array[0]);
 
-                        manager_invoke_sigchld_event(m, u1, &si);
+                        /* Finally, execute them all. Note that the array might contain duplicates, but that's fine,
+                         * manager_invoke_sigchld_event() will ensure we only invoke the handlers once for each
+                         * iteration. */
+                        FOREACH_ARRAY(u, array, n_array)
+                                manager_invoke_sigchld_event(m, *u, &si);
                 }
-                if (u2)
-                        manager_invoke_sigchld_event(m, u2, &si);
-                if (array_copy)
-                        for (size_t i = 0; array_copy[i]; i++)
-                                manager_invoke_sigchld_event(m, array_copy[i], &si);
         }
 
         /* And now, we actually reap the zombie. */
@@ -2878,8 +2961,8 @@ static void manager_start_special(Manager *m, const char *name, JobMode mode) {
 
         log_info("Activating special unit %s...", s);
 
-        sd_notifyf(false,
-                   "STATUS=Activating special unit %s...", s);
+        (void) sd_notifyf(/* unset_environment= */ false,
+                          "STATUS=Activating special unit %s...", s);
         m->status_ready = false;
 }
 
@@ -2986,7 +3069,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
 
                 r = manager_get_dump_string(m, /* patterns= */ NULL, &dump);
                 if (r < 0) {
-                        log_warning_errno(errno, "Failed to acquire manager dump: %m");
+                        log_warning_errno(r, "Failed to acquire manager dump: %m");
                         break;
                 }
 
@@ -3008,9 +3091,9 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
                         const char *target;
                         JobMode mode;
                 } target_table[] = {
-                        [0] = { SPECIAL_DEFAULT_TARGET,     JOB_ISOLATE },
-                        [1] = { SPECIAL_RESCUE_TARGET,      JOB_ISOLATE },
-                        [2] = { SPECIAL_EMERGENCY_TARGET,   JOB_ISOLATE },
+                        [0] = { SPECIAL_DEFAULT_TARGET,     JOB_ISOLATE              },
+                        [1] = { SPECIAL_RESCUE_TARGET,      JOB_ISOLATE              },
+                        [2] = { SPECIAL_EMERGENCY_TARGET,   JOB_ISOLATE              },
                         [3] = { SPECIAL_HALT_TARGET,        JOB_REPLACE_IRREVERSIBLY },
                         [4] = { SPECIAL_POWEROFF_TARGET,    JOB_REPLACE_IRREVERSIBLY },
                         [5] = { SPECIAL_REBOOT_TARGET,      JOB_REPLACE_IRREVERSIBLY },
@@ -3077,7 +3160,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
 
                                         r = manager_get_dump_jobs_string(m, /* patterns= */ NULL, "  ", &dump_jobs);
                                         if (r < 0) {
-                                                log_warning_errno(errno, "Failed to acquire manager jobs dump: %m");
+                                                log_warning_errno(r, "Failed to acquire manager jobs dump: %m");
                                                 break;
                                         }
 
@@ -3371,16 +3454,18 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
         const char *msg;
         int audit_fd, r;
 
+        assert(m);
+        assert(u);
+
         if (!MANAGER_IS_SYSTEM(m))
                 return;
 
-        audit_fd = get_audit_fd();
-        if (audit_fd < 0)
+        /* Don't generate audit events if the service was already started and we're just deserializing */
+        if (MANAGER_IS_RELOADING(m))
                 return;
 
-        /* Don't generate audit events if the service was already
-         * started and we're just deserializing */
-        if (MANAGER_IS_RELOADING(m))
+        audit_fd = get_audit_fd();
+        if (audit_fd < 0)
                 return;
 
         r = unit_name_to_prefix_and_instance(u->id, &p);
@@ -3399,21 +3484,22 @@ void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success) {
                         log_warning_errno(errno, "Failed to send audit message, ignoring: %m");
         }
 #endif
-
 }
 
 void manager_send_unit_plymouth(Manager *m, Unit *u) {
         _cleanup_free_ char *message = NULL;
         int c, r;
 
-        /* Don't generate plymouth events if the service was already
-         * started and we're just deserializing */
-        if (MANAGER_IS_RELOADING(m))
-                return;
+        assert(m);
+        assert(u);
 
         if (!MANAGER_IS_SYSTEM(m))
                 return;
 
+        /* Don't generate plymouth events if the service was already started and we're just deserializing */
+        if (MANAGER_IS_RELOADING(m))
+                return;
+
         if (detect_container() > 0)
                 return;
 
@@ -3431,6 +3517,27 @@ void manager_send_unit_plymouth(Manager *m, Unit *u) {
                                "Failed to communicate with plymouth: %m");
 }
 
+void manager_send_unit_supervisor(Manager *m, Unit *u, bool active) {
+        assert(m);
+        assert(u);
+
+        /* Notify a "supervisor" process about our progress, i.e. a container manager, hypervisor, or
+         * surrounding service manager. */
+
+        if (MANAGER_IS_RELOADING(m))
+                return;
+
+        if (!UNIT_VTABLE(u)->notify_supervisor)
+                return;
+
+        if (in_initrd()) /* Only send these once we left the initrd */
+                return;
+
+        (void) sd_notifyf(/* unset_environment= */ false,
+                          active ? "X_SYSTEMD_UNIT_ACTIVE=%s" : "X_SYSTEMD_UNIT_INACTIVE=%s",
+                          u->id);
+}
+
 usec_t manager_get_watchdog(Manager *m, WatchdogType t) {
         assert(m);
 
@@ -3566,7 +3673,7 @@ int manager_reload(Manager *m) {
 
         manager_clear_jobs_and_units(m);
         lookup_paths_flush_generator(&m->lookup_paths);
-        lookup_paths_free(&m->lookup_paths);
+        lookup_paths_done(&m->lookup_paths);
         exec_shared_runtime_vacuum(m);
         dynamic_user_vacuum(m, false);
         m->uid_refs = hashmap_free(m->uid_refs);
@@ -3601,6 +3708,7 @@ int manager_reload(Manager *m) {
         (void) manager_setup_notify(m);
         (void) manager_setup_cgroups_agent(m);
         (void) manager_setup_user_lookup_fd(m);
+        (void) manager_setup_handoff_timestamp_fd(m);
 
         /* Third, fire things up! */
         manager_coldplug(m);
@@ -3645,8 +3753,6 @@ bool manager_unit_inactive_or_pending(Manager *m, const char *name) {
 }
 
 static void log_taint_string(Manager *m) {
-        _cleanup_free_ char *taint = NULL;
-
         assert(m);
 
         if (MANAGER_IS_USER(m) || m->taint_logged)
@@ -3654,7 +3760,7 @@ static void log_taint_string(Manager *m) {
 
         m->taint_logged = true; /* only check for taint once */
 
-        taint = manager_taint_string(m);
+        _cleanup_free_ char *taint = taint_string();
         if (isempty(taint))
                 return;
 
@@ -3670,7 +3776,19 @@ static void manager_notify_finished(Manager *m) {
         if (MANAGER_IS_TEST_RUN(m))
                 return;
 
-        if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
+        if (MANAGER_IS_SYSTEM(m) && m->soft_reboots_count > 0) {
+                /* The soft-reboot case, where we only report data for the last reboot */
+                firmware_usec = loader_usec = initrd_usec = kernel_usec = 0;
+                total_usec = userspace_usec = usec_sub_unsigned(m->timestamps[MANAGER_TIMESTAMP_FINISH].monotonic,
+                                                                m->timestamps[MANAGER_TIMESTAMP_SHUTDOWN_START].monotonic);
+
+                log_struct(LOG_INFO,
+                           "MESSAGE_ID=" SD_MESSAGE_STARTUP_FINISHED_STR,
+                           "USERSPACE_USEC="USEC_FMT, userspace_usec,
+                           LOG_MESSAGE("Soft-reboot finished in %s, counter is now at %u.",
+                                       FORMAT_TIMESPAN(total_usec, USEC_PER_MSEC),
+                                       m->soft_reboots_count));
+        } else if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) {
                 char buf[FORMAT_TIMESPAN_MAX + STRLEN(" (firmware) + ") + FORMAT_TIMESPAN_MAX + STRLEN(" (loader) + ")]
                         = {};
                 char *p = buf;
@@ -3740,7 +3858,7 @@ static void manager_notify_finished(Manager *m) {
         log_taint_string(m);
 }
 
-static void user_manager_send_ready(Manager *m) {
+static void manager_send_ready_user_scope(Manager *m) {
         int r;
 
         assert(m);
@@ -3749,7 +3867,7 @@ static void user_manager_send_ready(Manager *m) {
         if (!MANAGER_IS_USER(m) || m->ready_sent)
                 return;
 
-        r = sd_notify(false,
+        r = sd_notify(/* unset_environment= */ false,
                       "READY=1\n"
                       "STATUS=Reached " SPECIAL_BASIC_TARGET ".");
         if (r < 0)
@@ -3759,14 +3877,19 @@ static void user_manager_send_ready(Manager *m) {
         m->status_ready = false;
 }
 
-static void manager_send_ready(Manager *m) {
+static void manager_send_ready_system_scope(Manager *m) {
         int r;
 
+        assert(m);
+
+        if (!MANAGER_IS_SYSTEM(m))
+                return;
+
+        /* Skip the notification if nothing changed. */
         if (m->ready_sent && m->status_ready)
-                /* Skip the notification if nothing changed. */
                 return;
 
-        r = sd_notify(false,
+        r = sd_notify(/* unset_environment= */ false,
                       "READY=1\n"
                       "STATUS=Ready.");
         if (r < 0)
@@ -3790,7 +3913,7 @@ static void manager_check_basic_target(Manager *m) {
                 return;
 
         /* For user managers, send out READY=1 as soon as we reach basic.target */
-        user_manager_send_ready(m);
+        manager_send_ready_user_scope(m);
 
         /* Log the taint string as soon as we reach basic.target */
         log_taint_string(m);
@@ -3808,7 +3931,7 @@ void manager_check_finished(Manager *m) {
 
         manager_check_basic_target(m);
 
-        if (hashmap_size(m->jobs) > 0) {
+        if (!hashmap_isempty(m->jobs)) {
                 if (m->jobs_in_progress_event_source)
                         /* Ignore any failure, this is only for feedback */
                         (void) sd_event_source_set_time(m->jobs_in_progress_event_source,
@@ -3821,7 +3944,7 @@ void manager_check_finished(Manager *m) {
         if (hashmap_buckets(m->jobs) > hashmap_size(m->units) / 10)
                 m->jobs = hashmap_free(m->jobs);
 
-        manager_send_ready(m);
+        manager_send_ready_system_scope(m);
 
         /* Notify Type=idle units that we are done now */
         manager_close_idle_pipe(m);
@@ -3851,9 +3974,7 @@ void manager_send_reloading(Manager *m) {
         assert(m);
 
         /* Let whoever invoked us know that we are now reloading */
-        (void) sd_notifyf(/* unset= */ false,
-                          "RELOADING=1\n"
-                          "MONOTONIC_USEC=" USEC_FMT "\n", now(CLOCK_MONOTONIC));
+        (void) notify_reloading_full(/* status = */ NULL);
 
         /* And ensure that we'll send READY=1 again as soon as we are ready again */
         m->ready_sent = false;
@@ -3878,8 +3999,8 @@ static int manager_run_environment_generators(Manager *m) {
         _cleanup_strv_free_ char **paths = NULL;
         void* args[] = {
                 [STDOUT_GENERATE] = &tmp,
-                [STDOUT_COLLECT] = &tmp,
-                [STDOUT_CONSUME] = &m->transient_environment,
+                [STDOUT_COLLECT]  = &tmp,
+                [STDOUT_CONSUME]  = &m->transient_environment,
         };
         int r;
 
@@ -4040,7 +4161,7 @@ static int manager_run_generators(Manager *m) {
 
         /* On some systems /tmp/ doesn't exist, and on some other systems we cannot create it at all. Avoid
          * trying to mount a private tmpfs on it as there's no one size fits all. */
-        if (is_dir("/tmp", /* follow= */ false) > 0)
+        if (is_dir("/tmp", /* follow= */ false) > 0 && !MANAGER_IS_TEST_RUN(m))
                 flags |= FORK_PRIVATE_TMP;
 
         r = safe_fork("(sd-gens)", flags, NULL);
@@ -4373,7 +4494,7 @@ void manager_override_show_status(Manager *m, ShowStatus mode, const char *reaso
         set_show_status_marker(show_status_on(mode));
 }
 
-const char *manager_get_confirm_spawn(Manager *m) {
+const char* manager_get_confirm_spawn(Manager *m) {
         static int last_errno = 0;
         struct stat st;
         int r;
@@ -4478,14 +4599,15 @@ void manager_status_printf(Manager *m, StatusType type, const char *status, cons
         va_end(ap);
 }
 
-Set* manager_get_units_requiring_mounts_for(Manager *m, const char *path) {
+Set* manager_get_units_needing_mounts_for(Manager *m, const char *path, UnitMountDependencyType t) {
         assert(m);
         assert(path);
+        assert(t >= 0 && t < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX);
 
         if (path_equal(path, "/"))
                 path = "";
 
-        return hashmap_get(m->units_requiring_mounts_for, path);
+        return hashmap_get(m->units_needing_mounts_for[t], path);
 }
 
 int manager_update_failed_units(Manager *m, Unit *u, bool failed) {
@@ -4542,7 +4664,7 @@ ManagerState manager_state(Manager *m) {
         }
 
         /* Are there any failed units? If so, we are in degraded mode */
-        if (set_size(m->failed_units) > 0)
+        if (!set_isempty(m->failed_units))
                 return MANAGER_DEGRADED;
 
         return MANAGER_RUNNING;
@@ -4701,20 +4823,19 @@ static void manager_vacuum(Manager *m) {
         exec_shared_runtime_vacuum(m);
 }
 
-int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
         struct buffer {
                 uid_t uid;
                 gid_t gid;
                 char unit_name[UNIT_NAME_MAX+1];
         } _packed_ buffer;
 
-        Manager *m = userdata;
+        Manager *m = ASSERT_PTR(userdata);
         ssize_t l;
         size_t n;
         Unit *u;
 
-        assert_se(source);
-        assert_se(m);
+        assert(source);
 
         /* Invoked whenever a child process succeeded resolving its user/group to use and sent us the
          * resulting UID/GID in a datagram. We parse the datagram here and pass it off to the unit, so that
@@ -4763,76 +4884,71 @@ int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t re
         return 0;
 }
 
-static int short_uid_range(const char *path) {
-        _cleanup_(uid_range_freep) UidRange *p = NULL;
-        int r;
-
-        assert(path);
-
-        /* Taint systemd if we the UID range assigned to this environment doesn't at least cover 0…65534,
-         * i.e. from root to nobody. */
-
-        r = uid_range_load_userns(&p, path);
-        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
-                return false;
-        if (r < 0)
-                return log_debug_errno(r, "Failed to load %s: %m", path);
-
-        return !uid_range_covers(p, 0, 65535);
-}
-
-char* manager_taint_string(const Manager *m) {
-        /* Returns a "taint string", e.g. "local-hwclock:var-run-bad". Only things that are detected at
-         * runtime should be tagged here. For stuff that is known during compilation, emit a warning in the
-         * configuration phase. */
-
-        assert(m);
-
-        const char* stage[12] = {};
-        size_t n = 0;
-
-        _cleanup_free_ char *usrbin = NULL;
-        if (readlink_malloc("/bin", &usrbin) < 0 || !PATH_IN_SET(usrbin, "usr/bin", "/usr/bin"))
-                stage[n++] = "unmerged-usr";
+static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        Manager *m = ASSERT_PTR(userdata);
+        usec_t ts[2] = {};
+        CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
+        struct msghdr msghdr = {
+                .msg_iov = &IOVEC_MAKE(ts, sizeof(ts)),
+                .msg_iovlen = 1,
+                .msg_control = &control,
+                .msg_controllen = sizeof(control),
+        };
+        ssize_t n;
 
-        if (access("/proc/cgroups", F_OK) < 0)
-                stage[n++] = "cgroups-missing";
+        assert(source);
 
-        if (cg_all_unified() == 0)
-                stage[n++] = "cgroupsv1";
+        n = recvmsg_safe(m->handoff_timestamp_fds[0], &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
+        if (ERRNO_IS_NEG_TRANSIENT(n))
+                return 0; /* Spurious wakeup, try again */
+        if (n == -EXFULL) {
+                log_warning("Got message with truncated control, ignoring.");
+                return 0;
+        }
+        if (n < 0)
+                return log_error_errno(n, "Failed to receive handoff timestamp message: %m");
 
-        if (clock_is_localtime(NULL) > 0)
-                stage[n++] = "local-hwclock";
+        if (msghdr.msg_flags & MSG_TRUNC) {
+                log_warning("Got truncated handoff timestamp message, ignoring.");
+                return 0;
+        }
+        if (n != sizeof(ts)) {
+                log_warning("Got handoff timestamp message of unexpected size %zi (expected %zu), ignoring.", n, sizeof(ts));
+                return 0;
+        }
 
-        if (os_release_support_ended(NULL, /* quiet= */ true, NULL) > 0)
-                stage[n++] = "support-ended";
+        struct ucred *ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
+        if (!ucred || !pid_is_valid(ucred->pid)) {
+                log_warning("Received notify message without valid credentials. Ignoring.");
+                return 0;
+        }
 
-        _cleanup_free_ char *destination = NULL;
-        if (readlink_malloc("/var/run", &destination) < 0 ||
-            !PATH_IN_SET(destination, "../run", "/run"))
-                stage[n++] = "var-run-bad";
+        log_debug("Got handoff timestamp event for PID " PID_FMT ".", ucred->pid);
 
-        _cleanup_free_ char *overflowuid = NULL, *overflowgid = NULL;
-        if (read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid) >= 0 &&
-            !streq(overflowuid, "65534"))
-                stage[n++] = "overflowuid-not-65534";
-        if (read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid) >= 0 &&
-            !streq(overflowgid, "65534"))
-                stage[n++] = "overflowgid-not-65534";
+        _cleanup_free_ Unit **units = NULL;
+        int n_units = manager_get_units_for_pidref(m, &PIDREF_MAKE_FROM_PID(ucred->pid), &units);
+        if (n_units < 0) {
+                log_warning_errno(n_units, "Unable to determine units for PID " PID_FMT ", ignoring: %m", ucred->pid);
+                return 0;
+        }
+        if (n_units == 0) {
+                log_debug("Got handoff timestamp for process " PID_FMT " we are not interested in, ignoring.", ucred->pid);
+                return 0;
+        }
 
-        struct utsname uts;
-        assert_se(uname(&uts) >= 0);
-        if (strverscmp_improved(uts.release, KERNEL_BASELINE_VERSION) < 0)
-                stage[n++] = "old-kernel";
+        dual_timestamp dt = {
+                .realtime = ts[0],
+                .monotonic = ts[1],
+        };
 
-        if (short_uid_range("/proc/self/uid_map") > 0)
-                stage[n++] = "short-uid-range";
-        if (short_uid_range("/proc/self/gid_map") > 0)
-                stage[n++] = "short-gid-range";
+        FOREACH_ARRAY(u, units, n_units) {
+                if (!UNIT_VTABLE(*u)->notify_handoff_timestamp)
+                        continue;
 
-        assert(n < ELEMENTSOF(stage) - 1);  /* One extra for NULL terminator */
+                UNIT_VTABLE(*u)->notify_handoff_timestamp(*u, ucred, &dt);
+        }
 
-        return strv_join((char**) stage, ":");
+        return 0;
 }
 
 void manager_ref_console(Manager *m) {
@@ -4988,14 +5104,13 @@ LogTarget manager_get_executor_log_target(Manager *m) {
         assert(m);
 
         /* If journald is not available tell sd-executor to go to kmsg, as it might be starting journald */
+        if (!MANAGER_IS_TEST_RUN(m) && !manager_journal_is_running(m))
+                return LOG_TARGET_KMSG;
 
-        if (manager_journal_is_running(m))
-                return log_get_target();
-
-        return LOG_TARGET_KMSG;
+        return log_get_target();
 }
 
-static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
+static const char* const manager_state_table[_MANAGER_STATE_MAX] = {
         [MANAGER_INITIALIZING] = "initializing",
         [MANAGER_STARTING]     = "starting",
         [MANAGER_RUNNING]      = "running",
@@ -5006,7 +5121,22 @@ static const char *const manager_state_table[_MANAGER_STATE_MAX] = {
 
 DEFINE_STRING_TABLE_LOOKUP(manager_state, ManagerState);
 
-static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
+static const char* const manager_objective_table[_MANAGER_OBJECTIVE_MAX] = {
+        [MANAGER_OK]          = "ok",
+        [MANAGER_EXIT]        = "exit",
+        [MANAGER_RELOAD]      = "reload",
+        [MANAGER_REEXECUTE]   = "reexecute",
+        [MANAGER_REBOOT]      = "reboot",
+        [MANAGER_SOFT_REBOOT] = "soft-reboot",
+        [MANAGER_POWEROFF]    = "poweroff",
+        [MANAGER_HALT]        = "halt",
+        [MANAGER_KEXEC]       = "kexec",
+        [MANAGER_SWITCH_ROOT] = "switch-root",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(manager_objective, ManagerObjective);
+
+static const char* const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
         [MANAGER_TIMESTAMP_FIRMWARE]                 = "firmware",
         [MANAGER_TIMESTAMP_LOADER]                   = "loader",
         [MANAGER_TIMESTAMP_KERNEL]                   = "kernel",
@@ -5026,6 +5156,7 @@ static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = {
         [MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH] = "initrd-generators-finish",
         [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START]  = "initrd-units-load-start",
         [MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH] = "initrd-units-load-finish",
+        [MANAGER_TIMESTAMP_SHUTDOWN_START]           = "shutdown-start",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp);
diff --git a/src/core/manager.h b/src/core/manager.h
index d96eb7b..0641b27 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -120,6 +120,9 @@ typedef enum ManagerTimestamp {
         MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
         MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
         MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,
+
+        MANAGER_TIMESTAMP_SHUTDOWN_START,
+
         _MANAGER_TIMESTAMP_MAX,
         _MANAGER_TIMESTAMP_INVALID = -EINVAL,
 } ManagerTimestamp;
@@ -137,6 +140,7 @@ typedef enum WatchdogType {
 #include "path-lookup.h"
 #include "show-status.h"
 #include "unit-name.h"
+#include "unit.h"
 
 typedef enum ManagerTestRunFlags {
         MANAGER_TEST_NORMAL                  = 0,       /* run normally */
@@ -282,6 +286,9 @@ struct Manager {
         int user_lookup_fds[2];
         sd_event_source *user_lookup_event_source;
 
+        int handoff_timestamp_fds[2];
+        sd_event_source *handoff_timestamp_event_source;
+
         RuntimeScope runtime_scope;
 
         LookupPaths lookup_paths;
@@ -375,6 +382,8 @@ struct Manager {
         bool etc_localtime_accessible;
 
         ManagerObjective objective;
+        /* Objective as it was before serialization, mostly to detect soft-reboots */
+        ManagerObjective previous_objective;
 
         /* Flags */
         bool dispatching_load_queue;
@@ -438,10 +447,9 @@ struct Manager {
         /* This is true before and after switching root. */
         bool switching_root;
 
-        /* This maps all possible path prefixes to the units needing
-         * them. It's a hashmap with a path string as key and a Set as
-         * value where Unit objects are contained. */
-        Hashmap *units_requiring_mounts_for;
+        /* These map all possible path prefixes to the units needing them. They are hashmaps with a path
+         * string as key, and a Set as value where Unit objects are contained. */
+        Hashmap *units_needing_mounts_for[_UNIT_MOUNT_DEPENDENCY_TYPE_MAX];
 
         /* Used for processing polkit authorization responses */
         Hashmap *polkit_registry;
@@ -488,8 +496,8 @@ struct Manager {
         /* Reference to RestrictFileSystems= BPF program */
         struct restrict_fs_bpf *restrict_fs;
 
-        /* Allow users to configure a rate limit for Reload() operations */
-        RateLimit reload_ratelimit;
+        /* Allow users to configure a rate limit for Reload()/Reexecute() operations */
+        RateLimit reload_reexec_ratelimit;
         /* Dump*() are slow, so always rate limit them to 10 per 10 minutes */
         RateLimit dump_ratelimit;
 
@@ -501,6 +509,8 @@ struct Manager {
         /* Pin the systemd-executor binary, so that it never changes until re-exec, ensuring we don't have
          * serialization/deserialization compatibility issues during upgrades. */
         int executor_fd;
+
+        unsigned soft_reboots_count;
 };
 
 static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
@@ -550,7 +560,7 @@ int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error
 
 void manager_clear_jobs(Manager *m);
 
-void manager_unwatch_pidref(Manager *m, PidRef *pid);
+void manager_unwatch_pidref(Manager *m, const PidRef *pid);
 
 unsigned manager_dispatch_load_queue(Manager *m);
 
@@ -575,6 +585,7 @@ void manager_reset_failed(Manager *m);
 
 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success);
 void manager_send_unit_plymouth(Manager *m, Unit *u);
+void manager_send_unit_supervisor(Manager *m, Unit *u, bool active);
 
 bool manager_unit_inactive_or_pending(Manager *m, const char *name);
 
@@ -596,7 +607,7 @@ double manager_get_progress(Manager *m);
 
 void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) _printf_(4,5);
 
-Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path);
+Set* manager_get_units_needing_mounts_for(Manager *m, const char *path, UnitMountDependencyType t);
 
 ManagerState manager_state(Manager *m);
 
@@ -608,8 +619,6 @@ int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
 void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
 int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc);
 
-char* manager_taint_string(const Manager *m);
-
 void manager_ref_console(Manager *m);
 void manager_unref_console(Manager *m);
 
@@ -619,13 +628,16 @@ void manager_restore_original_log_level(Manager *m);
 void manager_override_log_target(Manager *m, LogTarget target);
 void manager_restore_original_log_target(Manager *m);
 
-const char *manager_state_to_string(ManagerState m) _const_;
+const char* manager_get_confirm_spawn(Manager *m);
+void manager_disable_confirm_spawn(void);
+
+const char* manager_state_to_string(ManagerState m) _const_;
 ManagerState manager_state_from_string(const char *s) _pure_;
 
-const char *manager_get_confirm_spawn(Manager *m);
-void manager_disable_confirm_spawn(void);
+const char* manager_objective_to_string(ManagerObjective m) _const_;
+ManagerObjective manager_objective_from_string(const char *s) _pure_;
 
-const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
+const char* manager_timestamp_to_string(ManagerTimestamp m) _const_;
 ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_;
 ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
 
@@ -644,3 +656,26 @@ OOMPolicy oom_policy_from_string(const char *s) _pure_;
 
 void unit_defaults_init(UnitDefaults *defaults, RuntimeScope scope);
 void unit_defaults_done(UnitDefaults *defaults);
+
+enum {
+        /* most important … */
+        EVENT_PRIORITY_USER_LOOKUP       = SD_EVENT_PRIORITY_NORMAL-11,
+        EVENT_PRIORITY_MOUNT_TABLE       = SD_EVENT_PRIORITY_NORMAL-10,
+        EVENT_PRIORITY_SWAP_TABLE        = SD_EVENT_PRIORITY_NORMAL-10,
+        EVENT_PRIORITY_CGROUP_AGENT      = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv1 */
+        EVENT_PRIORITY_CGROUP_INOTIFY    = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv2 */
+        EVENT_PRIORITY_CGROUP_OOM        = SD_EVENT_PRIORITY_NORMAL-8,
+        EVENT_PRIORITY_HANDOFF_TIMESTAMP = SD_EVENT_PRIORITY_NORMAL-7,
+        EVENT_PRIORITY_EXEC_FD           = SD_EVENT_PRIORITY_NORMAL-6,
+        EVENT_PRIORITY_NOTIFY            = SD_EVENT_PRIORITY_NORMAL-5,
+        EVENT_PRIORITY_SIGCHLD           = SD_EVENT_PRIORITY_NORMAL-4,
+        EVENT_PRIORITY_SIGNALS           = SD_EVENT_PRIORITY_NORMAL-3,
+        EVENT_PRIORITY_CGROUP_EMPTY      = SD_EVENT_PRIORITY_NORMAL-2,
+        EVENT_PRIORITY_TIME_CHANGE       = SD_EVENT_PRIORITY_NORMAL-1,
+        EVENT_PRIORITY_TIME_ZONE         = SD_EVENT_PRIORITY_NORMAL-1,
+        EVENT_PRIORITY_IPC               = SD_EVENT_PRIORITY_NORMAL,
+        EVENT_PRIORITY_REWATCH_PIDS      = SD_EVENT_PRIORITY_IDLE,
+        EVENT_PRIORITY_SERVICE_WATCHDOG  = SD_EVENT_PRIORITY_IDLE+1,
+        EVENT_PRIORITY_RUN_QUEUE         = SD_EVENT_PRIORITY_IDLE+2,
+        /* … to least important */
+};
diff --git a/src/core/meson.build b/src/core/meson.build
index 7701d3d..7a2012a 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -7,7 +7,8 @@ libcore_sources = files(
         'bpf-devices.c',
         'bpf-firewall.c',
         'bpf-foreign.c',
-        'bpf-lsm.c',
+        'bpf-restrict-fs.c',
+        'bpf-restrict-ifaces.c',
         'bpf-socket-bind.c',
         'cgroup.c',
         'core-varlink.c',
@@ -51,7 +52,6 @@ libcore_sources = files(
         'mount.c',
         'namespace.c',
         'path.c',
-        'restrict-ifaces.c',
         'scope.c',
         'selinux-access.c',
         'selinux-setup.c',
@@ -61,6 +61,7 @@ libcore_sources = files(
         'smack-setup.c',
         'socket.c',
         'swap.c',
+        'taint.c',
         'target.c',
         'timer.c',
         'transaction.c',
@@ -125,7 +126,7 @@ libcore = shared_library(
                         libaudit,
                         libblkid,
                         libdl,
-                        libkmod,
+                        libkmod_cflags,
                         libm,
                         libmount,
                         libpam,
diff --git a/src/core/mount.c b/src/core/mount.c
index 3c4971c..ebafcaf 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -39,18 +39,18 @@
 #define RETRY_UMOUNT_MAX 32
 
 static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {
-        [MOUNT_DEAD] = UNIT_INACTIVE,
-        [MOUNT_MOUNTING] = UNIT_ACTIVATING,
-        [MOUNT_MOUNTING_DONE] = UNIT_ACTIVATING,
-        [MOUNT_MOUNTED] = UNIT_ACTIVE,
-        [MOUNT_REMOUNTING] = UNIT_RELOADING,
-        [MOUNT_UNMOUNTING] = UNIT_DEACTIVATING,
+        [MOUNT_DEAD]               = UNIT_INACTIVE,
+        [MOUNT_MOUNTING]           = UNIT_ACTIVATING,
+        [MOUNT_MOUNTING_DONE]      = UNIT_ACTIVATING,
+        [MOUNT_MOUNTED]            = UNIT_ACTIVE,
+        [MOUNT_REMOUNTING]         = UNIT_RELOADING,
+        [MOUNT_UNMOUNTING]         = UNIT_DEACTIVATING,
         [MOUNT_REMOUNTING_SIGTERM] = UNIT_RELOADING,
         [MOUNT_REMOUNTING_SIGKILL] = UNIT_RELOADING,
         [MOUNT_UNMOUNTING_SIGTERM] = UNIT_DEACTIVATING,
         [MOUNT_UNMOUNTING_SIGKILL] = UNIT_DEACTIVATING,
-        [MOUNT_FAILED] = UNIT_FAILED,
-        [MOUNT_CLEANING] = UNIT_MAINTENANCE,
+        [MOUNT_FAILED]             = UNIT_FAILED,
+        [MOUNT_CLEANING]           = UNIT_MAINTENANCE,
 };
 
 static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
@@ -171,24 +171,9 @@ static bool mount_propagate_stop(Mount *m) {
                                   * otherwise let's not bother. */
 }
 
-static bool mount_needs_quota(const MountParameters *p) {
-        assert(p);
-
-        if (p->fstype && !fstype_needs_quota(p->fstype))
-                return false;
-
-        if (mount_is_bind(p))
-                return false;
-
-        return fstab_test_option(p->options,
-                                 "usrquota\0" "grpquota\0" "quota\0" "usrjquota\0" "grpjquota\0");
-}
-
 static void mount_init(Unit *u) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
-        assert(m);
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         m->timeout_usec = u->manager->defaults.timeout_start_usec;
@@ -218,12 +203,7 @@ static int mount_arm_timer(Mount *m, bool relative, usec_t usec) {
 
 static void mount_unwatch_control_pid(Mount *m) {
         assert(m);
-
-        if (!pidref_is_set(&m->control_pid))
-                return;
-
-        unit_unwatch_pidref(UNIT(m), &m->control_pid);
-        pidref_done(&m->control_pid);
+        unit_unwatch_pidref_done(UNIT(m), &m->control_pid);
 }
 
 static void mount_parameters_done(MountParameters *p) {
@@ -235,9 +215,7 @@ static void mount_parameters_done(MountParameters *p) {
 }
 
 static void mount_done(Unit *u) {
-        Mount *m = MOUNT(u);
-
-        assert(m);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         m->where = mfree(m->where);
 
@@ -245,6 +223,7 @@ static void mount_done(Unit *u) {
         mount_parameters_done(&m->parameters_fragment);
 
         m->exec_runtime = exec_runtime_free(m->exec_runtime);
+
         exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
         m->control_command = NULL;
 
@@ -262,6 +241,8 @@ static int update_parameters_proc_self_mountinfo(
         MountParameters *p;
         int r, q, w;
 
+        assert(m);
+
         p = &m->parameters_proc_self_mountinfo;
 
         r = free_and_strdup(&p->what, what);
@@ -281,8 +262,6 @@ static int update_parameters_proc_self_mountinfo(
 
 static int mount_add_mount_dependencies(Mount *m) {
         MountParameters *pm;
-        Unit *other;
-        Set *s;
         int r;
 
         assert(m);
@@ -296,7 +275,7 @@ static int mount_add_mount_dependencies(Mount *m) {
                 if (r < 0)
                         return r;
 
-                r = unit_require_mounts_for(UNIT(m), parent, UNIT_DEPENDENCY_IMPLICIT);
+                r = unit_add_mounts_for(UNIT(m), parent, UNIT_DEPENDENCY_IMPLICIT, UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
         }
@@ -308,30 +287,43 @@ static int mount_add_mount_dependencies(Mount *m) {
             path_is_absolute(pm->what) &&
             (mount_is_bind(pm) || mount_is_loop(pm) || !mount_is_network(pm))) {
 
-                r = unit_require_mounts_for(UNIT(m), pm->what, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(UNIT(m), pm->what, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
         }
 
         /* Adds in dependencies to other units that use this path or paths further down in the hierarchy */
-        s = manager_get_units_requiring_mounts_for(UNIT(m)->manager, m->where);
-        SET_FOREACH(other, s) {
-
-                if (other->load_state != UNIT_LOADED)
-                        continue;
-
-                if (other == UNIT(m))
-                        continue;
-
-                r = unit_add_dependency(other, UNIT_AFTER, UNIT(m), true, UNIT_DEPENDENCY_PATH);
-                if (r < 0)
-                        return r;
-
-                if (UNIT(m)->fragment_path) {
-                        /* If we have fragment configuration, then make this dependency required */
-                        r = unit_add_dependency(other, UNIT_REQUIRES, UNIT(m), true, UNIT_DEPENDENCY_PATH);
+        for (UnitMountDependencyType t = 0; t < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX; ++t) {
+                Unit *other;
+                Set *s = manager_get_units_needing_mounts_for(UNIT(m)->manager, m->where, t);
+
+                SET_FOREACH(other, s) {
+                        if (other->load_state != UNIT_LOADED)
+                                continue;
+
+                        if (other == UNIT(m))
+                                continue;
+
+                        r = unit_add_dependency(
+                                        other,
+                                        UNIT_AFTER,
+                                        UNIT(m),
+                                        /* add_reference= */ true,
+                                        UNIT_DEPENDENCY_PATH);
                         if (r < 0)
                                 return r;
+
+                        if (UNIT(m)->fragment_path) {
+                                /* If we have fragment configuration, then make this dependency required/wanted */
+                                r = unit_add_dependency(
+                                                other,
+                                                unit_mount_dependency_type_to_dependency_type(t),
+                                                UNIT(m),
+                                                /* add_reference= */ true,
+                                                UNIT_DEPENDENCY_PATH);
+                                if (r < 0)
+                                        return r;
+                        }
                 }
         }
 
@@ -413,39 +405,9 @@ static int mount_add_device_dependencies(Mount *m) {
         return 0;
 }
 
-static int mount_add_quota_dependencies(Mount *m) {
-        MountParameters *p;
-        int r;
-
-        assert(m);
-
-        if (!MANAGER_IS_SYSTEM(UNIT(m)->manager))
-                return 0;
-
-        p = get_mount_parameters_fragment(m);
-        if (!p)
-                return 0;
-
-        if (!mount_needs_quota(p))
-                return 0;
-
-        r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTACHECK_SERVICE,
-                                              /* add_reference= */ true, UNIT_DEPENDENCY_FILE);
-        if (r < 0)
-                return r;
-
-        r = unit_add_two_dependencies_by_name(UNIT(m), UNIT_BEFORE, UNIT_WANTS, SPECIAL_QUOTAON_SERVICE,
-                                              /* add_reference= */true, UNIT_DEPENDENCY_FILE);
-        if (r < 0)
-                return r;
-
-        return 0;
-}
-
 static bool mount_is_extrinsic(Unit *u) {
+        Mount *m = ASSERT_PTR(MOUNT(u));
         MountParameters *p;
-        Mount *m = MOUNT(u);
-        assert(m);
 
         /* Returns true for all units that are "magic" and should be excluded from the usual
          * start-up and shutdown dependencies. We call them "extrinsic" here, as they are generally
@@ -501,10 +463,7 @@ static int mount_add_default_ordering_dependencies(Mount *m, MountParameters *p,
                 after = SPECIAL_LOCAL_FS_PRE_TARGET;
                 before = SPECIAL_INITRD_USR_FS_TARGET;
 
-        } else if (mount_is_credentials(m))
-                after = before = NULL;
-
-        else if (mount_is_network(p)) {
+        } else if (mount_is_network(p)) {
                 after = SPECIAL_REMOTE_FS_PRE_TARGET;
                 before = SPECIAL_REMOTE_FS_TARGET;
 
@@ -645,6 +604,9 @@ static int mount_add_non_exec_dependencies(Mount *m) {
         if (!m->where)
                 return 0;
 
+        if (mount_is_credentials(m))
+                UNIT(m)->default_dependencies = false;
+
         /* Adds in all dependencies directly responsible for ordering the mount, as opposed to dependencies
          * resulting from the ExecContext and such. */
 
@@ -656,10 +618,6 @@ static int mount_add_non_exec_dependencies(Mount *m) {
         if (r < 0)
                 return r;
 
-        r = mount_add_quota_dependencies(m);
-        if (r < 0)
-                return r;
-
         r = mount_add_default_dependencies(m);
         if (r < 0)
                 return r;
@@ -668,11 +626,9 @@ static int mount_add_non_exec_dependencies(Mount *m) {
 }
 
 static int mount_add_extras(Mount *m) {
-        Unit *u = UNIT(m);
+        Unit *u = UNIT(ASSERT_PTR(m));
         int r;
 
-        assert(m);
-
         /* Note: this call might be called after we already have been loaded once (and even when it has already been
          * activated), in case data from /proc/self/mountinfo has changed. This means all code here needs to be ready
          * to run with an already set up unit. */
@@ -717,7 +673,7 @@ static int mount_add_extras(Mount *m) {
 }
 
 static void mount_load_root_mount(Unit *u) {
-        assert(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         if (!unit_has_name(u, SPECIAL_ROOT_MOUNT))
                 return;
@@ -726,37 +682,35 @@ static void mount_load_root_mount(Unit *u) {
         u->default_dependencies = false;
 
         /* The stdio/kmsg bridge socket is on /, in order to avoid a dep loop, don't use kmsg logging for -.mount */
-        MOUNT(u)->exec_context.std_output = EXEC_OUTPUT_NULL;
-        MOUNT(u)->exec_context.std_input = EXEC_INPUT_NULL;
+        m->exec_context.std_output = EXEC_OUTPUT_NULL;
+        m->exec_context.std_input = EXEC_INPUT_NULL;
 
         if (!u->description)
                 u->description = strdup("Root Mount");
 }
 
 static int mount_load(Unit *u) {
-        Mount *m = MOUNT(u);
-        int r, q = 0;
+        Mount *m = ASSERT_PTR(MOUNT(u));
+        int r;
 
-        assert(m);
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         mount_load_root_mount(u);
 
-        bool fragment_optional = m->from_proc_self_mountinfo || u->perpetual;
-        r = unit_load_fragment_and_dropin(u, !fragment_optional);
+        bool from_kernel = m->from_proc_self_mountinfo || u->perpetual;
+
+        r = unit_load_fragment_and_dropin(u, /* fragment_required = */ !from_kernel);
 
         /* Add in some extras. Note we do this in all cases (even if we failed to load the unit) when announced by the
          * kernel, because we need some things to be set up no matter what when the kernel establishes a mount and thus
          * we need to update the state in our unit to track it. After all, consider that we don't allow changing the
          * 'slice' field for a unit once it is active. */
-        if (u->load_state == UNIT_LOADED || m->from_proc_self_mountinfo || u->perpetual)
-                q = mount_add_extras(m);
+        if (u->load_state == UNIT_LOADED || from_kernel)
+                RET_GATHER(r, mount_add_extras(m));
 
         if (r < 0)
                 return r;
-        if (q < 0)
-                return q;
+
         if (u->load_state != UNIT_LOADED)
                 return 0;
 
@@ -765,6 +719,7 @@ static int mount_load(Unit *u) {
 
 static void mount_set_state(Mount *m, MountState state) {
         MountState old_state;
+
         assert(m);
 
         if (m->state != state)
@@ -787,10 +742,9 @@ static void mount_set_state(Mount *m, MountState state) {
 }
 
 static int mount_coldplug(Unit *u) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
-        assert(m);
         assert(m->state == MOUNT_DEAD);
 
         if (m->deserialized_state == m->state)
@@ -809,17 +763,17 @@ static int mount_coldplug(Unit *u) {
                         return r;
         }
 
-        if (!IN_SET(m->deserialized_state, MOUNT_DEAD, MOUNT_FAILED))
+        if (!IN_SET(m->deserialized_state, MOUNT_DEAD, MOUNT_FAILED)) {
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
+        }
 
         mount_set_state(m, m->deserialized_state);
         return 0;
 }
 
 static void mount_catchup(Unit *u) {
-        Mount *m = MOUNT(ASSERT_PTR(u));
-
-        assert(m);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         /* Adjust the deserialized state. See comments in mount_process_proc_self_mountinfo(). */
         if (m->from_proc_self_mountinfo)
@@ -854,12 +808,15 @@ static void mount_catchup(Unit *u) {
 }
 
 static void mount_dump(Unit *u, FILE *f, const char *prefix) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         MountParameters *p;
+        const char *prefix2;
 
-        assert(m);
         assert(f);
 
+        prefix = strempty(prefix);
+        prefix2 = strjoina(prefix, "\t");
+
         p = get_mount_parameters(m);
 
         fprintf(f,
@@ -904,14 +861,22 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
         exec_context_dump(&m->exec_context, f, prefix);
         kill_context_dump(&m->kill_context, f, prefix);
         cgroup_context_dump(UNIT(m), f, prefix);
+
+        for (MountExecCommand c = 0; c < _MOUNT_EXEC_COMMAND_MAX; c++) {
+                if (!m->exec_command[c].argv)
+                        continue;
+
+                fprintf(f, "%s%s %s:\n",
+                        prefix, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), mount_exec_command_to_string(c));
+
+                exec_command_dump(m->exec_command + c, f, prefix2);
+        }
 }
 
 static int mount_spawn(Mount *m, ExecCommand *c, PidRef *ret_pid) {
-
         _cleanup_(exec_params_shallow_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
                         EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
-        pid_t pid;
         int r;
 
         assert(m);
@@ -936,11 +901,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, PidRef *ret_pid) {
                        &exec_params,
                        m->exec_runtime,
                        &m->cgroup_context,
-                       &pid);
-        if (r < 0)
-                return r;
-
-        r = pidref_set_pid(&pidref, pid);
+                       &pidref);
         if (r < 0)
                 return r;
 
@@ -1025,13 +986,7 @@ static void mount_enter_signal(Mount *m, MountState state, MountResult f) {
         if (m->result == MOUNT_SUCCESS)
                 m->result = f;
 
-        r = unit_kill_context(
-                        UNIT(m),
-                        &m->kill_context,
-                        state_to_kill_operation(state),
-                        /* main_pid= */ NULL,
-                        &m->control_pid,
-                        /* main_pid_alien= */ false);
+        r = unit_kill_context(UNIT(m), state_to_kill_operation(state));
         if (r < 0) {
                 log_unit_warning_errno(UNIT(m), r, "Failed to kill processes: %m");
                 goto fail;
@@ -1166,9 +1121,9 @@ static int mount_set_mount_command(Mount *m, ExecCommand *c, const MountParamete
 }
 
 static void mount_enter_mounting(Mount *m) {
-        int r;
         MountParameters *p;
         bool source_is_dir = true;
+        int r;
 
         assert(m);
 
@@ -1192,6 +1147,34 @@ static void mount_enter_mounting(Mount *m) {
         if (r < 0 && r != -EEXIST)
                 log_unit_warning_errno(UNIT(m), r, "Failed to create mount point '%s', ignoring: %m", m->where);
 
+        /* If we are asked to create an OverlayFS, create the upper/work directories if they are missing */
+        if (p && streq_ptr(p->fstype, "overlay")) {
+                _cleanup_strv_free_ char **dirs = NULL;
+
+                r = fstab_filter_options(
+                                p->options,
+                                "upperdir\0workdir\0",
+                                /* ret_namefound= */ NULL,
+                                /* ret_value= */ NULL,
+                                &dirs,
+                                /* ret_filtered= */ NULL);
+                if (r < 0)
+                        log_unit_warning_errno(
+                                        UNIT(m),
+                                        r,
+                                        "Failed to determine upper directory for OverlayFS, ignoring: %m");
+                else
+                        STRV_FOREACH(d, dirs) {
+                                r = mkdir_p_label(*d, m->directory_mode);
+                                if (r < 0 && r != -EEXIST)
+                                        log_unit_warning_errno(
+                                                        UNIT(m),
+                                                        r,
+                                                        "Failed to create overlay directory '%s', ignoring: %m",
+                                                        *d);
+                        }
+        }
+
         if (source_is_dir)
                 unit_warn_if_dir_nonempty(UNIT(m), m->where);
         unit_warn_leftover_processes(UNIT(m), unit_log_leftover_process_start);
@@ -1249,8 +1232,8 @@ static void mount_set_reload_result(Mount *m, MountResult result) {
 }
 
 static void mount_enter_remounting(Mount *m) {
-        int r;
         MountParameters *p;
+        int r;
 
         assert(m);
 
@@ -1312,15 +1295,15 @@ static void mount_cycle_clear(Mount *m) {
         m->result = MOUNT_SUCCESS;
         m->reload_result = MOUNT_SUCCESS;
         exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
-        UNIT(m)->reset_accounting = true;
+
+        if (m->cgroup_runtime)
+                m->cgroup_runtime->reset_accounting = true;
 }
 
 static int mount_start(Unit *u) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
-        assert(m);
-
         /* We cannot fulfill this request right now, try again later
          * please! */
         if (IN_SET(m->state,
@@ -1347,9 +1330,7 @@ static int mount_start(Unit *u) {
 }
 
 static int mount_stop(Unit *u) {
-        Mount *m = MOUNT(u);
-
-        assert(m);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         /* When we directly call umount() for a path, then the state of the corresponding mount unit may be
          * outdated. Let's re-read mountinfo now and update the state. */
@@ -1401,9 +1382,8 @@ static int mount_stop(Unit *u) {
 }
 
 static int mount_reload(Unit *u) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
-        assert(m);
         assert(m->state == MOUNT_MOUNTED);
 
         mount_enter_remounting(m);
@@ -1412,9 +1392,8 @@ static int mount_reload(Unit *u) {
 }
 
 static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
-        assert(m);
         assert(f);
         assert(fds);
 
@@ -1431,11 +1410,9 @@ static int mount_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int mount_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
-        assert(m);
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -1495,21 +1472,19 @@ static int mount_deserialize_item(Unit *u, const char *key, const char *value, F
 }
 
 static UnitActiveState mount_active_state(Unit *u) {
-        assert(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
-        return state_translation_table[MOUNT(u)->state];
+        return state_translation_table[m->state];
 }
 
 static const char *mount_sub_state_to_string(Unit *u) {
-        assert(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
-        return mount_state_to_string(MOUNT(u)->state);
+        return mount_state_to_string(m->state);
 }
 
 static bool mount_may_gc(Unit *u) {
-        Mount *m = MOUNT(u);
-
-        assert(m);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         if (m->from_proc_self_mountinfo)
                 return false;
@@ -1518,10 +1493,9 @@ static bool mount_may_gc(Unit *u) {
 }
 
 static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         MountResult f;
 
-        assert(m);
         assert(pid >= 0);
 
         if (pid != m->control_pid.pid)
@@ -1653,9 +1627,8 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 }
 
 static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Mount *m = MOUNT(userdata);
+        Mount *m = ASSERT_PTR(MOUNT(userdata));
 
-        assert(m);
         assert(m->timer_event_source == source);
 
         switch (m->state) {
@@ -1738,6 +1711,7 @@ static int mount_setup_new_unit(
                 Unit **ret) {
 
         _cleanup_(unit_freep) Unit *u = NULL;
+        Mount *mnt;
         int r;
 
         assert(m);
@@ -1749,24 +1723,26 @@ static int mount_setup_new_unit(
         if (r < 0)
                 return r;
 
+        mnt = ASSERT_PTR(MOUNT(u));
+
         r = free_and_strdup(&u->source_path, "/proc/self/mountinfo");
         if (r < 0)
                 return r;
 
-        r = free_and_strdup(&MOUNT(u)->where, where);
+        r = free_and_strdup(&mnt->where, where);
         if (r < 0)
                 return r;
 
-        r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
+        r = update_parameters_proc_self_mountinfo(mnt, what, options, fstype);
         if (r < 0)
                 return r;
 
         /* This unit was generated because /proc/self/mountinfo reported it. Remember this, so that by the
          * time we load the unit file for it (and thus add in extra deps right after) we know what source to
          * attributes the deps to. */
-        MOUNT(u)->from_proc_self_mountinfo = true;
+        mnt->from_proc_self_mountinfo = true;
 
-        r = mount_add_non_exec_dependencies(MOUNT(u));
+        r = mount_add_non_exec_dependencies(mnt);
         if (r < 0)
                 return r;
 
@@ -1787,14 +1763,16 @@ static int mount_setup_existing_unit(
                 const char *fstype,
                 MountProcFlags *ret_flags) {
 
+        Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
         assert(u);
+        assert(where);
         assert(ret_flags);
 
-        if (!MOUNT(u)->where) {
-                MOUNT(u)->where = strdup(where);
-                if (!MOUNT(u)->where)
+        if (!m->where) {
+                m->where = strdup(where);
+                if (!m->where)
                         return -ENOMEM;
         }
 
@@ -1802,10 +1780,9 @@ static int mount_setup_existing_unit(
          * for the current unit. Note that the flags field is reset on each iteration of reading
          * /proc/self/mountinfo, hence we know for sure anything already set here is from the current
          * iteration and thus worthy of taking into account. */
-        MountProcFlags flags =
-                MOUNT(u)->proc_flags | MOUNT_PROC_IS_MOUNTED;
+        MountProcFlags flags = m->proc_flags | MOUNT_PROC_IS_MOUNTED;
 
-        r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
+        r = update_parameters_proc_self_mountinfo(m, what, options, fstype);
         if (r < 0)
                 return r;
         if (r > 0)
@@ -1818,12 +1795,12 @@ static int mount_setup_existing_unit(
          * from the serialized state), and need to catch up. Since we know that the MOUNT_MOUNTING state is
          * reached when we wait for the mount to appear we hence can assume that if we are in it, we are
          * actually seeing it established for the first time. */
-        if (!MOUNT(u)->from_proc_self_mountinfo || MOUNT(u)->state == MOUNT_MOUNTING)
+        if (!m->from_proc_self_mountinfo || m->state == MOUNT_MOUNTING)
                 flags |= MOUNT_PROC_JUST_MOUNTED;
 
-        MOUNT(u)->from_proc_self_mountinfo = true;
+        m->from_proc_self_mountinfo = true;
 
-        if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+        if (UNIT_IS_LOAD_ERROR(u->load_state)) {
                 /* The unit was previously not found or otherwise not loaded. Now that the unit shows up in
                  * /proc/self/mountinfo we should reconsider it this, hence set it to UNIT_LOADED. */
                 u->load_state = UNIT_LOADED;
@@ -1835,7 +1812,7 @@ static int mount_setup_existing_unit(
         if (FLAGS_SET(flags, MOUNT_PROC_JUST_CHANGED)) {
                 /* If things changed, then make sure that all deps are regenerated. Let's
                  * first remove all automatic deps, and then add in the new ones. */
-                r = mount_add_non_exec_dependencies(MOUNT(u));
+                r = mount_add_non_exec_dependencies(m);
                 if (r < 0)
                         return r;
         }
@@ -1950,14 +1927,27 @@ static void mount_shutdown(Manager *m) {
         m->mount_monitor = NULL;
 }
 
+static void mount_handoff_timestamp(
+                Unit *u,
+                const struct ucred *ucred,
+                const dual_timestamp *ts) {
+
+        Mount *m = ASSERT_PTR(MOUNT(u));
+
+        assert(ucred);
+        assert(ts);
+
+        if (m->control_pid.pid == ucred->pid && m->control_command) {
+                exec_status_handoff(&m->control_command->exec_status, ucred, ts);
+                unit_add_to_dbus_queue(u);
+        }
+}
+
 static int mount_get_timeout(Unit *u, usec_t *timeout) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         usec_t t;
         int r;
 
-        assert(m);
-        assert(u);
-
         if (!m->timer_event_source)
                 return 0;
 
@@ -2063,7 +2053,7 @@ static void mount_enumerate(Manager *m) {
                         goto fail;
                 }
 
-                r = sd_event_source_set_priority(m->mount_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+                r = sd_event_source_set_priority(m->mount_event_source, EVENT_PRIORITY_MOUNT_TABLE);
                 if (r < 0) {
                         log_error_errno(r, "Failed to adjust mount watch priority: %m");
                         goto fail;
@@ -2330,19 +2320,15 @@ fail:
 }
 
 static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
-        Mount *m = MOUNT(u);
-
-        assert(m);
+        Mount *m = ASSERT_PTR(MOUNT(u));
 
         return exec_context_get_clean_mask(&m->exec_context, ret);
 }
 
 static int mount_can_start(Unit *u) {
-        Mount *m = MOUNT(u);
+        Mount *m = ASSERT_PTR(MOUNT(u));
         int r;
 
-        assert(m);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT, /* flush_result = */ false);
@@ -2440,6 +2426,7 @@ const UnitVTable mount_vtable = {
         .cgroup_context_offset = offsetof(Mount, cgroup_context),
         .kill_context_offset = offsetof(Mount, kill_context),
         .exec_runtime_offset = offsetof(Mount, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Mount, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -2482,6 +2469,8 @@ const UnitVTable mount_vtable = {
 
         .reset_failed = mount_reset_failed,
 
+        .notify_handoff_timestamp = mount_handoff_timestamp,
+
         .control_pid = mount_control_pid,
 
         .bus_set_property = bus_mount_set_property,
diff --git a/src/core/mount.h b/src/core/mount.h
index 6712c16..a029dc8 100644
--- a/src/core/mount.h
+++ b/src/core/mount.h
@@ -79,6 +79,7 @@ struct Mount {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         MountState state, deserialized_state;
 
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 88681aa..6c0dc94 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -47,6 +47,7 @@
 #include "tmpfile-util.h"
 #include "umask-util.h"
 #include "user-util.h"
+#include "vpick.h"
 
 #define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_STRICTATIME|MS_NOEXEC)
 
@@ -500,9 +501,24 @@ static int append_extensions(
         /* First, prepare a mount for each image, but these won't be visible to the unit, instead
          * they will be mounted in our propagate directory, and used as a source for the overlay. */
         for (size_t i = 0; i < n; i++) {
+                _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
                 _cleanup_free_ char *mount_point = NULL;
                 const MountImage *m = mount_images + i;
 
+                r = path_pick(/* toplevel_path= */ NULL,
+                              /* toplevel_fd= */ AT_FDCWD,
+                              m->source,
+                              &pick_filter_image_raw,
+                              PICK_ARCHITECTURE|PICK_TRIES,
+                              &result);
+                if (r < 0)
+                        return r;
+                if (!result.path)
+                        return log_debug_errno(
+                                        SYNTHETIC_ERRNO(ENOENT),
+                                        "No matching entry in .v/ directory %s found.",
+                                        m->source);
+
                 if (asprintf(&mount_point, "%s/%zu", extension_dir, i) < 0)
                         return -ENOMEM;
 
@@ -524,7 +540,7 @@ static int append_extensions(
                         .path_malloc = TAKE_PTR(mount_point),
                         .image_options_const = m->mount_options,
                         .ignore = m->ignore_enoent,
-                        .source_const = m->source,
+                        .source_malloc = TAKE_PTR(result.path),
                         .mode = MOUNT_EXTENSION_IMAGE,
                         .has_prefix = true,
                 };
@@ -534,7 +550,8 @@ static int append_extensions(
          * Bind mount them in the same location as the ExtensionImages, so that we
          * can check that they are valid trees (extension-release.d). */
         STRV_FOREACH(extension_directory, extension_directories) {
-                _cleanup_free_ char *mount_point = NULL, *source = NULL;
+                _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
+                _cleanup_free_ char *mount_point = NULL;
                 const char *e = *extension_directory;
                 bool ignore_enoent = false;
 
@@ -551,9 +568,19 @@ static int append_extensions(
                 if (startswith(e, "+"))
                         e++;
 
-                source = strdup(e);
-                if (!source)
-                        return -ENOMEM;
+                r = path_pick(/* toplevel_path= */ NULL,
+                              /* toplevel_fd= */ AT_FDCWD,
+                              e,
+                              &pick_filter_image_dir,
+                              PICK_ARCHITECTURE|PICK_TRIES,
+                              &result);
+                if (r < 0)
+                        return r;
+                if (!result.path)
+                        return log_debug_errno(
+                                        SYNTHETIC_ERRNO(ENOENT),
+                                        "No matching entry in .v/ directory %s found.",
+                                        e);
 
                 for (size_t j = 0; hierarchies && hierarchies[j]; ++j) {
                         char *prefixed_hierarchy = path_join(mount_point, hierarchies[j]);
@@ -571,7 +598,7 @@ static int append_extensions(
 
                 *me = (MountEntry) {
                         .path_malloc = TAKE_PTR(mount_point),
-                        .source_malloc = TAKE_PTR(source),
+                        .source_malloc = TAKE_PTR(result.path),
                         .mode = MOUNT_EXTENSION_DIRECTORY,
                         .ignore = ignore_enoent,
                         .has_prefix = true,
@@ -626,8 +653,7 @@ static int append_tmpfs_mounts(MountList *ml, const TemporaryFileSystem *tmpfs,
                         return log_debug_errno(r, "Failed to parse mount option '%s': %m", str);
 
                 ro = flags & MS_RDONLY;
-                if (ro)
-                        flags ^= MS_RDONLY;
+                flags &= ~MS_RDONLY;
 
                 MountEntry *me = mount_list_extend(ml);
                 if (!me)
@@ -876,42 +902,41 @@ static void drop_outside_root(MountList *ml, const char *root_directory) {
         ml->n_mounts = t - ml->mounts;
 }
 
-static int clone_device_node(
-                const char *d,
-                const char *temporary_mount,
-                bool *make_devnode) {
-
+static int clone_device_node(const char *node, const char *temporary_mount, bool *make_devnode) {
         _cleanup_free_ char *sl = NULL;
-        const char *dn, *bn, *t;
+        const char *dn, *bn;
         struct stat st;
         int r;
 
-        if (stat(d, &st) < 0) {
+        assert(node);
+        assert(path_is_absolute(node));
+        assert(temporary_mount);
+        assert(make_devnode);
+
+        if (stat(node, &st) < 0) {
                 if (errno == ENOENT) {
-                        log_debug_errno(errno, "Device node '%s' to clone does not exist, ignoring.", d);
+                        log_debug_errno(errno, "Device node '%s' to clone does not exist.", node);
                         return -ENXIO;
                 }
 
-                return log_debug_errno(errno, "Failed to stat() device node '%s' to clone, ignoring: %m", d);
+                return log_debug_errno(errno, "Failed to stat() device node '%s' to clone: %m", node);
         }
 
-        if (!S_ISBLK(st.st_mode) &&
-            !S_ISCHR(st.st_mode))
-                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
-                                       "Device node '%s' to clone is not a device node, ignoring.",
-                                       d);
+        r = stat_verify_device_node(&st);
+        if (r < 0)
+                return log_debug_errno(r, "Cannot clone device node '%s': %m", node);
 
-        dn = strjoina(temporary_mount, d);
+        dn = strjoina(temporary_mount, node);
 
         /* First, try to create device node properly */
         if (*make_devnode) {
-                mac_selinux_create_file_prepare(d, st.st_mode);
+                mac_selinux_create_file_prepare(node, st.st_mode);
                 r = mknod(dn, st.st_mode, st.st_rdev);
                 mac_selinux_create_file_clear();
                 if (r >= 0)
                         goto add_symlink;
                 if (errno != EPERM)
-                        return log_debug_errno(errno, "mknod failed for %s: %m", d);
+                        return log_debug_errno(errno, "Failed to mknod '%s': %m", node);
 
                 /* This didn't work, let's not try this again for the next iterations. */
                 *make_devnode = false;
@@ -921,17 +946,17 @@ static int clone_device_node(
          * Do not prepare device-node SELinux label (see issue 13762) */
         r = mknod(dn, S_IFREG, 0);
         if (r < 0 && errno != EEXIST)
-                return log_debug_errno(errno, "mknod() fallback failed for '%s': %m", d);
+                return log_debug_errno(errno, "Failed to mknod dummy device node for '%s': %m", node);
 
         /* Fallback to bind-mounting: The assumption here is that all used device nodes carry standard
          * properties. Specifically, the devices nodes we bind-mount should either be owned by root:root or
          * root:tty (e.g. /dev/tty, /dev/ptmx) and should not carry ACLs. */
-        r = mount_nofollow_verbose(LOG_DEBUG, d, dn, NULL, MS_BIND, NULL);
+        r = mount_nofollow_verbose(LOG_DEBUG, node, dn, NULL, MS_BIND, NULL);
         if (r < 0)
                 return r;
 
 add_symlink:
-        bn = path_startswith(d, "/dev/");
+        bn = path_startswith(node, "/dev/");
         if (!bn)
                 return 0;
 
@@ -944,14 +969,27 @@ add_symlink:
 
         (void) mkdir_parents(sl, 0755);
 
-        t = strjoina("../", bn);
+        const char *t = strjoina("../", bn);
         if (symlink(t, sl) < 0)
                 log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl);
 
         return 0;
 }
 
-static char *settle_runtime_dir(RuntimeScope scope) {
+static int bind_mount_device_dir(const char *temporary_mount, const char *dir) {
+        const char *t;
+
+        assert(temporary_mount);
+        assert(dir);
+        assert(path_is_absolute(dir));
+
+        t = strjoina(temporary_mount, dir);
+
+        (void) mkdir(t, 0755);
+        return mount_nofollow_verbose(LOG_DEBUG, dir, t, NULL, MS_BIND, NULL);
+}
+
+static char* settle_runtime_dir(RuntimeScope scope) {
         char *runtime_dir;
 
         if (scope != RUNTIME_SCOPE_USER)
@@ -992,8 +1030,8 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
                 "/dev/urandom\0"
                 "/dev/tty\0";
 
-        _cleanup_free_ char *temporary_mount = NULL;
-        const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
+        _cleanup_(rmdir_and_freep) char *temporary_mount = NULL;
+        _cleanup_(umount_and_rmdir_and_freep) char *dev = NULL;
         bool can_mknod = true;
         int r;
 
@@ -1003,67 +1041,56 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
         if (r < 0)
                 return r;
 
-        dev = strjoina(temporary_mount, "/dev");
+        dev = path_join(temporary_mount, "dev");
+        if (!dev)
+                return -ENOMEM;
+
         (void) mkdir(dev, 0755);
         r = mount_nofollow_verbose(LOG_DEBUG, "tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS, "mode=0755" TMPFS_LIMITS_PRIVATE_DEV);
         if (r < 0)
-                goto fail;
+                return r;
 
         r = label_fix_full(AT_FDCWD, dev, "/dev", 0);
-        if (r < 0) {
-                log_debug_errno(r, "Failed to fix label of '%s' as /dev: %m", dev);
-                goto fail;
-        }
+        if (r < 0)
+                return log_debug_errno(r, "Failed to fix label of '%s' as /dev/: %m", dev);
 
-        devpts = strjoina(temporary_mount, "/dev/pts");
-        (void) mkdir(devpts, 0755);
-        r = mount_nofollow_verbose(LOG_DEBUG, "/dev/pts", devpts, NULL, MS_BIND, NULL);
+        r = bind_mount_device_dir(temporary_mount, "/dev/pts");
         if (r < 0)
-                goto fail;
+                return r;
 
         /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx.
          * When /dev/ptmx a device node, /dev/pts/ptmx has 000 permissions making it inaccessible.
          * Thus, in that case make a clone.
          * In nspawn and other containers it will be a symlink, in that case make it a symlink. */
         r = is_symlink("/dev/ptmx");
-        if (r < 0) {
-                log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
-                goto fail;
-        } else if (r > 0) {
-                devptmx = strjoina(temporary_mount, "/dev/ptmx");
-                if (symlink("pts/ptmx", devptmx) < 0) {
-                        r = log_debug_errno(errno, "Failed to create a symlink '%s' to pts/ptmx: %m", devptmx);
-                        goto fail;
-                }
+        if (r < 0)
+                return log_debug_errno(r, "Failed to detect whether /dev/ptmx is a symlink or not: %m");
+        if (r > 0) {
+                const char *devptmx = strjoina(temporary_mount, "/dev/ptmx");
+                if (symlink("pts/ptmx", devptmx) < 0)
+                        return log_debug_errno(errno, "Failed to create symlink '%s' to pts/ptmx: %m", devptmx);
         } else {
                 r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod);
                 if (r < 0)
-                        goto fail;
+                        return r;
         }
 
-        devshm = strjoina(temporary_mount, "/dev/shm");
-        (void) mkdir(devshm, 0755);
-        r = mount_nofollow_verbose(LOG_DEBUG, "/dev/shm", devshm, NULL, MS_BIND, NULL);
+        r = bind_mount_device_dir(temporary_mount, "/dev/shm");
         if (r < 0)
-                goto fail;
-
-        devmqueue = strjoina(temporary_mount, "/dev/mqueue");
-        (void) mkdir(devmqueue, 0755);
-        (void) mount_nofollow_verbose(LOG_DEBUG, "/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
+                return r;
 
-        devhugepages = strjoina(temporary_mount, "/dev/hugepages");
-        (void) mkdir(devhugepages, 0755);
-        (void) mount_nofollow_verbose(LOG_DEBUG, "/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+        FOREACH_STRING(d, "/dev/mqueue", "/dev/hugepages")
+                (void) bind_mount_device_dir(temporary_mount, d);
 
-        devlog = strjoina(temporary_mount, "/dev/log");
+        const char *devlog = strjoina(temporary_mount, "/dev/log");
         if (symlink("/run/systemd/journal/dev-log", devlog) < 0)
-                log_debug_errno(errno, "Failed to create a symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
+                log_debug_errno(errno, "Failed to create symlink '%s' to /run/systemd/journal/dev-log, ignoring: %m", devlog);
 
         NULSTR_FOREACH(d, devnodes) {
                 r = clone_device_node(d, temporary_mount, &can_mknod);
                 /* ENXIO means the *source* is not a device file, skip creation in that case */
                 if (r < 0 && r != -ENXIO)
-                        goto fail;
+                        return r;
         }
 
         r = dev_setup(temporary_mount, UID_INVALID, GID_INVALID);
@@ -1081,31 +1108,10 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
 
         r = mount_nofollow_verbose(LOG_DEBUG, dev, mount_entry_path(m), NULL, MS_MOVE, NULL);
         if (r < 0)
-                goto fail;
-
-        (void) rmdir(dev);
-        (void) rmdir(temporary_mount);
+                return r;
+        dev = rmdir_and_free(dev); /* Mount is successfully moved, do not umount() */
 
         return 1;
-
-fail:
-        if (devpts)
-                (void) umount_verbose(LOG_DEBUG, devpts, UMOUNT_NOFOLLOW);
-
-        if (devshm)
-                (void) umount_verbose(LOG_DEBUG, devshm, UMOUNT_NOFOLLOW);
-
-        if (devhugepages)
-                (void) umount_verbose(LOG_DEBUG, devhugepages, UMOUNT_NOFOLLOW);
-
-        if (devmqueue)
-                (void) umount_verbose(LOG_DEBUG, devmqueue, UMOUNT_NOFOLLOW);
-
-        (void) umount_verbose(LOG_DEBUG, dev, UMOUNT_NOFOLLOW);
-        (void) rmdir(dev);
-        (void) rmdir(temporary_mount);
-
-        return r;
 }
 
 static int mount_bind_dev(const MountEntry *m) {
@@ -1118,7 +1124,7 @@ static int mount_bind_dev(const MountEntry *m) {
 
         (void) mkdir_p_label(mount_entry_path(m), 0755);
 
-        r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+        r = path_is_mount_point(mount_entry_path(m));
         if (r < 0)
                 return log_debug_errno(r, "Unable to determine whether /dev is already mounted: %m");
         if (r > 0) /* make this a NOP if /dev is already a mount point */
@@ -1138,7 +1144,7 @@ static int mount_bind_sysfs(const MountEntry *m) {
 
         (void) mkdir_p_label(mount_entry_path(m), 0755);
 
-        r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+        r = path_is_mount_point(mount_entry_path(m));
         if (r < 0)
                 return log_debug_errno(r, "Unable to determine whether /sys is already mounted: %m");
         if (r > 0) /* make this a NOP if /sys is already a mount point */
@@ -1185,7 +1191,7 @@ static int mount_private_apivfs(
                 /* When we do not have enough privileges to mount a new instance, fall back to use an
                  * existing mount. */
 
-                r = path_is_mount_point(entry_path, /* root = */ NULL, /* flags = */ 0);
+                r = path_is_mount_point(entry_path);
                 if (r < 0)
                         return log_debug_errno(r, "Unable to determine whether '%s' is already mounted: %m", entry_path);
                 if (r > 0)
@@ -1300,7 +1306,7 @@ static int mount_run(const MountEntry *m) {
 
         assert(m);
 
-        r = path_is_mount_point(mount_entry_path(m), NULL, 0);
+        r = path_is_mount_point(mount_entry_path(m));
         if (r < 0 && r != -ENOENT)
                 return log_debug_errno(r, "Unable to determine whether /run is already mounted: %m");
         if (r > 0) /* make this a NOP if /run is already a mount point */
@@ -1354,7 +1360,7 @@ static int mount_image(
                 if (r < 0)
                         return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
                 if (isempty(host_os_release_id))
-                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
+                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s'.", empty_to_root(root_directory));
         }
 
         r = verity_dissect_and_mount(
@@ -1448,6 +1454,8 @@ static int follow_symlink(
         _cleanup_free_ char *target = NULL;
         int r;
 
+        assert(m);
+
         /* Let's chase symlinks, but only one step at a time. That's because depending where the symlink points we
          * might need to change the order in which we mount stuff. Hence: let's normalize piecemeal, and do one step at
          * a time by specifying CHASE_STEP. This function returns 0 if we resolved one step, and > 0 if we reached the
@@ -1469,7 +1477,7 @@ static int follow_symlink(
 
         mount_entry_consume_prefix(m, TAKE_PTR(target));
 
-        m->n_followed ++;
+        m->n_followed++;
 
         return 0;
 }
@@ -1524,7 +1532,7 @@ static int apply_one_mount(
                 r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
                 if (r < 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(ELOOP),
-                                               "File type not supported for inaccessible mounts. Note that symlinks are not allowed");
+                                               "File type not supported for inaccessible mounts. Note that symlinks are not allowed.");
                 what = inaccessible;
                 break;
         }
@@ -1534,7 +1542,7 @@ static int apply_one_mount(
         case MOUNT_READ_WRITE_IMPLICIT:
         case MOUNT_EXEC:
         case MOUNT_NOEXEC:
-                r = path_is_mount_point(mount_entry_path(m), root_directory, 0);
+                r = path_is_mount_point_full(mount_entry_path(m), root_directory, /* flags = */ 0);
                 if (r == -ENOENT && m->ignore)
                         return 0;
                 if (r < 0)
@@ -1575,7 +1583,7 @@ static int apply_one_mount(
                 if (r < 0)
                         return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
                 if (isempty(host_os_release_id))
-                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
+                        return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s'.", empty_to_root(root_directory));
 
                 r = load_extension_release_pairs(mount_entry_source(m), class, extension_name, /* relax_extension_release_check= */ false, &extension_release);
                 if (r == -ENOENT && m->ignore)
@@ -1588,13 +1596,13 @@ static int apply_one_mount(
                                 host_os_release_id,
                                 host_os_release_version_id,
                                 host_os_release_level,
-                                /* host_extension_scope */ NULL, /* Leave empty, we need to accept both system and portable */
+                                /* host_extension_scope = */ NULL, /* Leave empty, we need to accept both system and portable */
                                 extension_release,
                                 class);
-                if (r == 0)
-                        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Directory %s extension-release metadata does not match the root's", extension_name);
                 if (r < 0)
                         return log_debug_errno(r, "Failed to compare directory %s extension-release metadata with the root's os-release: %m", extension_name);
+                if (r == 0)
+                        return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Directory %s extension-release metadata does not match the root's.", extension_name);
 
                 _fallthrough_;
         }
@@ -2049,9 +2057,9 @@ static bool root_read_only(
 }
 
 static bool home_read_only(
-                char** read_only_paths,
-                char** inaccessible_paths,
-                char** empty_directories,
+                char * const *read_only_paths,
+                char * const *inaccessible_paths,
+                char * const *empty_directories,
                 const BindMount *bind_mounts,
                 size_t n_bind_mounts,
                 const TemporaryFileSystem *temporary_filesystems,
@@ -2070,13 +2078,13 @@ static bool home_read_only(
             prefixed_path_strv_contains(empty_directories, "/home"))
                 return true;
 
-        for (size_t i = 0; i < n_temporary_filesystems; i++)
-                if (path_equal(temporary_filesystems[i].path, "/home"))
+        FOREACH_ARRAY(i, temporary_filesystems, n_temporary_filesystems)
+                if (path_equal(i->path, "/home"))
                         return true;
 
         /* If /home is overmounted with some dir from the host it's not writable. */
-        for (size_t i = 0; i < n_bind_mounts; i++)
-                if (path_equal(bind_mounts[i].destination, "/home"))
+        FOREACH_ARRAY(i, bind_mounts, n_bind_mounts)
+                if (path_equal(i->destination, "/home"))
                         return true;
 
         return false;
@@ -2088,6 +2096,7 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
         _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
         _cleanup_strv_free_ char **hierarchies = NULL;
         _cleanup_(mount_list_done) MountList ml = {};
+        _cleanup_close_ int userns_fd = -EBADF;
         bool require_prefix = false;
         const char *root;
         DissectImageFlags dissect_image_flags =
@@ -2099,7 +2108,8 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
                 DISSECT_IMAGE_USR_NO_ROOT |
                 DISSECT_IMAGE_GROWFS |
                 DISSECT_IMAGE_ADD_PARTITION_DEVICES |
-                DISSECT_IMAGE_PIN_PARTITION_DEVICES;
+                DISSECT_IMAGE_PIN_PARTITION_DEVICES |
+                DISSECT_IMAGE_ALLOW_USERSPACE_VERITY;
         int r;
 
         assert(p);
@@ -2123,40 +2133,57 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
 
                 SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, p->verity && p->verity->data_path);
 
-                r = loop_device_make_by_path(
-                                p->root_image,
-                                FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
-                                /* sector_size= */ UINT32_MAX,
-                                FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
-                                LOCK_SH,
-                                &loop_device);
-                if (r < 0)
-                        return log_debug_errno(r, "Failed to create loop device for root image: %m");
-
-                r = dissect_loop_device(
-                                loop_device,
-                                p->verity,
-                                p->root_image_options,
-                                p->root_image_policy,
-                                dissect_image_flags,
-                                &dissected_image);
-                if (r < 0)
-                        return log_debug_errno(r, "Failed to dissect image: %m");
+                if (p->runtime_scope == RUNTIME_SCOPE_SYSTEM) {
+                        /* In system mode we mount directly */
 
-                r = dissected_image_load_verity_sig_partition(
-                                dissected_image,
-                                loop_device->fd,
-                                p->verity);
-                if (r < 0)
-                        return r;
+                        r = loop_device_make_by_path(
+                                        p->root_image,
+                                        FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
+                                        /* sector_size= */ UINT32_MAX,
+                                        FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
+                                        LOCK_SH,
+                                        &loop_device);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to create loop device for root image: %m");
+
+                        r = dissect_loop_device(
+                                        loop_device,
+                                        p->verity,
+                                        p->root_image_options,
+                                        p->root_image_policy,
+                                        dissect_image_flags,
+                                        &dissected_image);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to dissect image: %m");
 
-                r = dissected_image_decrypt(
-                                dissected_image,
-                                NULL,
-                                p->verity,
-                                dissect_image_flags);
-                if (r < 0)
-                        return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+                        r = dissected_image_load_verity_sig_partition(
+                                        dissected_image,
+                                        loop_device->fd,
+                                        p->verity);
+                        if (r < 0)
+                                return r;
+
+                        r = dissected_image_decrypt(
+                                        dissected_image,
+                                        NULL,
+                                        p->verity,
+                                        dissect_image_flags);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to decrypt dissected image: %m");
+                } else {
+                        userns_fd = namespace_open_by_type(NAMESPACE_USER);
+                        if (userns_fd < 0)
+                                return log_debug_errno(userns_fd, "Failed to open our own user namespace: %m");
+
+                        r = mountfsd_mount_image(
+                                        p->root_image,
+                                        userns_fd,
+                                        p->root_image_policy,
+                                        dissect_image_flags,
+                                        &dissected_image);
+                        if (r < 0)
+                                return r;
+                }
         }
 
         if (p->root_directory)
@@ -2520,16 +2547,18 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
                                 root,
                                 /* uid_shift= */ UID_INVALID,
                                 /* uid_range= */ UID_INVALID,
-                                /* userns_fd= */ -EBADF,
+                                userns_fd,
                                 dissect_image_flags);
                 if (r < 0)
                         return log_debug_errno(r, "Failed to mount root image: %m");
 
                 /* Now release the block device lock, so that udevd is free to call BLKRRPART on the device
                  * if it likes. */
-                r = loop_device_flock(loop_device, LOCK_UN);
-                if (r < 0)
-                        return log_debug_errno(r, "Failed to release lock on loopback block device: %m");
+                if (loop_device) {
+                        r = loop_device_flock(loop_device, LOCK_UN);
+                        if (r < 0)
+                                return log_debug_errno(r, "Failed to release lock on loopback block device: %m");
+                }
 
                 r = dissected_image_relinquish(dissected_image);
                 if (r < 0)
@@ -2538,7 +2567,7 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
         } else if (p->root_directory) {
 
                 /* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. */
-                r = path_is_mount_point(root, NULL, AT_SYMLINK_FOLLOW);
+                r = path_is_mount_point_full(root, /* root = */ NULL, AT_SYMLINK_FOLLOW);
                 if (r < 0)
                         return log_debug_errno(r, "Failed to detect that %s is a mount point or not: %m", root);
                 if (r == 0) {
@@ -2595,9 +2624,9 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
 void bind_mount_free_many(BindMount *b, size_t n) {
         assert(b || n == 0);
 
-        for (size_t i = 0; i < n; i++) {
-                free(b[i].source);
-                free(b[i].destination);
+        FOREACH_ARRAY(i, b, n) {
+                free(i->source);
+                free(i->destination);
         }
 
         free(b);
@@ -2625,7 +2654,7 @@ int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) {
 
         *b = c;
 
-        c[(*n) ++] = (BindMount) {
+        c[(*n)++] = (BindMount) {
                 .source = TAKE_PTR(s),
                 .destination = TAKE_PTR(d),
                 .read_only = item->read_only,
@@ -2694,7 +2723,7 @@ int mount_image_add(MountImage **m, size_t *n, const MountImage *item) {
 
         *m = c;
 
-        c[(*n) ++] = (MountImage) {
+        c[(*n)++] = (MountImage) {
                 .source = TAKE_PTR(s),
                 .destination = TAKE_PTR(d),
                 .mount_options = TAKE_PTR(options),
@@ -2745,7 +2774,7 @@ int temporary_filesystem_add(
 
         *t = c;
 
-        c[(*n) ++] = (TemporaryFileSystem) {
+        c[(*n)++] = (TemporaryFileSystem) {
                 .path = TAKE_PTR(p),
                 .options = TAKE_PTR(o),
         };
diff --git a/src/core/path.c b/src/core/path.c
index ef00c20..fdb6ca4 100644
--- a/src/core/path.c
+++ b/src/core/path.c
@@ -90,7 +90,7 @@ int path_spec_watch(PathSpec *s, sd_event_io_handler_t handler) {
                 /* If this is a symlink watch both the symlink inode and where it points to. If the inode is
                  * not a symlink both calls will install the same watch, which is redundant and doesn't
                  * hurt. */
-                for (int follow_symlink = 0; follow_symlink < 2; follow_symlink ++) {
+                for (int follow_symlink = 0; follow_symlink < 2; follow_symlink++) {
                         uint32_t f = flags;
 
                         SET_FLAG(f, IN_DONT_FOLLOW, !follow_symlink);
@@ -249,6 +249,8 @@ static bool path_spec_check_good(PathSpec *s, bool initial, bool from_trigger_no
 static void path_spec_mkdir(PathSpec *s, mode_t mode) {
         int r;
 
+        assert(s);
+
         if (IN_SET(s->type, PATH_EXISTS, PATH_EXISTS_GLOB))
                 return;
 
@@ -260,6 +262,10 @@ static void path_spec_mkdir(PathSpec *s, mode_t mode) {
 static void path_spec_dump(PathSpec *s, FILE *f, const char *prefix) {
         const char *type;
 
+        assert(s);
+        assert(f);
+        assert(prefix);
+
         assert_se(type = path_type_to_string(s->type));
         fprintf(f, "%s%s: %s\n", prefix, type, s->path);
 }
@@ -272,9 +278,8 @@ void path_spec_done(PathSpec *s) {
 }
 
 static void path_init(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         p->directory_mode = 0755;
@@ -295,9 +300,7 @@ void path_free_specs(Path *p) {
 }
 
 static void path_done(Unit *u) {
-        Path *p = PATH(u);
-
-        assert(p);
+        Path *p = ASSERT_PTR(PATH(u));
 
         p->trigger_notify_event_source = sd_event_source_disable_unref(p->trigger_notify_event_source);
         path_free_specs(p);
@@ -309,7 +312,7 @@ static int path_add_mount_dependencies(Path *p) {
         assert(p);
 
         LIST_FOREACH(spec, s, p->specs) {
-                r = unit_require_mounts_for(UNIT(p), s->path, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(UNIT(p), s->path, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
         }
@@ -389,10 +392,9 @@ static int path_add_extras(Path *p) {
 }
 
 static int path_load(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
         int r;
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         r = unit_load_fragment_and_dropin(u, true);
@@ -410,11 +412,11 @@ static int path_load(Unit *u) {
 }
 
 static void path_dump(Unit *u, FILE *f, const char *prefix) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
         Unit *trigger;
 
-        assert(p);
         assert(f);
+        assert(prefix);
 
         trigger = UNIT_TRIGGER(u);
 
@@ -461,6 +463,7 @@ static int path_watch(Path *p) {
 
 static void path_set_state(Path *p, PathState state) {
         PathState old_state;
+
         assert(p);
 
         if (p->state != state)
@@ -481,9 +484,8 @@ static void path_set_state(Path *p, PathState state) {
 static void path_enter_waiting(Path *p, bool initial, bool from_trigger_notify);
 
 static int path_coldplug(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        assert(p);
         assert(p->state == PATH_DEAD);
 
         if (p->deserialized_state != p->state) {
@@ -625,10 +627,9 @@ static void path_mkdir(Path *p) {
 }
 
 static int path_start(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
         int r;
 
-        assert(p);
         assert(IN_SET(p->state, PATH_DEAD, PATH_FAILED));
 
         r = unit_test_trigger_loaded(u);
@@ -648,9 +649,8 @@ static int path_start(Unit *u) {
 }
 
 static int path_stop(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        assert(p);
         assert(IN_SET(p->state, PATH_WAITING, PATH_RUNNING));
 
         path_enter_dead(p, PATH_SUCCESS);
@@ -658,9 +658,8 @@ static int path_stop(Unit *u) {
 }
 
 static int path_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        assert(u);
         assert(f);
         assert(fds);
 
@@ -688,9 +687,8 @@ static int path_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int path_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -755,28 +753,24 @@ static int path_deserialize_item(Unit *u, const char *key, const char *value, FD
 }
 
 static UnitActiveState path_active_state(Unit *u) {
-        assert(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        return state_translation_table[PATH(u)->state];
+        return state_translation_table[p->state];
 }
 
 static const char *path_sub_state_to_string(Unit *u) {
-        assert(u);
+        Path *p = ASSERT_PTR(PATH(u));
 
-        return path_state_to_string(PATH(u)->state);
+        return path_state_to_string(p->state);
 }
 
 static int path_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
-        PathSpec *s = userdata, *found = NULL;
-        Path *p;
+        PathSpec *s = ASSERT_PTR(userdata), *found = NULL;
+        Path *p = ASSERT_PTR(PATH(s->unit));
         int changed;
 
-        assert(s);
-        assert(s->unit);
         assert(fd >= 0);
 
-        p = PATH(s->unit);
-
         if (!IN_SET(p->state, PATH_WAITING, PATH_RUNNING))
                 return 0;
 
@@ -827,10 +821,9 @@ static int path_trigger_notify_on_defer(sd_event_source *s, void *userdata) {
 }
 
 static void path_trigger_notify_impl(Unit *u, Unit *other, bool on_defer) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
         int r;
 
-        assert(u);
         assert(other);
 
         /* Invoked whenever the unit we trigger changes state or gains or loses a job */
@@ -897,9 +890,7 @@ static void path_trigger_notify(Unit *u, Unit *other) {
 }
 
 static void path_reset_failed(Unit *u) {
-        Path *p = PATH(u);
-
-        assert(p);
+        Path *p = ASSERT_PTR(PATH(u));
 
         if (p->state == PATH_FAILED)
                 path_set_state(p, PATH_DEAD);
@@ -908,11 +899,9 @@ static void path_reset_failed(Unit *u) {
 }
 
 static int path_can_start(Unit *u) {
-        Path *p = PATH(u);
+        Path *p = ASSERT_PTR(PATH(u));
         int r;
 
-        assert(p);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 path_enter_dead(p, PATH_FAILURE_START_LIMIT_HIT);
@@ -961,13 +950,11 @@ static int activation_details_path_deserialize(const char *key, const char *valu
 }
 
 static int activation_details_path_append_env(ActivationDetails *details, char ***strv) {
-        ActivationDetailsPath *p = ACTIVATION_DETAILS_PATH(details);
+        ActivationDetailsPath *p = ASSERT_PTR(ACTIVATION_DETAILS_PATH(details));
         char *s;
         int r;
 
-        assert(details);
         assert(strv);
-        assert(p);
 
         if (isempty(p->trigger_path_filename))
                 return 0;
@@ -984,21 +971,15 @@ static int activation_details_path_append_env(ActivationDetails *details, char *
 }
 
 static int activation_details_path_append_pair(ActivationDetails *details, char ***strv) {
-        ActivationDetailsPath *p = ACTIVATION_DETAILS_PATH(details);
+        ActivationDetailsPath *p = ASSERT_PTR(ACTIVATION_DETAILS_PATH(details));
         int r;
 
-        assert(details);
         assert(strv);
-        assert(p);
 
         if (isempty(p->trigger_path_filename))
                 return 0;
 
-        r = strv_extend(strv, "trigger_path");
-        if (r < 0)
-                return r;
-
-        r = strv_extend(strv, p->trigger_path_filename);
+        r = strv_extend_many(strv, "trigger_path", p->trigger_path_filename);
         if (r < 0)
                 return r;
 
diff --git a/src/core/restrict-ifaces.c b/src/core/restrict-ifaces.c
deleted file mode 100644
index 4dd8656..0000000
--- a/src/core/restrict-ifaces.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-
-#include "fd-util.h"
-#include "restrict-ifaces.h"
-#include "netlink-util.h"
-
-#if BPF_FRAMEWORK
-/* libbpf, clang and llc compile time dependencies are satisfied */
-
-#include "bpf-dlopen.h"
-#include "bpf-link.h"
-#include "bpf-util.h"
-#include "bpf/restrict_ifaces/restrict-ifaces-skel.h"
-
-static struct restrict_ifaces_bpf *restrict_ifaces_bpf_free(struct restrict_ifaces_bpf *obj) {
-        restrict_ifaces_bpf__destroy(obj);
-        return NULL;
-}
-
-DEFINE_TRIVIAL_CLEANUP_FUNC(struct restrict_ifaces_bpf *, restrict_ifaces_bpf_free);
-
-static int prepare_restrict_ifaces_bpf(
-                Unit* u,
-                bool is_allow_list,
-                const Set *restrict_network_interfaces,
-                struct restrict_ifaces_bpf **ret_object) {
-
-        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
-        _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
-        char *iface;
-        int r, map_fd;
-
-        assert(ret_object);
-
-        obj = restrict_ifaces_bpf__open();
-        if (!obj)
-                return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, errno, "restrict-interfaces: Failed to open BPF object: %m");
-
-        r = sym_bpf_map__set_max_entries(obj->maps.sd_restrictif, MAX(set_size(restrict_network_interfaces), 1u));
-        if (r != 0)
-                return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, r,
-                                "restrict-interfaces: Failed to resize BPF map '%s': %m",
-                                sym_bpf_map__name(obj->maps.sd_restrictif));
-
-        obj->rodata->is_allow_list = is_allow_list;
-
-        r = restrict_ifaces_bpf__load(obj);
-        if (r != 0)
-                return log_unit_full_errno(u, u ? LOG_ERR : LOG_DEBUG, r, "restrict-interfaces: Failed to load BPF object: %m");
-
-        map_fd = sym_bpf_map__fd(obj->maps.sd_restrictif);
-
-        SET_FOREACH(iface, restrict_network_interfaces) {
-                uint8_t dummy = 0;
-                int ifindex;
-
-                ifindex = rtnl_resolve_interface(&rtnl, iface);
-                if (ifindex < 0) {
-                        log_unit_warning_errno(u, ifindex,
-                                               "restrict-interfaces: Couldn't find index of network interface '%s', ignoring: %m",
-                                               iface);
-                        continue;
-                }
-
-                if (sym_bpf_map_update_elem(map_fd, &ifindex, &dummy, BPF_ANY))
-                        return log_unit_full_errno(u, u ? LOG_ERR : LOG_WARNING, errno,
-                                                   "restrict-interfaces: Failed to update BPF map '%s' fd: %m",
-                                                   sym_bpf_map__name(obj->maps.sd_restrictif));
-        }
-
-        *ret_object = TAKE_PTR(obj);
-        return 0;
-}
-
-int restrict_network_interfaces_supported(void) {
-        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
-        static int supported = -1;
-        int r;
-
-        if (supported >= 0)
-                return supported;
-
-        if (!cgroup_bpf_supported())
-                return (supported = false);
-
-        if (!compat_libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_CGROUP_SKB, /*opts=*/NULL)) {
-                log_debug("restrict-interfaces: BPF program type cgroup_skb is not supported");
-                return (supported = false);
-        }
-
-        r = prepare_restrict_ifaces_bpf(NULL, true, NULL, &obj);
-        if (r < 0) {
-                log_debug_errno(r, "restrict-interfaces: Failed to load BPF object: %m");
-                return (supported = false);
-        }
-
-        return (supported = bpf_can_link_program(obj->progs.sd_restrictif_i));
-}
-
-static int restrict_network_interfaces_install_impl(Unit *u) {
-        _cleanup_(bpf_link_freep) struct bpf_link *egress_link = NULL, *ingress_link = NULL;
-        _cleanup_(restrict_ifaces_bpf_freep) struct restrict_ifaces_bpf *obj = NULL;
-        _cleanup_free_ char *cgroup_path = NULL;
-        _cleanup_close_ int cgroup_fd = -EBADF;
-        CGroupContext *cc;
-        int r;
-
-        cc = unit_get_cgroup_context(u);
-        if (!cc)
-                return 0;
-
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
-        if (r < 0)
-                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to get cgroup path: %m");
-
-        if (!cc->restrict_network_interfaces)
-                return 0;
-
-        r = prepare_restrict_ifaces_bpf(u,
-                cc->restrict_network_interfaces_is_allow_list,
-                cc->restrict_network_interfaces,
-                &obj);
-        if (r < 0)
-                return r;
-
-        cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY, 0);
-        if (cgroup_fd < 0)
-                return -errno;
-
-        ingress_link = sym_bpf_program__attach_cgroup(obj->progs.sd_restrictif_i, cgroup_fd);
-        r = sym_libbpf_get_error(ingress_link);
-        if (r != 0)
-                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to create ingress cgroup link: %m");
-
-        egress_link = sym_bpf_program__attach_cgroup(obj->progs.sd_restrictif_e, cgroup_fd);
-        r = sym_libbpf_get_error(egress_link);
-        if (r != 0)
-                return log_unit_error_errno(u, r, "restrict-interfaces: Failed to create egress cgroup link: %m");
-
-        u->restrict_ifaces_ingress_bpf_link = TAKE_PTR(ingress_link);
-        u->restrict_ifaces_egress_bpf_link = TAKE_PTR(egress_link);
-
-        return 0;
-}
-
-int restrict_network_interfaces_install(Unit *u) {
-        int r = restrict_network_interfaces_install_impl(u);
-        fdset_close(u->initial_restric_ifaces_link_fds);
-        return r;
-}
-
-int serialize_restrict_network_interfaces(Unit *u, FILE *f, FDSet *fds) {
-        int r;
-
-        assert(u);
-
-        r = bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", u->restrict_ifaces_ingress_bpf_link);
-        if (r < 0)
-                return r;
-
-        return bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", u->restrict_ifaces_egress_bpf_link);
-}
-
-int restrict_network_interfaces_add_initial_link_fd(Unit *u, int fd) {
-        int r;
-
-        assert(u);
-
-        if (!u->initial_restric_ifaces_link_fds) {
-                u->initial_restric_ifaces_link_fds = fdset_new();
-                if (!u->initial_restric_ifaces_link_fds)
-                        return log_oom();
-        }
-
-        r = fdset_put(u->initial_restric_ifaces_link_fds, fd);
-        if (r < 0)
-                return log_unit_error_errno(u, r,
-                        "restrict-interfaces: Failed to put restrict-ifaces-bpf-fd %d to restored fdset: %m", fd);
-
-        return 0;
-}
-
-#else /* ! BPF_FRAMEWORK */
-int restrict_network_interfaces_supported(void) {
-        return 0;
-}
-
-int restrict_network_interfaces_install(Unit *u) {
-        return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
-                        "restrict-interfaces: Failed to install; BPF programs built from source code are not supported: %m");
-}
-
-int serialize_restrict_network_interfaces(Unit *u, FILE *f, FDSet *fds) {
-        return 0;
-}
-
-int restrict_network_interfaces_add_initial_link_fd(Unit *u, int fd) {
-        return 0;
-}
-#endif
diff --git a/src/core/restrict-ifaces.h b/src/core/restrict-ifaces.h
deleted file mode 100644
index 6e7a824..0000000
--- a/src/core/restrict-ifaces.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1-or-later */
-#pragma once
-
-#include "fdset.h"
-#include "unit.h"
-
-typedef struct Unit Unit;
-
-int restrict_network_interfaces_supported(void);
-int restrict_network_interfaces_install(Unit *u);
-
-int serialize_restrict_network_interfaces(Unit *u, FILE *f, FDSet *fds);
-
-/* Add BPF link fd created before daemon-reload or daemon-reexec.
- * FDs will be closed at the end of restrict_network_interfaces_install. */
-int restrict_network_interfaces_add_initial_link_fd(Unit *u, int fd);
diff --git a/src/core/scope.c b/src/core/scope.c
index 2841280..cfa2aeb 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -23,21 +23,20 @@
 #include "user-util.h"
 
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
-        [SCOPE_DEAD] = UNIT_INACTIVE,
-        [SCOPE_START_CHOWN] = UNIT_ACTIVATING,
-        [SCOPE_RUNNING] = UNIT_ACTIVE,
-        [SCOPE_ABANDONED] = UNIT_ACTIVE,
+        [SCOPE_DEAD]         = UNIT_INACTIVE,
+        [SCOPE_START_CHOWN]  = UNIT_ACTIVATING,
+        [SCOPE_RUNNING]      = UNIT_ACTIVE,
+        [SCOPE_ABANDONED]    = UNIT_ACTIVE,
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
         [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
-        [SCOPE_FAILED] = UNIT_FAILED,
+        [SCOPE_FAILED]       = UNIT_FAILED,
 };
 
 static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 
 static void scope_init(Unit *u) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         s->runtime_max_usec = USEC_INFINITY;
@@ -48,9 +47,7 @@ static void scope_init(Unit *u) {
 }
 
 static void scope_done(Unit *u) {
-        Scope *s = SCOPE(u);
-
-        assert(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         s->controller = mfree(s->controller);
         s->controller_track = sd_bus_track_unref(s->controller_track);
@@ -84,6 +81,7 @@ static int scope_arm_timer(Scope *s, bool relative, usec_t usec) {
 
 static void scope_set_state(Scope *s, ScopeState state) {
         ScopeState old_state;
+
         assert(s);
 
         if (s->state != state)
@@ -101,7 +99,8 @@ static void scope_set_state(Scope *s, ScopeState state) {
         }
 
         if (state != old_state)
-                log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state));
+                log_unit_debug(UNIT(s), "Changed %s -> %s",
+                               scope_state_to_string(old_state), scope_state_to_string(state));
 
         unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], /* reload_success = */ true);
 }
@@ -181,10 +180,9 @@ static int scope_add_extras(Scope *s) {
 }
 
 static int scope_load(Unit *u) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
         int r;
 
-        assert(s);
         assert(u->load_state == UNIT_STUB);
 
         if (!u->transient && !MANAGER_IS_RELOADING(u->manager))
@@ -227,10 +225,9 @@ static usec_t scope_coldplug_timeout(Scope *s) {
 }
 
 static int scope_coldplug(Unit *u) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
         int r;
 
-        assert(s);
         assert(s->state == SCOPE_DEAD);
 
         if (s->deserialized_state == s->state)
@@ -260,10 +257,10 @@ static int scope_coldplug(Unit *u) {
 }
 
 static void scope_dump(Unit *u, FILE *f, const char *prefix) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
-        assert(s);
         assert(f);
+        assert(prefix);
 
         fprintf(f,
                 "%sScope State: %s\n"
@@ -277,7 +274,7 @@ static void scope_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, FORMAT_TIMESPAN(s->runtime_rand_extra_usec, USEC_PER_SEC),
                 prefix, oom_policy_to_string(s->oom_policy));
 
-        cgroup_context_dump(UNIT(s), f, prefix);
+        cgroup_context_dump(u, f, prefix);
         kill_context_dump(&s->kill_context, f, prefix);
 }
 
@@ -317,13 +314,9 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
         else {
                 r = unit_kill_context(
                                 UNIT(s),
-                                &s->kill_context,
                                 state != SCOPE_STOP_SIGTERM ? KILL_KILL :
                                 s->was_abandoned            ? KILL_TERMINATE_AND_LOG :
-                                                              KILL_TERMINATE,
-                                /* main_pid= */ NULL,
-                                /* control_pid= */ NULL,
-                                /* main_pid_alien= */ false);
+                                                              KILL_TERMINATE);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
                         goto fail;
@@ -350,13 +343,15 @@ fail:
 }
 
 static int scope_enter_start_chown(Scope *s) {
+        Unit *u = UNIT(ASSERT_PTR(s));
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
-        Unit *u = UNIT(s);
         int r;
 
-        assert(s);
         assert(s->user);
 
+        if (!s->cgroup_runtime)
+                return -EINVAL;
+
         r = scope_arm_timer(s, /* relative= */ true, u->manager->defaults.timeout_start_usec);
         if (r < 0)
                 return r;
@@ -389,7 +384,7 @@ static int scope_enter_start_chown(Scope *s) {
                         }
                 }
 
-                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
+                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, s->cgroup_runtime->cgroup_path, uid, gid);
                 if (r < 0) {
                         log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
                         _exit(EXIT_CGROUP);
@@ -411,11 +406,9 @@ fail:
 }
 
 static int scope_enter_running(Scope *s) {
-        Unit *u = UNIT(s);
+        Unit *u = UNIT(ASSERT_PTR(s));
         int r;
 
-        assert(s);
-
         (void) bus_scope_track_controller(s);
 
         r = unit_acquire_invocation_id(u);
@@ -458,9 +451,7 @@ fail:
 }
 
 static int scope_start(Unit *u) {
-        Scope *s = SCOPE(u);
-
-        assert(s);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         if (unit_has_name(u, SPECIAL_INIT_SCOPE))
                 return -EPERM;
@@ -489,9 +480,7 @@ static int scope_start(Unit *u) {
 }
 
 static int scope_stop(Unit *u) {
-        Scope *s = SCOPE(u);
-
-        assert(s);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         if (IN_SET(s->state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
                 return 0;
@@ -503,9 +492,7 @@ static int scope_stop(Unit *u) {
 }
 
 static void scope_reset_failed(Unit *u) {
-        Scope *s = SCOPE(u);
-
-        assert(s);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         if (s->state == SCOPE_FAILED)
                 scope_set_state(s, SCOPE_DEAD);
@@ -514,7 +501,7 @@ static void scope_reset_failed(Unit *u) {
 }
 
 static int scope_get_timeout(Unit *u, usec_t *timeout) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
         usec_t t;
         int r;
 
@@ -532,10 +519,9 @@ static int scope_get_timeout(Unit *u, usec_t *timeout) {
 }
 
 static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
         PidRef *pid;
 
-        assert(s);
         assert(f);
         assert(fds);
 
@@ -552,10 +538,9 @@ static int scope_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int scope_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
         int r;
 
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -600,8 +585,7 @@ static int scope_deserialize_item(Unit *u, const char *key, const char *value, F
 }
 
 static void scope_notify_cgroup_empty_event(Unit *u) {
-        Scope *s = SCOPE(u);
-        assert(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         log_unit_debug(u, "cgroup is empty");
 
@@ -610,7 +594,7 @@ static void scope_notify_cgroup_empty_event(Unit *u) {
 }
 
 static void scope_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
-        Scope *s = SCOPE(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         if (managed_oom)
                 log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
@@ -642,9 +626,7 @@ static void scope_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
 }
 
 static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
-        Scope *s = SCOPE(u);
-
-        assert(s);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
         if (s->state == SCOPE_START_CHOWN) {
                 if (!is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
@@ -662,9 +644,8 @@ static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 }
 
 static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Scope *s = SCOPE(userdata);
+        Scope *s = ASSERT_PTR(SCOPE(userdata));
 
-        assert(s);
         assert(s->timer_event_source == source);
 
         switch (s->state) {
@@ -726,15 +707,15 @@ int scope_abandon(Scope *s) {
 }
 
 static UnitActiveState scope_active_state(Unit *u) {
-        assert(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
-        return state_translation_table[SCOPE(u)->state];
+        return state_translation_table[s->state];
 }
 
 static const char *scope_sub_state_to_string(Unit *u) {
-        assert(u);
+        Scope *s = ASSERT_PTR(SCOPE(u));
 
-        return scope_state_to_string(SCOPE(u)->state);
+        return scope_state_to_string(s->state);
 }
 
 static void scope_enumerate_perpetual(Manager *m) {
@@ -782,6 +763,7 @@ const UnitVTable scope_vtable = {
         .object_size = sizeof(Scope),
         .cgroup_context_offset = offsetof(Scope, cgroup_context),
         .kill_context_offset = offsetof(Scope, kill_context),
+        .cgroup_runtime_offset = offsetof(Scope, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -806,8 +788,7 @@ const UnitVTable scope_vtable = {
         .start = scope_start,
         .stop = scope_stop,
 
-        .freeze = unit_freeze_vtable_common,
-        .thaw = unit_thaw_vtable_common,
+        .freezer_action = unit_cgroup_freezer_action,
 
         .get_timeout = scope_get_timeout,
 
diff --git a/src/core/scope.h b/src/core/scope.h
index c9574a3..1090431 100644
--- a/src/core/scope.h
+++ b/src/core/scope.h
@@ -21,6 +21,7 @@ struct Scope {
 
         CGroupContext cgroup_context;
         KillContext kill_context;
+        CGroupRuntime *cgroup_runtime;
 
         ScopeState state, deserialized_state;
         ScopeResult result;
diff --git a/src/core/selinux-access.c b/src/core/selinux-access.c
index 62181a6..a67a520 100644
--- a/src/core/selinux-access.c
+++ b/src/core/selinux-access.c
@@ -193,7 +193,6 @@ int mac_selinux_access_check_internal(
         assert(message);
         assert(permission);
         assert(function);
-        assert(error);
 
         r = access_init(error);
         if (r <= 0)
@@ -248,7 +247,7 @@ int mac_selinux_access_check_internal(
                 tclass = "system";
         }
 
-        sd_bus_creds_get_cmdline(creds, &cmdline);
+        (void) sd_bus_creds_get_cmdline(creds, &cmdline);
         cl = strv_join(cmdline, " ");
 
         struct audit_info audit_info = {
@@ -268,7 +267,7 @@ int mac_selinux_access_check_internal(
 
         log_full_errno_zerook(LOG_DEBUG, r,
                               "SELinux access check scon=%s tcon=%s tclass=%s perm=%s state=%s function=%s path=%s cmdline=%s: %m",
-                              scon, acon, tclass, permission, enforce ? "enforcing" : "permissive", function, strna(unit_path), strna(empty_to_null(cl)));
+                              scon, acon, tclass, permission, enforce ? "enforcing" : "permissive", function, strna(unit_path), empty_to_na(cl));
         return enforce ? r : 0;
 }
 
diff --git a/src/core/service.c b/src/core/service.c
index ffe92d2..8ec27c4 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -24,6 +24,7 @@
 #include "fd-util.h"
 #include "fileio.h"
 #include "format-util.h"
+#include "io-util.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
 #include "log.h"
@@ -34,6 +35,7 @@
 #include "path-util.h"
 #include "process-util.h"
 #include "random-util.h"
+#include "selinux-util.h"
 #include "serialize.h"
 #include "service.h"
 #include "signal-util.h"
@@ -49,61 +51,61 @@
 #define service_spawn(...) service_spawn_internal(__func__, __VA_ARGS__)
 
 static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
-        [SERVICE_DEAD] = UNIT_INACTIVE,
-        [SERVICE_CONDITION] = UNIT_ACTIVATING,
-        [SERVICE_START_PRE] = UNIT_ACTIVATING,
-        [SERVICE_START] = UNIT_ACTIVATING,
-        [SERVICE_START_POST] = UNIT_ACTIVATING,
-        [SERVICE_RUNNING] = UNIT_ACTIVE,
-        [SERVICE_EXITED] = UNIT_ACTIVE,
-        [SERVICE_RELOAD] = UNIT_RELOADING,
-        [SERVICE_RELOAD_SIGNAL] = UNIT_RELOADING,
-        [SERVICE_RELOAD_NOTIFY] = UNIT_RELOADING,
-        [SERVICE_STOP] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
-        [SERVICE_FAILED] = UNIT_FAILED,
-        [SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
+        [SERVICE_DEAD]                       = UNIT_INACTIVE,
+        [SERVICE_CONDITION]                  = UNIT_ACTIVATING,
+        [SERVICE_START_PRE]                  = UNIT_ACTIVATING,
+        [SERVICE_START]                      = UNIT_ACTIVATING,
+        [SERVICE_START_POST]                 = UNIT_ACTIVATING,
+        [SERVICE_RUNNING]                    = UNIT_ACTIVE,
+        [SERVICE_EXITED]                     = UNIT_ACTIVE,
+        [SERVICE_RELOAD]                     = UNIT_RELOADING,
+        [SERVICE_RELOAD_SIGNAL]              = UNIT_RELOADING,
+        [SERVICE_RELOAD_NOTIFY]              = UNIT_RELOADING,
+        [SERVICE_STOP]                       = UNIT_DEACTIVATING,
+        [SERVICE_STOP_WATCHDOG]              = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGTERM]               = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGKILL]               = UNIT_DEACTIVATING,
+        [SERVICE_STOP_POST]                  = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_WATCHDOG]             = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_SIGTERM]              = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_SIGKILL]              = UNIT_DEACTIVATING,
+        [SERVICE_FAILED]                     = UNIT_FAILED,
+        [SERVICE_DEAD_BEFORE_AUTO_RESTART]   = UNIT_INACTIVE,
         [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
-        [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
-        [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
-        [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
-        [SERVICE_CLEANING] = UNIT_MAINTENANCE,
+        [SERVICE_DEAD_RESOURCES_PINNED]      = UNIT_INACTIVE,
+        [SERVICE_AUTO_RESTART]               = UNIT_ACTIVATING,
+        [SERVICE_AUTO_RESTART_QUEUED]        = UNIT_ACTIVATING,
+        [SERVICE_CLEANING]                   = UNIT_MAINTENANCE,
 };
 
 /* For Type=idle we never want to delay any other jobs, hence we
  * consider idle jobs active as soon as we start working on them */
 static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = {
-        [SERVICE_DEAD] = UNIT_INACTIVE,
-        [SERVICE_CONDITION] = UNIT_ACTIVE,
-        [SERVICE_START_PRE] = UNIT_ACTIVE,
-        [SERVICE_START] = UNIT_ACTIVE,
-        [SERVICE_START_POST] = UNIT_ACTIVE,
-        [SERVICE_RUNNING] = UNIT_ACTIVE,
-        [SERVICE_EXITED] = UNIT_ACTIVE,
-        [SERVICE_RELOAD] = UNIT_RELOADING,
-        [SERVICE_RELOAD_SIGNAL] = UNIT_RELOADING,
-        [SERVICE_RELOAD_NOTIFY] = UNIT_RELOADING,
-        [SERVICE_STOP] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_WATCHDOG] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_SIGTERM] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_SIGKILL] = UNIT_DEACTIVATING,
-        [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_WATCHDOG] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
-        [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
-        [SERVICE_FAILED] = UNIT_FAILED,
-        [SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE,
+        [SERVICE_DEAD]                       = UNIT_INACTIVE,
+        [SERVICE_CONDITION]                  = UNIT_ACTIVE,
+        [SERVICE_START_PRE]                  = UNIT_ACTIVE,
+        [SERVICE_START]                      = UNIT_ACTIVE,
+        [SERVICE_START_POST]                 = UNIT_ACTIVE,
+        [SERVICE_RUNNING]                    = UNIT_ACTIVE,
+        [SERVICE_EXITED]                     = UNIT_ACTIVE,
+        [SERVICE_RELOAD]                     = UNIT_RELOADING,
+        [SERVICE_RELOAD_SIGNAL]              = UNIT_RELOADING,
+        [SERVICE_RELOAD_NOTIFY]              = UNIT_RELOADING,
+        [SERVICE_STOP]                       = UNIT_DEACTIVATING,
+        [SERVICE_STOP_WATCHDOG]              = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGTERM]               = UNIT_DEACTIVATING,
+        [SERVICE_STOP_SIGKILL]               = UNIT_DEACTIVATING,
+        [SERVICE_STOP_POST]                  = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_WATCHDOG]             = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_SIGTERM]              = UNIT_DEACTIVATING,
+        [SERVICE_FINAL_SIGKILL]              = UNIT_DEACTIVATING,
+        [SERVICE_FAILED]                     = UNIT_FAILED,
+        [SERVICE_DEAD_BEFORE_AUTO_RESTART]   = UNIT_INACTIVE,
         [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
-        [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
-        [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
-        [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
-        [SERVICE_CLEANING] = UNIT_MAINTENANCE,
+        [SERVICE_DEAD_RESOURCES_PINNED]      = UNIT_INACTIVE,
+        [SERVICE_AUTO_RESTART]               = UNIT_ACTIVATING,
+        [SERVICE_AUTO_RESTART_QUEUED]        = UNIT_ACTIVATING,
+        [SERVICE_CLEANING]                   = UNIT_MAINTENANCE,
 };
 
 static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata);
@@ -114,6 +116,25 @@ static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t ev
 static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
 static void service_enter_reload_by_notify(Service *s);
 
+static bool SERVICE_STATE_WITH_MAIN_PROCESS(ServiceState state) {
+        return IN_SET(state,
+                      SERVICE_START, SERVICE_START_POST,
+                      SERVICE_RUNNING,
+                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                      SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+                      SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL);
+}
+
+static bool SERVICE_STATE_WITH_CONTROL_PROCESS(ServiceState state) {
+        return IN_SET(state,
+                      SERVICE_CONDITION,
+                      SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+                      SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                      SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+                      SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+                      SERVICE_CLEANING);
+}
+
 static void service_init(Unit *u) {
         Service *s = SERVICE(u);
 
@@ -151,25 +172,17 @@ static void service_init(Unit *u) {
 
 static void service_unwatch_control_pid(Service *s) {
         assert(s);
-
-        if (!pidref_is_set(&s->control_pid))
-                return;
-
-        unit_unwatch_pidref(UNIT(s), &s->control_pid);
-        pidref_done(&s->control_pid);
+        unit_unwatch_pidref_done(UNIT(s), &s->control_pid);
 }
 
 static void service_unwatch_main_pid(Service *s) {
         assert(s);
-
-        if (!pidref_is_set(&s->main_pid))
-                return;
-
-        unit_unwatch_pidref(UNIT(s), &s->main_pid);
-        pidref_done(&s->main_pid);
+        unit_unwatch_pidref_done(UNIT(s), &s->main_pid);
 }
 
 static void service_unwatch_pid_file(Service *s) {
+        assert(s);
+
         if (!s->pid_file_pathspec)
                 return;
 
@@ -179,42 +192,41 @@ static void service_unwatch_pid_file(Service *s) {
         s->pid_file_pathspec = mfree(s->pid_file_pathspec);
 }
 
-static int service_set_main_pidref(Service *s, PidRef *pidref) {
+static int service_set_main_pidref(Service *s, PidRef pidref_consume, const dual_timestamp *start_timestamp) {
+        _cleanup_(pidref_done) PidRef pidref = pidref_consume;
         int r;
 
         assert(s);
 
-        /* Takes ownership of the specified pidref on success, but not on failure. */
+        /* Takes ownership of the specified pidref on both success and failure. */
 
-        if (!pidref_is_set(pidref))
+        if (!pidref_is_set(&pidref))
                 return -ESRCH;
 
-        if (pidref->pid <= 1)
+        if (pidref.pid <= 1)
                 return -EINVAL;
 
-        if (pidref_is_self(pidref))
+        if (pidref_is_self(&pidref))
                 return -EINVAL;
 
-        if (pidref_equal(&s->main_pid, pidref) && s->main_pid_known) {
-                pidref_done(pidref);
+        if (s->main_pid_known && pidref_equal(&s->main_pid, &pidref))
                 return 0;
-        }
 
-        if (!pidref_equal(&s->main_pid, pidref)) {
+        if (!pidref_equal(&s->main_pid, &pidref)) {
                 service_unwatch_main_pid(s);
-                exec_status_start(&s->main_exec_status, pidref->pid);
+                exec_status_start(&s->main_exec_status, pidref.pid, start_timestamp);
         }
 
-        s->main_pid = TAKE_PIDREF(*pidref);
+        s->main_pid = TAKE_PIDREF(pidref);
         s->main_pid_known = true;
 
         r = pidref_is_my_child(&s->main_pid);
         if (r < 0)
                 log_unit_warning_errno(UNIT(s), r, "Can't determine if process "PID_FMT" is our child, assuming it is not: %m", s->main_pid.pid);
-        else if (r == 0)
+        else if (r == 0) // FIXME: Supervise through pidfd here
                 log_unit_warning(UNIT(s), "Supervising process "PID_FMT" which is not our child. We'll most likely not notice when it exits.", s->main_pid.pid);
-
         s->main_pid_alien = r <= 0;
+
         return 0;
 }
 
@@ -290,7 +302,7 @@ static void service_start_watchdog(Service *s) {
 
                 /* Let's process everything else which might be a sign
                  * of living before we consider a service died. */
-                r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE);
+                r = sd_event_source_set_priority(s->watchdog_event_source, EVENT_PRIORITY_SERVICE_WATCHDOG);
         }
         if (r < 0)
                 log_unit_warning_errno(UNIT(s), r, "Failed to install watchdog timer: %m");
@@ -429,7 +441,7 @@ static void service_release_fd_store(Service *s) {
 static void service_release_stdio_fd(Service *s) {
         assert(s);
 
-        if (s->stdin_fd < 0 && s->stdout_fd < 0 && s->stdout_fd < 0)
+        if (s->stdin_fd < 0 && s->stdout_fd < 0 && s->stderr_fd < 0)
                 return;
 
         log_unit_debug(UNIT(s), "Releasing stdin/stdout/stderr file descriptors.");
@@ -438,10 +450,9 @@ static void service_release_stdio_fd(Service *s) {
         s->stdout_fd = asynchronous_close(s->stdout_fd);
         s->stderr_fd = asynchronous_close(s->stderr_fd);
 }
-static void service_done(Unit *u) {
-        Service *s = SERVICE(u);
 
-        assert(s);
+static void service_done(Unit *u) {
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         open_file_free_many(&s->open_files);
 
@@ -449,6 +460,7 @@ static void service_done(Unit *u) {
         s->status_text = mfree(s->status_text);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
         s->control_command = NULL;
         s->main_command = NULL;
@@ -511,7 +523,8 @@ static int service_add_fd_store(Service *s, int fd_in, const char *name, bool do
         if (fstat(fd, &st) < 0)
                 return -errno;
 
-        log_unit_debug(UNIT(s), "Trying to stash fd for dev=" DEVNUM_FORMAT_STR "/inode=%" PRIu64, DEVNUM_FORMAT_VAL(st.st_dev), (uint64_t) st.st_ino);
+        log_unit_debug(UNIT(s), "Trying to stash fd for dev=" DEVNUM_FORMAT_STR "/inode=%" PRIu64,
+                       DEVNUM_FORMAT_VAL(st.st_dev), (uint64_t) st.st_ino);
 
         if (s->n_fd_store >= s->n_fd_store_max)
                 /* Our store is full.  Use this errno rather than E[NM]FILE to distinguish from the case
@@ -545,17 +558,16 @@ static int service_add_fd_store(Service *s, int fd_in, const char *name, bool do
                 r = sd_event_add_io(UNIT(s)->manager->event, &fs->event_source, fs->fd, 0, on_fd_store_io, fs);
                 if (r < 0 && r != -EPERM) /* EPERM indicates fds that aren't pollable, which is OK */
                         return r;
-                else if (r >= 0)
+                if (r >= 0)
                         (void) sd_event_source_set_description(fs->event_source, "service-fd-store");
         }
 
+        log_unit_debug(UNIT(s), "Added fd %i (%s) to fd store.", fs->fd, fs->fdname);
+
         fs->service = s;
-        LIST_PREPEND(fd_store, s->fd_store, fs);
+        LIST_PREPEND(fd_store, s->fd_store, TAKE_PTR(fs));
         s->n_fd_store++;
 
-        log_unit_debug(UNIT(s), "Added fd %i (%s) to fd store.", fs->fd, fs->fdname);
-
-        TAKE_PTR(fs);
         return 1; /* fd newly stored */
 }
 
@@ -654,9 +666,6 @@ static int service_verify(Service *s) {
         if (s->type == SERVICE_ONESHOT && IN_SET(s->restart, SERVICE_RESTART_ALWAYS, SERVICE_RESTART_ON_SUCCESS))
                 return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service has Restart= set to either always or on-success, which isn't allowed for Type=oneshot services. Refusing.");
 
-        if (s->type == SERVICE_ONESHOT && !exit_status_set_is_empty(&s->restart_force_status))
-                return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service has RestartForceExitStatus= set, which isn't allowed for Type=oneshot services. Refusing.");
-
         if (s->type == SERVICE_ONESHOT && s->exit_type == SERVICE_EXIT_CGROUP)
                 return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service has ExitType=cgroup set, which isn't allowed for Type=oneshot services. Refusing.");
 
@@ -856,7 +865,7 @@ static int service_add_extras(Service *s) {
 }
 
 static int service_load(Unit *u) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
         r = unit_load_fragment_and_dropin(u, true);
@@ -901,21 +910,19 @@ static void service_dump_fdstore(Service *s, FILE *f, const char *prefix) {
                         "%s%s '%s' (type=%s; dev=" DEVNUM_FORMAT_STR "; inode=%" PRIu64 "; rdev=" DEVNUM_FORMAT_STR "; path=%s; access=%s)\n",
                         prefix, i == s->fd_store ? "File Descriptor Store Entry:" : "                            ",
                         i->fdname,
-                        inode_type_to_string(st.st_mode),
+                        strna(inode_type_to_string(st.st_mode)),
                         DEVNUM_FORMAT_VAL(st.st_dev),
                         (uint64_t) st.st_ino,
                         DEVNUM_FORMAT_VAL(st.st_rdev),
                         strna(path),
-                        accmode_to_string(flags));
+                        strna(accmode_to_string(flags)));
         }
 }
 
 static void service_dump(Unit *u, FILE *f, const char *prefix) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         const char *prefix2;
 
-        assert(s);
-
         prefix = strempty(prefix);
         prefix2 = strjoina(prefix, "\t");
 
@@ -1016,8 +1023,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
                 if (!s->exec_command[c])
                         continue;
 
-                fprintf(f, "%s-> %s:\n",
-                        prefix, service_exec_command_to_string(c));
+                fprintf(f, "%s%s %s:\n",
+                        prefix, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), service_exec_command_to_string(c));
 
                 exec_command_dump_list(s->exec_command[c], f, prefix2);
         }
@@ -1159,7 +1166,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
         } else
                 log_unit_debug(UNIT(s), "Main PID loaded: "PID_FMT, pidref.pid);
 
-        r = service_set_main_pidref(s, &pidref);
+        r = service_set_main_pidref(s, TAKE_PIDREF(pidref), /* start_timestamp = */ NULL);
         if (r < 0)
                 return r;
 
@@ -1189,7 +1196,7 @@ static void service_search_main_pid(Service *s) {
                 return;
 
         log_unit_debug(UNIT(s), "Main PID guessed: "PID_FMT, pid.pid);
-        if (service_set_main_pidref(s, &pid) < 0)
+        if (service_set_main_pidref(s, TAKE_PIDREF(pid), /* start_timestamp = */ NULL) < 0)
                 return;
 
         r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
@@ -1224,22 +1231,12 @@ static void service_set_state(Service *s, ServiceState state) {
                     SERVICE_CLEANING))
                 s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
 
-        if (!IN_SET(state,
-                    SERVICE_START, SERVICE_START_POST,
-                    SERVICE_RUNNING,
-                    SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
-                    SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                    SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
+        if (!SERVICE_STATE_WITH_MAIN_PROCESS(state)) {
                 service_unwatch_main_pid(s);
                 s->main_command = NULL;
         }
 
-        if (!IN_SET(state,
-                    SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
-                    SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
-                    SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                    SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
-                    SERVICE_CLEANING)) {
+        if (!SERVICE_STATE_WITH_CONTROL_PROCESS(state)) {
                 service_unwatch_control_pid(s);
                 s->control_command = NULL;
                 s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
@@ -1326,12 +1323,7 @@ static int service_coldplug(Unit *u) {
 
         if (pidref_is_set(&s->main_pid) &&
             pidref_is_unwaited(&s->main_pid) > 0 &&
-            (IN_SET(s->deserialized_state,
-                    SERVICE_START, SERVICE_START_POST,
-                    SERVICE_RUNNING,
-                    SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
-                    SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                    SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
+            SERVICE_STATE_WITH_MAIN_PROCESS(s->deserialized_state)) {
                 r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
                 if (r < 0)
                         return r;
@@ -1339,12 +1331,7 @@ static int service_coldplug(Unit *u) {
 
         if (pidref_is_set(&s->control_pid) &&
             pidref_is_unwaited(&s->control_pid) > 0 &&
-            IN_SET(s->deserialized_state,
-                   SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
-                   SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
-                   SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
-                   SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
-                   SERVICE_CLEANING)) {
+            SERVICE_STATE_WITH_CONTROL_PROCESS(s->deserialized_state)) {
                 r = unit_watch_pidref(UNIT(s), &s->control_pid, /* exclusive= */ false);
                 if (r < 0)
                         return r;
@@ -1357,6 +1344,7 @@ static int service_coldplug(Unit *u) {
                     SERVICE_DEAD_RESOURCES_PINNED)) {
                 (void) unit_enqueue_rewatch_pids(u);
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
         }
 
         if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY))
@@ -1418,13 +1406,12 @@ static int service_collect_fds(
 
                 UNIT_FOREACH_DEPENDENCY(u, UNIT(s), UNIT_ATOM_TRIGGERED_BY) {
                         _cleanup_free_ int *cfds = NULL;
-                        Socket *sock;
                         int cn_fds;
-
-                        if (u->type != UNIT_SOCKET)
-                                continue;
+                        Socket *sock;
 
                         sock = SOCKET(u);
+                        if (!sock)
+                                continue;
 
                         cn_fds = socket_collect_fds(sock, &cfds);
                         if (cn_fds < 0)
@@ -1436,18 +1423,8 @@ static int service_collect_fds(
                         if (!rfds) {
                                 rfds = TAKE_PTR(cfds);
                                 rn_socket_fds = cn_fds;
-                        } else {
-                                int *t;
-
-                                t = reallocarray(rfds, rn_socket_fds + cn_fds, sizeof(int));
-                                if (!t)
-                                        return -ENOMEM;
-
-                                memcpy(t + rn_socket_fds, cfds, cn_fds * sizeof(int));
-
-                                rfds = t;
-                                rn_socket_fds += cn_fds;
-                        }
+                        } else if (!GREEDY_REALLOC_APPEND(rfds, rn_socket_fds, cfds, cn_fds))
+                                return -ENOMEM;
 
                         r = strv_extend_n(&rfd_names, socket_fdname(sock), cn_fds);
                         if (r < 0)
@@ -1510,9 +1487,10 @@ static int service_allocate_exec_fd_event_source(
         if (r < 0)
                 return log_unit_error_errno(UNIT(s), r, "Failed to allocate exec_fd event source: %m");
 
-        /* This is a bit lower priority than SIGCHLD, as that carries a lot more interesting failure information */
+        /* This is a bit higher priority than SIGCHLD, to make sure we don't confuse the case "failed to
+         * start" with the case "succeeded to start, but failed immediately after". */
 
-        r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_NORMAL-3);
+        r = sd_event_source_set_priority(source, EVENT_PRIORITY_EXEC_FD);
         if (r < 0)
                 return log_unit_error_errno(UNIT(s), r, "Failed to adjust priority of exec_fd event source: %m");
 
@@ -1602,12 +1580,52 @@ static Service *service_get_triggering_service(Service *s) {
         return NULL;
 }
 
+static ExecFlags service_exec_flags(ServiceExecCommand command_id, ExecFlags cred_flag) {
+        /* All service main/control processes honor sandboxing and namespacing options (except those
+         * explicitly excluded in service_spawn()) */
+        ExecFlags flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT;
+
+        assert(command_id >= 0);
+        assert(command_id < _SERVICE_EXEC_COMMAND_MAX);
+        assert((cred_flag & ~(EXEC_SETUP_CREDENTIALS_FRESH|EXEC_SETUP_CREDENTIALS)) == 0);
+        assert((cred_flag != 0) == (command_id == SERVICE_EXEC_START));
+
+        /* Control processes spawned before main process also get tty access */
+        if (IN_SET(command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START))
+                flags |= EXEC_APPLY_TTY_STDIN;
+
+        /* All start phases get access to credentials. ExecStartPre= gets a new credential store upon
+         * every invocation, so that updating credential files through it works. When the first main process
+         * starts, passed creds become stable. Also see 'cred_flag'. */
+        if (command_id == SERVICE_EXEC_START_PRE)
+                flags |= EXEC_SETUP_CREDENTIALS_FRESH;
+        if (command_id == SERVICE_EXEC_START_POST)
+                flags |= EXEC_SETUP_CREDENTIALS;
+
+        if (IN_SET(command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START))
+                flags |= EXEC_SETENV_MONITOR_RESULT;
+
+        if (command_id == SERVICE_EXEC_START)
+                return flags|cred_flag|EXEC_PASS_FDS|EXEC_SET_WATCHDOG;
+
+        flags |= EXEC_IS_CONTROL;
+
+        /* Put control processes spawned later than main process under .control sub-cgroup if appropriate */
+        if (!IN_SET(command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE))
+                flags |= EXEC_CONTROL_CGROUP;
+
+        if (IN_SET(command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST))
+                flags |= EXEC_SETENV_RESULT;
+
+        return flags;
+}
+
 static int service_spawn_internal(
                 const char *caller,
                 Service *s,
                 ExecCommand *c,
-                usec_t timeout,
                 ExecFlags flags,
+                usec_t timeout,
                 PidRef *ret_pid) {
 
         _cleanup_(exec_params_shallow_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(flags);
@@ -1615,7 +1633,6 @@ static int service_spawn_internal(
         _cleanup_strv_free_ char **final_env = NULL, **our_env = NULL;
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
         size_t n_env = 0;
-        pid_t pid;
         int r;
 
         assert(caller);
@@ -1631,7 +1648,7 @@ static int service_spawn_internal(
 
         assert(!s->exec_fd_event_source);
 
-        if (flags & EXEC_IS_CONTROL) {
+        if (FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL)) {
                 /* If this is a control process, mask the permissions/chroot application if this is requested. */
                 if (s->permissions_start_only)
                         exec_params.flags &= ~EXEC_APPLY_SANDBOXING;
@@ -1639,7 +1656,7 @@ static int service_spawn_internal(
                         exec_params.flags &= ~EXEC_APPLY_CHROOT;
         }
 
-        if ((flags & EXEC_PASS_FDS) ||
+        if (FLAGS_SET(exec_params.flags, EXEC_PASS_FDS) ||
             s->exec_context.std_input == EXEC_INPUT_SOCKET ||
             s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
             s->exec_context.std_error == EXEC_OUTPUT_SOCKET) {
@@ -1654,10 +1671,12 @@ static int service_spawn_internal(
 
                 exec_params.open_files = s->open_files;
 
+                exec_params.flags |= EXEC_PASS_FDS;
+
                 log_unit_debug(UNIT(s), "Passing %zu fds to service", exec_params.n_socket_fds + exec_params.n_storage_fds);
         }
 
-        if (!FLAGS_SET(flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
+        if (!FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
                 r = service_allocate_exec_fd(s, &exec_fd_source, &exec_params.exec_fd);
                 if (r < 0)
                         return r;
@@ -1671,7 +1690,7 @@ static int service_spawn_internal(
         if (!our_env)
                 return -ENOMEM;
 
-        if (service_exec_needs_notify_socket(s, flags)) {
+        if (service_exec_needs_notify_socket(s, exec_params.flags)) {
                 if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0)
                         return -ENOMEM;
 
@@ -1730,10 +1749,10 @@ static int service_spawn_internal(
 
         Service *env_source = NULL;
         const char *monitor_prefix;
-        if (flags & EXEC_SETENV_RESULT) {
+        if (FLAGS_SET(exec_params.flags, EXEC_SETENV_RESULT)) {
                 env_source = s;
                 monitor_prefix = "";
-        } else if (flags & EXEC_SETENV_MONITOR_RESULT) {
+        } else if (FLAGS_SET(exec_params.flags, EXEC_SETENV_MONITOR_RESULT)) {
                 env_source = service_get_triggering_service(s);
                 monitor_prefix = "MONITOR_";
         }
@@ -1751,18 +1770,15 @@ static int service_spawn_internal(
                                 r = asprintf(our_env + n_env++, "%sEXIT_STATUS=%i", monitor_prefix, env_source->main_exec_status.status);
                         else
                                 r = asprintf(our_env + n_env++, "%sEXIT_STATUS=%s", monitor_prefix, signal_to_string(env_source->main_exec_status.status));
-
                         if (r < 0)
                                 return -ENOMEM;
                 }
 
                 if (env_source != s) {
-                        if (!sd_id128_is_null(UNIT(env_source)->invocation_id)) {
-                                r = asprintf(our_env + n_env++, "%sINVOCATION_ID=" SD_ID128_FORMAT_STR,
-                                             monitor_prefix, SD_ID128_FORMAT_VAL(UNIT(env_source)->invocation_id));
-                                if (r < 0)
+                        if (!sd_id128_is_null(UNIT(env_source)->invocation_id))
+                                if (asprintf(our_env + n_env++, "%sINVOCATION_ID=" SD_ID128_FORMAT_STR,
+                                             monitor_prefix, SD_ID128_FORMAT_VAL(UNIT(env_source)->invocation_id)) < 0)
                                         return -ENOMEM;
-                        }
 
                         if (asprintf(our_env + n_env++, "%sUNIT=%s", monitor_prefix, UNIT(env_source)->id) < 0)
                                 return -ENOMEM;
@@ -1806,17 +1822,13 @@ static int service_spawn_internal(
                        &exec_params,
                        s->exec_runtime,
                        &s->cgroup_context,
-                       &pid);
+                       &pidref);
         if (r < 0)
                 return r;
 
         s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
         s->exec_fd_hot = false;
 
-        r = pidref_set_pid(&pidref, pid);
-        if (r < 0)
-                return r;
-
         r = unit_watch_pidref(UNIT(s), &pidref, /* exclusive= */ true);
         if (r < 0)
                 return r;
@@ -1864,10 +1876,10 @@ static int cgroup_good(Service *s) {
         /* Returns 0 if the cgroup is empty or doesn't exist, > 0 if it is exists and is populated, < 0 if we can't
          * figure it out */
 
-        if (!UNIT(s)->cgroup_path)
+        if (!s->cgroup_runtime || !s->cgroup_runtime->cgroup_path)
                 return 0;
 
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path);
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, s->cgroup_runtime->cgroup_path);
         if (r < 0)
                 return r;
 
@@ -1876,6 +1888,7 @@ static int cgroup_good(Service *s) {
 
 static bool service_shall_restart(Service *s, const char **reason) {
         assert(s);
+        assert(reason);
 
         /* Don't restart after manual stops */
         if (s->forbid_restart) {
@@ -1891,6 +1904,13 @@ static bool service_shall_restart(Service *s, const char **reason) {
 
         /* Restart if the exit code/status are configured as restart triggers */
         if (exit_status_set_test(&s->restart_force_status,  s->main_exec_status.code, s->main_exec_status.status)) {
+                /* Don't allow Type=oneshot services to restart on success. Note that Restart=always/on-success
+                 * is already rejected in service_verify. */
+                if (s->type == SERVICE_ONESHOT && s->result == SERVICE_SUCCESS) {
+                        *reason = "service type and exit status";
+                        return false;
+                }
+
                 *reason = "forced by exit status";
                 return true;
         }
@@ -1962,7 +1982,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         } else if (s->result == SERVICE_SKIP_CONDITION) {
                 unit_log_skip(UNIT(s), service_result_to_string(s->result));
                 end_state = service_determine_dead_state(s);
-                restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART;
+                restart_state = _SERVICE_STATE_INVALID; /* Never restart if skipped due to condition failure */
         } else {
                 unit_log_failure(UNIT(s), service_result_to_string(s->result));
                 end_state = SERVICE_FAILED;
@@ -1984,8 +2004,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         if (allow_restart) {
                 usec_t restart_usec_next;
 
+                assert(restart_state >= 0 && restart_state < _SERVICE_STATE_MAX);
+
                 /* We make two state changes here: one that maps to the high-level UNIT_INACTIVE/UNIT_FAILED
-                 * state (i.e. a state indicating deactivation), and then one that that maps to the
+                 * state (i.e. a state indicating deactivation), and then one that maps to the
                  * high-level UNIT_STARTING state (i.e. a state indicating activation). We do this so that
                  * external software can watch the state changes and see all service failures, even if they
                  * are only transitionary and followed by an automatic restart. We have fine-grained
@@ -1999,8 +2021,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
                 r = service_arm_timer(s, /* relative= */ true, restart_usec_next);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to install restart timer: %m");
-                        service_enter_dead(s, SERVICE_FAILURE_RESOURCES, /* allow_restart= */ false);
-                        return;
+                        return service_enter_dead(s, SERVICE_FAILURE_RESOURCES, /* allow_restart= */ false);
                 }
 
                 log_unit_debug(UNIT(s), "Next restart interval calculated as: %s", FORMAT_TIMESPAN(restart_usec_next, 0));
@@ -2064,8 +2085,8 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_stop_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
                                   &s->control_pid);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'stop-post' task: %m");
@@ -2118,13 +2139,7 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
         (void) unit_enqueue_rewatch_pids(UNIT(s));
 
         kill_operation = state_to_kill_operation(s, state);
-        r = unit_kill_context(
-                        UNIT(s),
-                        &s->kill_context,
-                        kill_operation,
-                        &s->main_pid,
-                        &s->control_pid,
-                        s->main_pid_alien);
+        r = unit_kill_context(UNIT(s), kill_operation);
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
                 goto fail;
@@ -2193,8 +2208,8 @@ static void service_enter_stop(Service *s, ServiceResult f) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_stop_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP,
                                   &s->control_pid);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'stop' task: %m");
@@ -2209,6 +2224,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
 
 static bool service_good(Service *s) {
         int main_pid_ok;
+
         assert(s);
 
         if (s->type == SERVICE_DBUS && !s->bus_name_good)
@@ -2265,6 +2281,7 @@ static void service_enter_running(Service *s, ServiceResult f) {
 
 static void service_enter_start_post(Service *s) {
         int r;
+
         assert(s);
 
         service_unwatch_control_pid(s);
@@ -2277,8 +2294,8 @@ static void service_enter_start_post(Service *s) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_start_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
                                   &s->control_pid);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'start-post' task: %m");
@@ -2387,43 +2404,44 @@ static void service_enter_start(Service *s) {
 
         r = service_spawn(s,
                           c,
+                          service_exec_flags(SERVICE_EXEC_START, EXEC_SETUP_CREDENTIALS_FRESH),
                           timeout,
-                          EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG|EXEC_WRITE_CREDENTIALS|EXEC_SETENV_MONITOR_RESULT,
                           &pidref);
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'start' task: %m");
                 goto fail;
         }
 
-        if (IN_SET(s->type, SERVICE_SIMPLE, SERVICE_IDLE)) {
-                /* For simple services we immediately start
-                 * the START_POST binaries. */
+        assert(pidref.pid == c->exec_status.pid);
 
-                (void) service_set_main_pidref(s, &pidref);
-                service_enter_start_post(s);
-
-        } else  if (s->type == SERVICE_FORKING) {
+        switch (s->type) {
 
-                /* For forking services we wait until the start
-                 * process exited. */
+        case SERVICE_SIMPLE:
+        case SERVICE_IDLE:
+                /* For simple services we immediately start the START_POST binaries. */
+                (void) service_set_main_pidref(s, TAKE_PIDREF(pidref), &c->exec_status.start_timestamp);
+                return service_enter_start_post(s);
 
+        case SERVICE_FORKING:
+                /* For forking services we wait until the start process exited. */
                 pidref_done(&s->control_pid);
                 s->control_pid = TAKE_PIDREF(pidref);
-                service_set_state(s, SERVICE_START);
-
-        } else if (IN_SET(s->type, SERVICE_ONESHOT, SERVICE_DBUS, SERVICE_NOTIFY, SERVICE_NOTIFY_RELOAD, SERVICE_EXEC)) {
+                return service_set_state(s, SERVICE_START);
+
+        case SERVICE_ONESHOT: /* For oneshot services we wait until the start process exited, too, but it is our main process. */
+        case SERVICE_EXEC:
+        case SERVICE_DBUS:
+        case SERVICE_NOTIFY:
+        case SERVICE_NOTIFY_RELOAD:
+                /* For D-Bus services we know the main pid right away, but wait for the bus name to appear
+                 * on the bus. 'notify' and 'exec' services wait for readiness notification and EOF
+                 * on exec_fd, respectively. */
+                (void) service_set_main_pidref(s, TAKE_PIDREF(pidref), &c->exec_status.start_timestamp);
+                return service_set_state(s, SERVICE_START);
 
-                /* For oneshot services we wait until the start process exited, too, but it is our main process. */
-
-                /* For D-Bus services we know the main pid right away, but wait for the bus name to appear on the
-                 * bus. 'notify' and 'exec' services are similar. */
-
-                (void) service_set_main_pidref(s, &pidref);
-                service_set_state(s, SERVICE_START);
-        } else
+        default:
                 assert_not_reached();
-
-        return;
+        }
 
 fail:
         service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_RESOURCES);
@@ -2447,8 +2465,8 @@ static void service_enter_start_pre(Service *s) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_start_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN|EXEC_SETENV_MONITOR_RESULT|EXEC_WRITE_CREDENTIALS,
                                   &s->control_pid);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'start-pre' task: %m");
@@ -2484,10 +2502,9 @@ static void service_enter_condition(Service *s) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_start_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_APPLY_TTY_STDIN,
                                   &s->control_pid);
-
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'exec-condition' task: %m");
                         goto fail;
@@ -2527,11 +2544,9 @@ static void service_enter_restart(Service *s) {
         /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't
          * fully stopped, i.e. as long as it remains up or remains in auto-start states. The user can reset
          * the counter explicitly however via the usual "systemctl reset-failure" logic. */
-        s->n_restarts ++;
+        s->n_restarts++;
         s->flush_n_restarts = false;
 
-        s->notify_access_override = _NOTIFY_ACCESS_INVALID;
-
         log_unit_struct(UNIT(s), LOG_INFO,
                         "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
                         LOG_UNIT_INVOCATION_ID(UNIT(s)),
@@ -2595,8 +2610,8 @@ static void service_enter_reload(Service *s) {
 
                 r = service_spawn(s,
                                   s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                   s->timeout_start_usec,
-                                  EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP,
                                   &s->control_pid);
                 if (r < 0) {
                         log_unit_warning_errno(UNIT(s), r, "Failed to spawn 'reload' task: %m");
@@ -2651,13 +2666,8 @@ static void service_run_next_control(Service *s) {
 
         r = service_spawn(s,
                           s->control_command,
+                          service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                           timeout,
-                          EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|
-                          (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD) ? EXEC_WRITE_CREDENTIALS : 0)|
-                          (IN_SET(s->control_command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)|
-                          (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)|
-                          (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START) ? EXEC_SETENV_MONITOR_RESULT : 0)|
-                          (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0),
                           &s->control_pid);
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to spawn next control task: %m");
@@ -2688,8 +2698,8 @@ static void service_run_next_main(Service *s) {
 
         r = service_spawn(s,
                           s->main_command,
+                          service_exec_flags(SERVICE_EXEC_START, EXEC_SETUP_CREDENTIALS),
                           s->timeout_start_usec,
-                          EXEC_PASS_FDS|EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_SET_WATCHDOG|EXEC_SETENV_MONITOR_RESULT|EXEC_WRITE_CREDENTIALS,
                           &pidref);
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to spawn next main task: %m");
@@ -2697,7 +2707,7 @@ static void service_run_next_main(Service *s) {
                 return;
         }
 
-        (void) service_set_main_pidref(s, &pidref);
+        (void) service_set_main_pidref(s, TAKE_PIDREF(pidref), &s->main_command->exec_status.start_timestamp);
 }
 
 static int service_start(Unit *u) {
@@ -2755,16 +2765,16 @@ static int service_start(Unit *u) {
                 s->flush_n_restarts = false;
         }
 
-        u->reset_accounting = true;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt)
+                crt->reset_accounting = true;
 
         service_enter_condition(s);
         return 1;
 }
 
 static int service_stop(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         /* Don't create restart jobs from manual stops. */
         s->forbid_restart = true;
@@ -2821,9 +2831,7 @@ static int service_stop(Unit *u) {
 }
 
 static int service_reload(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         assert(IN_SET(s->state, SERVICE_RUNNING, SERVICE_EXITED));
 
@@ -2832,9 +2840,7 @@ static int service_reload(Unit *u) {
 }
 
 static bool service_can_reload(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         return s->exec_command[SERVICE_EXEC_RELOAD] ||
                 s->type == SERVICE_NOTIFY_RELOAD;
@@ -2858,14 +2864,13 @@ static unsigned service_exec_command_index(Unit *u, ServiceExecCommand id, const
 }
 
 static int service_serialize_exec_command(Unit *u, FILE *f, const ExecCommand *command) {
+        Service *s = ASSERT_PTR(SERVICE(u));
         _cleanup_free_ char *args = NULL, *p = NULL;
-        Service *s = SERVICE(u);
         const char *type, *key;
         ServiceExecCommand id;
         size_t length = 0;
         unsigned idx;
 
-        assert(s);
         assert(f);
 
         if (!command)
@@ -2927,10 +2932,9 @@ static int service_serialize_exec_command(Unit *u, FILE *f, const ExecCommand *c
 }
 
 static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        assert(u);
         assert(f);
         assert(fds);
 
@@ -2996,13 +3000,14 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
                 if (!c)
                         return log_oom();
 
-                (void) serialize_item_format(f, "fd-store-fd", "%i \"%s\" %i", copy, c, fs->do_poll);
+                (void) serialize_item_format(f, "fd-store-fd", "%i \"%s\" %s", copy, c, one_zero(fs->do_poll));
         }
 
         if (s->main_exec_status.pid > 0) {
                 (void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
                 (void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
                 (void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
+                (void) serialize_dual_timestamp(f, "main-exec-status-handoff", &s->main_exec_status.handoff_timestamp);
 
                 if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
                         (void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code);
@@ -3033,14 +3038,14 @@ int service_deserialize_exec_command(
                 const char *key,
                 const char *value) {
 
-        Service *s = SERVICE(u);
-        int r;
-        unsigned idx = 0, i;
-        bool control, found = false, last = false;
-        ServiceExecCommand id = _SERVICE_EXEC_COMMAND_INVALID;
+        Service *s = ASSERT_PTR(SERVICE(u));
         ExecCommand *command = NULL;
+        ServiceExecCommand id = _SERVICE_EXEC_COMMAND_INVALID;
         _cleanup_free_ char *path = NULL;
         _cleanup_strv_free_ char **argv = NULL;
+        unsigned idx = 0, i;
+        bool control, found = false, last = false;
+        int r;
 
         enum ExecCommandState {
                 STATE_EXEC_COMMAND_TYPE,
@@ -3051,7 +3056,6 @@ int service_deserialize_exec_command(
                 _STATE_EXEC_COMMAND_INVALID = -EINVAL,
         } state;
 
-        assert(s);
         assert(key);
         assert(value);
 
@@ -3096,7 +3100,7 @@ int service_deserialize_exec_command(
                 case STATE_EXEC_COMMAND_ARGS:
                         r = strv_extend(&argv, arg);
                         if (r < 0)
-                                return -ENOMEM;
+                                return r;
                         break;
                 default:
                         assert_not_reached();
@@ -3139,10 +3143,9 @@ int service_deserialize_exec_command(
 }
 
 static int service_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -3179,10 +3182,10 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                         (void) deserialize_pidref(fds, value, &s->control_pid);
 
         } else if (streq(key, "main-pid")) {
-                _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+                PidRef pidref;
 
                 if (!pidref_is_set(&s->main_pid) && deserialize_pidref(fds, value, &pidref) >= 0)
-                        (void) service_set_main_pidref(s, &pidref);
+                        (void) service_set_main_pidref(s, pidref, /* start_timestamp = */ NULL);
 
         } else if (streq(key, "main-pid-known")) {
                 int b;
@@ -3239,9 +3242,9 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 _cleanup_close_ int fd = -EBADF;
                 int do_poll;
 
-                r = extract_first_word(&value, &fdv, NULL, 0);
-                if (r <= 0) {
-                        log_unit_debug(u, "Failed to parse fd-store-fd value, ignoring: %s", value);
+                r = extract_many_words(&value, " ", EXTRACT_CUNESCAPE|EXTRACT_UNQUOTE, &fdv, &fdn, &fdp);
+                if (r < 2 || r > 3) {
+                        log_unit_debug(u, "Failed to deserialize fd-store-fd, ignoring: %s", value);
                         return 0;
                 }
 
@@ -3249,24 +3252,17 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 if (fd < 0)
                         return 0;
 
-                r = extract_first_word(&value, &fdn, NULL, EXTRACT_CUNESCAPE | EXTRACT_UNQUOTE);
-                if (r <= 0) {
-                        log_unit_debug(u, "Failed to parse fd-store-fd value, ignoring: %s", value);
-                        return 0;
-                }
-
-                r = extract_first_word(&value, &fdp, NULL, 0);
-                if (r == 0) {
-                        /* If the value is not present, we assume the default */
-                        do_poll = 1;
-                } else if (r < 0 || (r = safe_atoi(fdp, &do_poll)) < 0) {
-                        log_unit_debug_errno(u, r, "Failed to parse fd-store-fd value \"%s\", ignoring: %m", value);
+                do_poll = r == 3 ? parse_boolean(fdp) : true;
+                if (do_poll < 0) {
+                        log_unit_debug_errno(u, do_poll,
+                                             "Failed to deserialize fd-store-fd do_poll, ignoring: %s", fdp);
                         return 0;
                 }
 
                 r = service_add_fd_store(s, fd, fdn, do_poll);
                 if (r < 0) {
-                        log_unit_debug_errno(u, r, "Failed to store deserialized fd %i, ignoring: %m", fd);
+                        log_unit_debug_errno(u, r,
+                                             "Failed to store deserialized fd '%s', ignoring: %m", fdn);
                         return 0;
                 }
 
@@ -3296,6 +3292,8 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp);
         else if (streq(key, "main-exec-status-exit"))
                 deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp);
+        else if (streq(key, "main-exec-status-handoff"))
+                deserialize_dual_timestamp(value, &s->main_exec_status.handoff_timestamp);
         else if (streq(key, "notify-access-override")) {
                 NotifyAccess notify_access;
 
@@ -3383,13 +3381,12 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
 }
 
 static UnitActiveState service_active_state(Unit *u) {
+        Service *s = ASSERT_PTR(SERVICE(u));
         const UnitActiveState *table;
 
-        assert(u);
-
-        table = SERVICE(u)->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
+        table = s->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
 
-        return table[SERVICE(u)->state];
+        return table[s->state];
 }
 
 static const char *service_sub_state_to_string(Unit *u) {
@@ -3399,9 +3396,7 @@ static const char *service_sub_state_to_string(Unit *u) {
 }
 
 static bool service_may_gc(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         /* Never clean up services that still have a process around, even if the service is formally dead. Note that
          * unit_may_gc() already checked our cgroup for us, we just check our two additional PIDs, too, in case they
@@ -3422,6 +3417,7 @@ static bool service_may_gc(Unit *u) {
 static int service_retry_pid_file(Service *s) {
         int r;
 
+        assert(s);
         assert(s->pid_file);
         assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
 
@@ -3438,6 +3434,8 @@ static int service_retry_pid_file(Service *s) {
 static int service_watch_pid_file(Service *s) {
         int r;
 
+        assert(s);
+
         log_unit_debug(UNIT(s), "Setting watch for PID file %s", s->pid_file_pathspec->path);
 
         r = path_spec_watch(s->pid_file_pathspec, service_dispatch_inotify_io);
@@ -3457,6 +3455,7 @@ static int service_watch_pid_file(Service *s) {
 static int service_demand_pid_file(Service *s) {
         _cleanup_free_ PathSpec *ps = NULL;
 
+        assert(s);
         assert(s->pid_file);
         assert(!s->pid_file_pathspec);
 
@@ -3485,11 +3484,8 @@ static int service_demand_pid_file(Service *s) {
 
 static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
         PathSpec *p = ASSERT_PTR(userdata);
-        Service *s;
+        Service *s = ASSERT_PTR(SERVICE(p->unit));
 
-        s = SERVICE(p->unit);
-
-        assert(s);
         assert(fd >= 0);
         assert(IN_SET(s->state, SERVICE_START, SERVICE_START_POST));
         assert(s->pid_file_pathspec);
@@ -3515,20 +3511,19 @@ fail:
 }
 
 static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
-        Service *s = SERVICE(userdata);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(userdata));
 
         log_unit_debug(UNIT(s), "got exec-fd event");
 
         /* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
-         * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
-         * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
-         * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
-         * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
-         * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
-         * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
-         * sends a zero byte we'll ignore POLLHUP on the fd again. */
+         * successfully for it. We implement this through a pipe() towards the child, which the kernel
+         * automatically closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on
+         * the pipe in the parent. We need to be careful however, as there are other reasons that we might
+         * cause the child's side of the pipe to be closed (for example, a simple exit()). To deal with that
+         * we'll ignore EOFs on the pipe unless the child signalled us first that it is about to call the
+         * execve(). It does so by sending us a simple non-zero byte via the pipe. We also provide the child
+         * with a way to inform us in case execve() failed: if it sends a zero byte we'll ignore POLLHUP on
+         * the fd again. */
 
         for (;;) {
                 uint8_t x;
@@ -3541,8 +3536,7 @@ static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t ev
 
                         return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
                 }
-                if (n == 0) { /* EOF → the event we are waiting for */
-
+                if (n == 0) { /* EOF → the event we are waiting for in case of Type=exec */
                         s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
 
                         if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
@@ -3561,16 +3555,13 @@ static int service_dispatch_exec_io(sd_event_source *source, int fd, uint32_t ev
 
                 /* A byte was read → this turns on/off the exec fd logic */
                 assert(n == sizeof(x));
+
                 s->exec_fd_hot = x;
         }
-
-        return 0;
 }
 
 static void service_notify_cgroup_empty_event(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         log_unit_debug(u, "Control group is empty.");
 
@@ -3647,7 +3638,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
 }
 
 static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         if (managed_oom)
                 log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
@@ -3702,12 +3693,12 @@ static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
 }
 
 static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+        Service *s = ASSERT_PTR(SERVICE(u));
         bool notify_dbus = true;
-        Service *s = SERVICE(u);
         ServiceResult f;
         ExitClean clean_mode;
+        int r;
 
-        assert(s);
         assert(pid >= 0);
 
         /* Oneshot services and non-SERVICE_EXEC_START commands should not be
@@ -3918,7 +3909,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                     s->control_command->command_next &&
                     f == SERVICE_SUCCESS) {
 
-                        /* There is another command to * execute, so let's do that. */
+                        /* There is another command to execute, so let's do that. */
 
                         log_unit_debug(u, "Running next control command for state %s.", service_state_to_string(s->state));
                         service_run_next_control(s);
@@ -3959,7 +3950,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
                                 if (s->pid_file) {
                                         bool has_start_post;
-                                        int r;
 
                                         /* Let's try to load the pid file here if we can.
                                          * The PID file might actually be created by a START_POST
@@ -3986,8 +3976,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                                 }
 
                                 if (s->pid_file) {
-                                        int r;
-
                                         r = service_load_pid_file(s, true);
                                         if (r < 0) {
                                                 r = service_demand_pid_file(s);
@@ -4076,9 +4064,8 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 }
 
 static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Service *s = SERVICE(userdata);
+        Service *s = ASSERT_PTR(SERVICE(userdata));
 
-        assert(s);
         assert(source == s->timer_event_source);
 
         switch (s->state) {
@@ -4275,10 +4262,9 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us
 }
 
 static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata) {
-        Service *s = SERVICE(userdata);
+        Service *s = ASSERT_PTR(SERVICE(userdata));
         usec_t watchdog_usec;
 
-        assert(s);
         assert(source == s->watchdog_event_source);
 
         watchdog_usec = service_get_watchdog_usec(s);
@@ -4295,35 +4281,49 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
         return 0;
 }
 
-static bool service_notify_message_authorized(Service *s, pid_t pid, FDSet *fds) {
+static void service_force_watchdog(Service *s) {
         assert(s);
 
+        if (!UNIT(s)->manager->service_watchdogs)
+                return;
+
+        log_unit_error(UNIT(s), "Watchdog request (last status: %s)!",
+                       s->status_text ?: "<unset>");
+
+        service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
+}
+
+static bool service_notify_message_authorized(Service *s, pid_t pid) {
+        assert(s);
+        assert(pid_is_valid(pid));
+
         NotifyAccess notify_access = service_get_notify_access(s);
 
         if (notify_access == NOTIFY_NONE) {
-                log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception is disabled.", pid);
+                /* Warn level only if no notifications are expected */
+                log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception is disabled", pid);
                 return false;
         }
 
         if (notify_access == NOTIFY_MAIN && pid != s->main_pid.pid) {
                 if (pidref_is_set(&s->main_pid))
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid.pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid.pid);
                 else
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID which is currently not known", pid);
 
                 return false;
         }
 
         if (notify_access == NOTIFY_EXEC && pid != s->main_pid.pid && pid != s->control_pid.pid) {
                 if (pidref_is_set(&s->main_pid) && pidref_is_set(&s->control_pid))
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT" and control PID "PID_FMT,
-                                         pid, s->main_pid.pid, s->control_pid.pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT" and control PID "PID_FMT,
+                                       pid, s->main_pid.pid, s->control_pid.pid);
                 else if (pidref_is_set(&s->main_pid))
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid.pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID "PID_FMT, pid, s->main_pid.pid);
                 else if (pidref_is_set(&s->control_pid))
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for control PID "PID_FMT, pid, s->control_pid.pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for control PID "PID_FMT, pid, s->control_pid.pid);
                 else
-                        log_unit_warning(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID and control PID which are currently not known", pid);
+                        log_unit_debug(UNIT(s), "Got notification message from PID "PID_FMT", but reception only permitted for main PID and control PID which are currently not known", pid);
 
                 return false;
         }
@@ -4331,44 +4331,35 @@ static bool service_notify_message_authorized(Service *s, pid_t pid, FDSet *fds)
         return true;
 }
 
-static void service_force_watchdog(Service *s) {
-        if (!UNIT(s)->manager->service_watchdogs)
-                return;
-
-        log_unit_error(UNIT(s), "Watchdog request (last status: %s)!",
-                       s->status_text ?: "<unset>");
-
-        service_enter_signal(s, SERVICE_STOP_WATCHDOG, SERVICE_FAILURE_WATCHDOG);
-}
-
 static void service_notify_message(
                 Unit *u,
                 const struct ucred *ucred,
                 char * const *tags,
                 FDSet *fds) {
 
-        Service *s = SERVICE(u);
-        bool notify_dbus = false;
-        usec_t monotonic_usec = USEC_INFINITY;
-        const char *e;
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        assert(u);
         assert(ucred);
 
-        if (!service_notify_message_authorized(s, ucred->pid, fds))
+        if (!service_notify_message_authorized(s, ucred->pid))
                 return;
 
         if (DEBUG_LOGGING) {
-                _cleanup_free_ char *cc = NULL;
-
-                cc = strv_join(tags, ", ");
+                _cleanup_free_ char *cc = strv_join(tags, ", ");
                 log_unit_debug(u, "Got notification message from PID "PID_FMT" (%s)", ucred->pid, empty_to_na(cc));
         }
 
+        usec_t monotonic_usec = USEC_INFINITY;
+        bool notify_dbus = false;
+        const char *e;
+
         /* Interpret MAINPID= */
         e = strv_find_startswith(tags, "MAINPID=");
-        if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY)) {
+        if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING,
+                        SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+                        SERVICE_STOP, SERVICE_STOP_SIGTERM)) {
+
                 _cleanup_(pidref_done) PidRef new_main_pid = PIDREF_NULL;
 
                 r = pidref_set_pidstr(&new_main_pid, e);
@@ -4384,10 +4375,10 @@ static void service_notify_message(
                                         log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid.pid);
                                         r = 1;
                                 } else
-                                        log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid);
+                                        log_unit_warning(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid);
                         }
                         if (r > 0) {
-                                (void) service_set_main_pidref(s, &new_main_pid);
+                                (void) service_set_main_pidref(s, TAKE_PIDREF(new_main_pid), /* start_timestamp = */ NULL);
 
                                 r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
                                 if (r < 0)
@@ -4585,11 +4576,36 @@ static void service_notify_message(
                 unit_add_to_dbus_queue(u);
 }
 
+static void service_handoff_timestamp(
+                Unit *u,
+                const struct ucred *ucred,
+                const dual_timestamp *ts) {
+
+        Service *s = ASSERT_PTR(SERVICE(u));
+
+        assert(ucred);
+        assert(ts);
+
+        if (s->main_pid.pid == ucred->pid) {
+                if (s->main_command)
+                        exec_status_handoff(&s->main_command->exec_status, ucred, ts);
+
+                exec_status_handoff(&s->main_exec_status, ucred, ts);
+        } else if (s->control_pid.pid == ucred->pid && s->control_command)
+                exec_status_handoff(&s->control_command->exec_status, ucred, ts);
+        else
+                return;
+
+        unit_add_to_dbus_queue(u);
+}
+
 static int service_get_timeout(Unit *u, usec_t *timeout) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         uint64_t t;
         int r;
 
+        assert(timeout);
+
         if (!s->timer_event_source)
                 return 0;
 
@@ -4604,7 +4620,7 @@ static int service_get_timeout(Unit *u, usec_t *timeout) {
 }
 
 static usec_t service_get_timeout_start_usec(Unit *u) {
-        Service *s = SERVICE(ASSERT_PTR(u));
+        Service *s = ASSERT_PTR(SERVICE(u));
         return s->timeout_start_usec;
 }
 
@@ -4624,16 +4640,14 @@ static bool pick_up_pid_from_bus_name(Service *s) {
 }
 
 static int bus_name_pid_lookup_callback(sd_bus_message *reply, void *userdata, sd_bus_error *ret_error) {
+        Service *s = ASSERT_PTR(SERVICE(userdata));
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
         const sd_bus_error *e;
-        Unit *u = ASSERT_PTR(userdata);
         uint32_t pid;
-        Service *s;
         int r;
 
         assert(reply);
 
-        s = SERVICE(u);
         s->bus_name_pid_lookup_slot = sd_bus_slot_unref(s->bus_name_pid_lookup_slot);
 
         if (!s->bus_name || !pick_up_pid_from_bus_name(s))
@@ -4658,20 +4672,17 @@ static int bus_name_pid_lookup_callback(sd_bus_message *reply, void *userdata, s
                 return 1;
         }
 
-        log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, s->bus_name, pidref.pid);
+        log_unit_debug(UNIT(s), "D-Bus name %s is now owned by process " PID_FMT, s->bus_name, pidref.pid);
 
-        (void) service_set_main_pidref(s, &pidref);
+        (void) service_set_main_pidref(s, TAKE_PIDREF(pidref), /* start_timestamp = */ NULL);
         (void) unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
         return 1;
 }
 
 static void service_bus_name_owner_change(Unit *u, const char *new_owner) {
-
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        assert(s);
-
         if (new_owner)
                 log_unit_debug(u, "D-Bus name %s now owned by %s", s->bus_name, new_owner);
         else
@@ -4721,7 +4732,7 @@ int service_set_socket_fd(
                 Service *s,
                 int fd,
                 Socket *sock,
-                SocketPeer *peer,
+                SocketPeer *peer, /* reference to object is donated to us on success */
                 bool selinux_context_net) {
 
         _cleanup_free_ char *peer_text = NULL;
@@ -4729,6 +4740,7 @@ int service_set_socket_fd(
 
         assert(s);
         assert(fd >= 0);
+        assert(sock);
 
         /* This is called by the socket code when instantiating a new service for a stream socket and the socket needs
          * to be configured. We take ownership of the passed fd on success. */
@@ -4760,12 +4772,13 @@ int service_set_socket_fd(
                         return r;
         }
 
-        r = unit_add_two_dependencies(UNIT(sock), UNIT_BEFORE, UNIT_TRIGGERS, UNIT(s), false, UNIT_DEPENDENCY_IMPLICIT);
+        r = unit_add_two_dependencies(UNIT(s), UNIT_AFTER, UNIT_TRIGGERED_BY, UNIT(sock), false, UNIT_DEPENDENCY_IMPLICIT);
         if (r < 0)
-                return r;
+                return log_unit_debug_errno(UNIT(s), r,
+                                            "Failed to add After=/TriggeredBy= dependencies on socket unit: %m");
 
         s->socket_fd = fd;
-        s->socket_peer = socket_peer_ref(peer);
+        s->socket_peer = peer;
         s->socket_fd_selinux_context_net = selinux_context_net;
 
         unit_ref_set(&s->accept_socket, UNIT(s), UNIT(sock));
@@ -4773,9 +4786,7 @@ int service_set_socket_fd(
 }
 
 static void service_reset_failed(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         if (s->state == SERVICE_FAILED)
                 service_set_state(s, service_determine_dead_state(s));
@@ -4787,8 +4798,13 @@ static void service_reset_failed(Unit *u) {
         s->flush_n_restarts = false;
 }
 
-static PidRef* service_main_pid(Unit *u) {
-        return &ASSERT_PTR(SERVICE(u))->main_pid;
+static PidRef* service_main_pid(Unit *u, bool *ret_is_alien) {
+        Service *s = ASSERT_PTR(SERVICE(u));
+
+        if (ret_is_alien)
+                *ret_is_alien = s->main_pid_alien;
+
+        return &s->main_pid;
 }
 
 static PidRef* service_control_pid(Unit *u) {
@@ -4796,9 +4812,7 @@ static PidRef* service_control_pid(Unit *u) {
 }
 
 static bool service_needs_console(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         /* We provide our own implementation of this here, instead of relying of the generic implementation
          * unit_needs_console() provides, since we want to return false if we are in SERVICE_EXITED state. */
@@ -4826,9 +4840,7 @@ static bool service_needs_console(Unit *u) {
 }
 
 static int service_exit_status(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         if (s->main_exec_status.pid <= 0 ||
             !dual_timestamp_is_set(&s->main_exec_status.exit_timestamp))
@@ -4841,20 +4853,17 @@ static int service_exit_status(Unit *u) {
 }
 
 static const char* service_status_text(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(s);
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         return s->status_text;
 }
 
 static int service_clean(Unit *u, ExecCleanMask mask) {
+        Service *s = ASSERT_PTR(SERVICE(u));
         _cleanup_strv_free_ char **l = NULL;
         bool may_clean_fdstore = false;
-        Service *s = SERVICE(u);
         int r;
 
-        assert(s);
         assert(mask != 0);
 
         if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_DEAD_RESOURCES_PINNED))
@@ -4910,11 +4919,10 @@ fail:
 }
 
 static int service_can_clean(Unit *u, ExecCleanMask *ret) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         ExecCleanMask mask = 0;
         int r;
 
-        assert(s);
         assert(ret);
 
         r = exec_context_get_clean_mask(&s->exec_context, &mask);
@@ -4928,10 +4936,12 @@ static int service_can_clean(Unit *u, ExecCleanMask *ret) {
         return 0;
 }
 
-static const char *service_finished_job(Unit *u, JobType t, JobResult result) {
+static const char* service_finished_job(Unit *u, JobType t, JobResult result) {
+        Service *s = ASSERT_PTR(SERVICE(u));
+
         if (t == JOB_START &&
             result == JOB_DONE &&
-            SERVICE(u)->type == SERVICE_ONESHOT)
+            s->type == SERVICE_ONESHOT)
                 return "Finished %s.";
 
         /* Fall back to generic */
@@ -4939,11 +4949,9 @@ static const char *service_finished_job(Unit *u, JobType t, JobResult result) {
 }
 
 static int service_can_start(Unit *u) {
-        Service *s = SERVICE(u);
+        Service *s = ASSERT_PTR(SERVICE(u));
         int r;
 
-        assert(s);
-
         /* Make sure we don't enter a busy loop of some kind. */
         r = unit_test_start_limit(u);
         if (r < 0) {
@@ -4955,7 +4963,7 @@ static int service_can_start(Unit *u) {
 }
 
 static void service_release_resources(Unit *u) {
-        Service *s = SERVICE(ASSERT_PTR(u));
+        Service *s = ASSERT_PTR(SERVICE(u));
 
         /* Invoked by the unit state engine, whenever it realizes that unit is dead and there's no job
          * anymore for it, and it hence is a good idea to release resources */
@@ -4978,6 +4986,52 @@ static void service_release_resources(Unit *u) {
                 service_set_state(s, SERVICE_DEAD);
 }
 
+int service_determine_exec_selinux_label(Service *s, char **ret) {
+        int r;
+
+        assert(s);
+        assert(ret);
+
+        if (!mac_selinux_use())
+                return -ENODATA;
+
+        /* Returns the SELinux label used for execution of the main service binary */
+
+        if (s->exec_context.selinux_context)
+                /* Prefer the explicitly configured label if there is one */
+                return strdup_to(ret, s->exec_context.selinux_context);
+
+        if (s->exec_context.root_image ||
+            s->exec_context.n_extension_images > 0 ||
+            !strv_isempty(s->exec_context.extension_directories)) /* We cannot chase paths through images */
+                return log_unit_debug_errno(UNIT(s), SYNTHETIC_ERRNO(ENODATA), "Service with RootImage=, ExtensionImages= or ExtensionDirectories= set, cannot determine socket SELinux label before activation, ignoring.");
+
+        ExecCommand *c = s->exec_command[SERVICE_EXEC_START];
+        if (!c)
+                return -ENODATA;
+
+        _cleanup_free_ char *path = NULL;
+        r = chase(c->path, s->exec_context.root_directory, CHASE_PREFIX_ROOT, &path, NULL);
+        if (r < 0) {
+                log_unit_debug_errno(UNIT(s), r, "Failed to resolve service binary '%s', ignoring.", c->path);
+                return -ENODATA;
+        }
+
+        r = mac_selinux_get_create_label_from_exe(path, ret);
+        if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
+                log_unit_debug_errno(UNIT(s), r, "Reading SELinux label off binary '%s' is not supported, ignoring.", path);
+                return -ENODATA;
+        }
+        if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                log_unit_debug_errno(UNIT(s), r, "Can't read SELinux label off binary '%s', due to privileges, ignoring.", path);
+                return -ENODATA;
+        }
+        if (r < 0)
+                return log_unit_debug_errno(UNIT(s), r, "Failed to read SELinux label off binary '%s': %m", path);
+
+        return 0;
+}
+
 static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
         [SERVICE_RESTART_NO]          = "no",
         [SERVICE_RESTART_ON_SUCCESS]  = "on-success",
@@ -4992,7 +5046,7 @@ DEFINE_STRING_TABLE_LOOKUP(service_restart, ServiceRestart);
 
 static const char* const service_restart_mode_table[_SERVICE_RESTART_MODE_MAX] = {
         [SERVICE_RESTART_MODE_NORMAL] = "normal",
-        [SERVICE_RESTART_MODE_DIRECT]  = "direct",
+        [SERVICE_RESTART_MODE_DIRECT] = "direct",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(service_restart_mode, ServiceRestartMode);
@@ -5080,6 +5134,7 @@ const UnitVTable service_vtable = {
         .cgroup_context_offset = offsetof(Service, cgroup_context),
         .kill_context_offset = offsetof(Service, kill_context),
         .exec_runtime_offset = offsetof(Service, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Service, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -5110,8 +5165,7 @@ const UnitVTable service_vtable = {
         .clean = service_clean,
         .can_clean = service_can_clean,
 
-        .freeze = unit_freeze_vtable_common,
-        .thaw = unit_thaw_vtable_common,
+        .freezer_action = unit_cgroup_freezer_action,
 
         .serialize = service_serialize,
         .deserialize_item = service_deserialize_item,
@@ -5130,6 +5184,7 @@ const UnitVTable service_vtable = {
         .notify_cgroup_empty = service_notify_cgroup_empty_event,
         .notify_cgroup_oom = service_notify_cgroup_oom_event,
         .notify_message = service_notify_message,
+        .notify_handoff_timestamp = service_handoff_timestamp,
 
         .main_pid = service_main_pid,
         .control_pid = service_control_pid,
diff --git a/src/core/service.h b/src/core/service.h
index e85302e..59598f7 100644
--- a/src/core/service.h
+++ b/src/core/service.h
@@ -168,6 +168,8 @@ struct Service {
         /* Runtime data of the execution context */
         ExecRuntime *exec_runtime;
 
+        CGroupRuntime *cgroup_runtime;
+
         PidRef main_pid, control_pid;
 
         /* if we are a socket activated service instance, store information of the connection/peer/socket */
@@ -255,6 +257,8 @@ void service_release_socket_fd(Service *s);
 
 usec_t service_restart_usec_next(Service *s);
 
+int service_determine_exec_selinux_label(Service *s, char **ret);
+
 const char* service_restart_to_string(ServiceRestart i) _const_;
 ServiceRestart service_restart_from_string(const char *s) _pure_;
 
diff --git a/src/core/show-status.c b/src/core/show-status.c
index 5b003ba..57ad4db 100644
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@@ -38,13 +38,13 @@ int parse_show_status(const char *v, ShowStatus *ret) {
 
 int status_vprintf(const char *status, ShowStatusFlags flags, const char *format, va_list ap) {
         static const char status_indent[] = "         "; /* "[" STATUS "] " */
+        static bool prev_ephemeral = false;
         static int dumb = -1;
 
         _cleanup_free_ char *s = NULL;
         _cleanup_close_ int fd = -EBADF;
         struct iovec iovec[7] = {};
         int n = 0;
-        static bool prev_ephemeral;
 
         assert(format);
 
@@ -75,7 +75,7 @@ int status_vprintf(const char *status, ShowStatusFlags flags, const char *format
                 if (c <= 0)
                         c = 80;
 
-                sl = status ? sizeof(status_indent)-1 : 0;
+                sl = status ? strlen(status_indent) : 0;
 
                 emax = c - sl - 1;
                 if (emax < 3)
diff --git a/src/core/slice.c b/src/core/slice.c
index fb4f23c..4e71976 100644
--- a/src/core/slice.c
+++ b/src/core/slice.c
@@ -16,8 +16,8 @@
 #include "unit.h"
 
 static const UnitActiveState state_translation_table[_SLICE_STATE_MAX] = {
-        [SLICE_DEAD] = UNIT_INACTIVE,
-        [SLICE_ACTIVE] = UNIT_ACTIVE
+        [SLICE_DEAD]   = UNIT_INACTIVE,
+        [SLICE_ACTIVE] = UNIT_ACTIVE,
 };
 
 static void slice_init(Unit *u) {
@@ -27,32 +27,29 @@ static void slice_init(Unit *u) {
         u->ignore_on_isolate = true;
 }
 
-static void slice_set_state(Slice *t, SliceState state) {
+static void slice_set_state(Slice *s, SliceState state) {
         SliceState old_state;
-        assert(t);
 
-        if (t->state != state)
-                bus_unit_send_pending_change_signal(UNIT(t), false);
+        assert(s);
+
+        if (s->state != state)
+                bus_unit_send_pending_change_signal(UNIT(s), false);
 
-        old_state = t->state;
-        t->state = state;
+        old_state = s->state;
+        s->state = state;
 
         if (state != old_state)
-                log_debug("%s changed %s -> %s",
-                          UNIT(t)->id,
-                          slice_state_to_string(old_state),
-                          slice_state_to_string(state));
+                log_unit_debug(UNIT(s), "Changed %s -> %s",
+                               slice_state_to_string(old_state), slice_state_to_string(state));
 
-        unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], /* reload_success = */ true);
+        unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], /* reload_success = */ true);
 }
 
 static int slice_add_parent_slice(Slice *s) {
-        Unit *u = UNIT(s);
+        Unit *u = UNIT(ASSERT_PTR(s));
         _cleanup_free_ char *a = NULL;
         int r;
 
-        assert(s);
-
         if (UNIT_GET_SLICE(u))
                 return 0;
 
@@ -151,10 +148,9 @@ static int slice_load_system_slice(Unit *u) {
 }
 
 static int slice_load(Unit *u) {
-        Slice *s = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
         int r;
 
-        assert(s);
         assert(u->load_state == UNIT_STUB);
 
         r = slice_load_root_slice(u);
@@ -196,36 +192,35 @@ static int slice_load(Unit *u) {
 }
 
 static int slice_coldplug(Unit *u) {
-        Slice *t = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        assert(t);
-        assert(t->state == SLICE_DEAD);
+        assert(s->state == SLICE_DEAD);
 
-        if (t->deserialized_state != t->state)
-                slice_set_state(t, t->deserialized_state);
+        if (s->deserialized_state != s->state)
+                slice_set_state(s, s->deserialized_state);
 
         return 0;
 }
 
 static void slice_dump(Unit *u, FILE *f, const char *prefix) {
-        Slice *t = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        assert(t);
+        assert(s);
         assert(f);
+        assert(prefix);
 
         fprintf(f,
                 "%sSlice State: %s\n",
-                prefix, slice_state_to_string(t->state));
+                prefix, slice_state_to_string(s->state));
 
-        cgroup_context_dump(UNIT(t), f, prefix);
+        cgroup_context_dump(u, f, prefix);
 }
 
 static int slice_start(Unit *u) {
-        Slice *t = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
         int r;
 
-        assert(t);
-        assert(t->state == SLICE_DEAD);
+        assert(s->state == SLICE_DEAD);
 
         r = unit_acquire_invocation_id(u);
         if (r < 0)
@@ -234,27 +229,25 @@ static int slice_start(Unit *u) {
         (void) unit_realize_cgroup(u);
         (void) unit_reset_accounting(u);
 
-        slice_set_state(t, SLICE_ACTIVE);
+        slice_set_state(s, SLICE_ACTIVE);
         return 1;
 }
 
 static int slice_stop(Unit *u) {
-        Slice *t = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        assert(t);
-        assert(t->state == SLICE_ACTIVE);
+        assert(s->state == SLICE_ACTIVE);
 
         /* We do not need to destroy the cgroup explicitly,
          * unit_notify() will do that for us anyway. */
 
-        slice_set_state(t, SLICE_DEAD);
+        slice_set_state(s, SLICE_DEAD);
         return 1;
 }
 
 static int slice_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Slice *s = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        assert(s);
         assert(f);
         assert(fds);
 
@@ -264,9 +257,8 @@ static int slice_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int slice_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Slice *s = SLICE(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -276,26 +268,26 @@ static int slice_deserialize_item(Unit *u, const char *key, const char *value, F
 
                 state = slice_state_from_string(value);
                 if (state < 0)
-                        log_debug("Failed to parse state value %s", value);
+                        log_unit_debug(u, "Failed to parse state: %s", value);
                 else
                         s->deserialized_state = state;
 
         } else
-                log_debug("Unknown serialization key '%s'", key);
+                log_unit_debug(u, "Unknown serialization key: %s", key);
 
         return 0;
 }
 
 static UnitActiveState slice_active_state(Unit *u) {
-        assert(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        return state_translation_table[SLICE(u)->state];
+        return state_translation_table[s->state];
 }
 
 static const char *slice_sub_state_to_string(Unit *u) {
-        assert(u);
+        Slice *s = ASSERT_PTR(SLICE(u));
 
-        return slice_state_to_string(SLICE(u)->state);
+        return slice_state_to_string(s->state);
 }
 
 static int slice_make_perpetual(Manager *m, const char *name, Unit **ret) {
@@ -347,46 +339,47 @@ static void slice_enumerate_perpetual(Manager *m) {
                 (void) slice_make_perpetual(m, SPECIAL_SYSTEM_SLICE, NULL);
 }
 
-static bool slice_freezer_action_supported_by_children(Unit *s) {
+static bool slice_can_freeze(Unit *s) {
         Unit *member;
 
         assert(s);
 
-        UNIT_FOREACH_DEPENDENCY(member, s, UNIT_ATOM_SLICE_OF) {
-
-                if (member->type == UNIT_SLICE &&
-                    !slice_freezer_action_supported_by_children(member))
+        UNIT_FOREACH_DEPENDENCY(member, s, UNIT_ATOM_SLICE_OF)
+                if (!unit_can_freeze(member))
                         return false;
-
-                if (!UNIT_VTABLE(member)->freeze)
-                        return false;
-        }
-
         return true;
 }
 
 static int slice_freezer_action(Unit *s, FreezerAction action) {
+        FreezerAction child_action;
         Unit *member;
         int r;
 
         assert(s);
-        assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
-
-        if (action == FREEZER_FREEZE && !slice_freezer_action_supported_by_children(s)) {
+        assert(IN_SET(action, FREEZER_FREEZE, FREEZER_PARENT_FREEZE,
+                      FREEZER_THAW, FREEZER_PARENT_THAW));
+
+        if (action == FREEZER_FREEZE && !slice_can_freeze(s)) {
+                /* We're intentionally only checking for FREEZER_FREEZE here and ignoring the
+                 * FREEZER_PARENT_FREEZE variant. If we're being frozen by our parent, someone has
+                 * already checked further up the call stack whether we can be frozen. No point
+                 * in redoing that work. */
                 log_unit_warning(s, "Requested freezer operation is not supported by all children of the slice");
                 return 0;
         }
 
-        UNIT_FOREACH_DEPENDENCY(member, s, UNIT_ATOM_SLICE_OF) {
-                if (!member->cgroup_realized)
-                        continue;
+        if (action == FREEZER_FREEZE)
+                child_action = FREEZER_PARENT_FREEZE;
+        else if (action == FREEZER_THAW)
+                child_action = FREEZER_PARENT_THAW;
+        else
+                child_action = action;
 
-                if (action == FREEZER_FREEZE)
-                        r = UNIT_VTABLE(member)->freeze(member);
-                else if (UNIT_VTABLE(member)->thaw)
-                        r = UNIT_VTABLE(member)->thaw(member);
+        UNIT_FOREACH_DEPENDENCY(member, s, UNIT_ATOM_SLICE_OF) {
+                if (UNIT_VTABLE(member)->freezer_action)
+                        r = UNIT_VTABLE(member)->freezer_action(member, child_action);
                 else
-                        /* Thawing is requested but no corresponding method is available, ignore. */
+                        /* Only thawing will reach here, since freezing checks for a method in can_freeze */
                         r = 0;
                 if (r < 0)
                         return r;
@@ -395,27 +388,10 @@ static int slice_freezer_action(Unit *s, FreezerAction action) {
         return unit_cgroup_freezer_action(s, action);
 }
 
-static int slice_freeze(Unit *s) {
-        assert(s);
-
-        return slice_freezer_action(s, FREEZER_FREEZE);
-}
-
-static int slice_thaw(Unit *s) {
-        assert(s);
-
-        return slice_freezer_action(s, FREEZER_THAW);
-}
-
-static bool slice_can_freeze(Unit *s) {
-        assert(s);
-
-        return slice_freezer_action_supported_by_children(s);
-}
-
 const UnitVTable slice_vtable = {
         .object_size = sizeof(Slice),
         .cgroup_context_offset = offsetof(Slice, cgroup_context),
+        .cgroup_runtime_offset = offsetof(Slice, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -436,8 +412,7 @@ const UnitVTable slice_vtable = {
         .start = slice_start,
         .stop = slice_stop,
 
-        .freeze = slice_freeze,
-        .thaw = slice_thaw,
+        .freezer_action = slice_freezer_action,
         .can_freeze = slice_can_freeze,
 
         .serialize = slice_serialize,
diff --git a/src/core/slice.h b/src/core/slice.h
index e2f9274..004349d 100644
--- a/src/core/slice.h
+++ b/src/core/slice.h
@@ -11,6 +11,8 @@ struct Slice {
         SliceState state, deserialized_state;
 
         CGroupContext cgroup_context;
+
+        CGroupRuntime *cgroup_runtime;
 };
 
 extern const UnitVTable slice_vtable;
diff --git a/src/core/socket.c b/src/core/socket.c
index 9adae16..41147d4 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -53,29 +53,44 @@ struct SocketPeer {
         Socket *socket;
         union sockaddr_union peer;
         socklen_t peer_salen;
+        struct ucred peer_cred;
 };
 
 static const UnitActiveState state_translation_table[_SOCKET_STATE_MAX] = {
-        [SOCKET_DEAD] = UNIT_INACTIVE,
-        [SOCKET_START_PRE] = UNIT_ACTIVATING,
-        [SOCKET_START_CHOWN] = UNIT_ACTIVATING,
-        [SOCKET_START_POST] = UNIT_ACTIVATING,
-        [SOCKET_LISTENING] = UNIT_ACTIVE,
-        [SOCKET_RUNNING] = UNIT_ACTIVE,
-        [SOCKET_STOP_PRE] = UNIT_DEACTIVATING,
+        [SOCKET_DEAD]             = UNIT_INACTIVE,
+        [SOCKET_START_PRE]        = UNIT_ACTIVATING,
+        [SOCKET_START_CHOWN]      = UNIT_ACTIVATING,
+        [SOCKET_START_POST]       = UNIT_ACTIVATING,
+        [SOCKET_LISTENING]        = UNIT_ACTIVE,
+        [SOCKET_RUNNING]          = UNIT_ACTIVE,
+        [SOCKET_STOP_PRE]         = UNIT_DEACTIVATING,
         [SOCKET_STOP_PRE_SIGTERM] = UNIT_DEACTIVATING,
         [SOCKET_STOP_PRE_SIGKILL] = UNIT_DEACTIVATING,
-        [SOCKET_STOP_POST] = UNIT_DEACTIVATING,
-        [SOCKET_FINAL_SIGTERM] = UNIT_DEACTIVATING,
-        [SOCKET_FINAL_SIGKILL] = UNIT_DEACTIVATING,
-        [SOCKET_FAILED] = UNIT_FAILED,
-        [SOCKET_CLEANING] = UNIT_MAINTENANCE,
+        [SOCKET_STOP_POST]        = UNIT_DEACTIVATING,
+        [SOCKET_FINAL_SIGTERM]    = UNIT_DEACTIVATING,
+        [SOCKET_FINAL_SIGKILL]    = UNIT_DEACTIVATING,
+        [SOCKET_FAILED]           = UNIT_FAILED,
+        [SOCKET_CLEANING]         = UNIT_MAINTENANCE,
 };
 
 static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
 static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 static void flush_ports(Socket *s);
 
+static bool SOCKET_STATE_WITH_PROCESS(SocketState state) {
+        return IN_SET(state,
+                      SOCKET_START_PRE,
+                      SOCKET_START_CHOWN,
+                      SOCKET_START_POST,
+                      SOCKET_STOP_PRE,
+                      SOCKET_STOP_PRE_SIGTERM,
+                      SOCKET_STOP_PRE_SIGKILL,
+                      SOCKET_STOP_POST,
+                      SOCKET_FINAL_SIGTERM,
+                      SOCKET_FINAL_SIGKILL,
+                      SOCKET_CLEANING);
+}
+
 static void socket_init(Unit *u) {
         Socket *s = SOCKET(u);
 
@@ -108,12 +123,7 @@ static void socket_init(Unit *u) {
 
 static void socket_unwatch_control_pid(Socket *s) {
         assert(s);
-
-        if (!pidref_is_set(&s->control_pid))
-                return;
-
-        unit_unwatch_pidref(UNIT(s), &s->control_pid);
-        pidref_done(&s->control_pid);
+        unit_unwatch_pidref_done(UNIT(s), &s->control_pid);
 }
 
 static void socket_cleanup_fd_list(SocketPort *p) {
@@ -144,11 +154,9 @@ void socket_free_ports(Socket *s) {
 }
 
 static void socket_done(Unit *u) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         SocketPeer *p;
 
-        assert(s);
-
         socket_free_ports(s);
 
         while ((p = set_steal_first(s->peers_by_address)))
@@ -157,6 +165,7 @@ static void socket_done(Unit *u) {
         s->peers_by_address = set_free(s->peers_by_address);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
         s->control_command = NULL;
 
@@ -221,7 +230,7 @@ static int socket_add_mount_dependencies(Socket *s) {
                 if (!path)
                         continue;
 
-                r = unit_require_mounts_for(UNIT(s), path, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(UNIT(s), path, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
         }
@@ -243,6 +252,7 @@ static int socket_add_device_dependencies(Socket *s) {
 
 static int socket_add_default_dependencies(Socket *s) {
         int r;
+
         assert(s);
 
         if (!UNIT(s)->default_dependencies)
@@ -263,6 +273,7 @@ static int socket_add_default_dependencies(Socket *s) {
 
 static bool socket_has_exec(Socket *s) {
         unsigned i;
+
         assert(s);
 
         for (i = 0; i < _SOCKET_EXEC_COMMAND_MAX; i++)
@@ -273,11 +284,9 @@ static bool socket_has_exec(Socket *s) {
 }
 
 static int socket_add_extras(Socket *s) {
-        Unit *u = UNIT(s);
+        Unit *u = UNIT(ASSERT_PTR(s));
         int r;
 
-        assert(s);
-
         /* Pick defaults for the trigger limit, if nothing was explicitly configured. We pick a relatively high limit
          * in Accept=yes mode, and a lower limit for Accept=no. Reason: in Accept=yes mode we are invoking accept()
          * ourselves before the trigger limit can hit, thus incoming connections are taken off the socket queue quickly
@@ -406,11 +415,13 @@ static void peer_address_hash_func(const SocketPeer *s, struct siphash *state) {
         assert(s);
 
         if (s->peer.sa.sa_family == AF_INET)
-                siphash24_compress(&s->peer.in.sin_addr, sizeof(s->peer.in.sin_addr), state);
+                siphash24_compress_typesafe(s->peer.in.sin_addr, state);
         else if (s->peer.sa.sa_family == AF_INET6)
-                siphash24_compress(&s->peer.in6.sin6_addr, sizeof(s->peer.in6.sin6_addr), state);
+                siphash24_compress_typesafe(s->peer.in6.sin6_addr, state);
         else if (s->peer.sa.sa_family == AF_VSOCK)
-                siphash24_compress(&s->peer.vm.svm_cid, sizeof(s->peer.vm.svm_cid), state);
+                siphash24_compress_typesafe(s->peer.vm.svm_cid, state);
+        else if (s->peer.sa.sa_family == AF_UNIX)
+                siphash24_compress_typesafe(s->peer_cred.uid, state);
         else
                 assert_not_reached();
 }
@@ -429,6 +440,8 @@ static int peer_address_compare_func(const SocketPeer *x, const SocketPeer *y) {
                 return memcmp(&x->peer.in6.sin6_addr, &y->peer.in6.sin6_addr, sizeof(x->peer.in6.sin6_addr));
         case AF_VSOCK:
                 return CMP(x->peer.vm.svm_cid, y->peer.vm.svm_cid);
+        case AF_UNIX:
+                return CMP(x->peer_cred.uid, y->peer_cred.uid);
         }
         assert_not_reached();
 }
@@ -436,10 +449,9 @@ static int peer_address_compare_func(const SocketPeer *x, const SocketPeer *y) {
 DEFINE_PRIVATE_HASH_OPS(peer_address_hash_ops, SocketPeer, peer_address_hash_func, peer_address_compare_func);
 
 static int socket_load(Unit *u) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         r = unit_load_fragment_and_dropin(u, true);
@@ -457,16 +469,22 @@ static int socket_load(Unit *u) {
         return socket_verify(s);
 }
 
-static SocketPeer *socket_peer_new(void) {
+static SocketPeer *socket_peer_dup(const SocketPeer *q) {
         SocketPeer *p;
 
+        assert(q);
+
         p = new(SocketPeer, 1);
         if (!p)
                 return NULL;
 
         *p = (SocketPeer) {
                 .n_ref = 1,
+                .peer = q->peer,
+                .peer_salen = q->peer_salen,
+                .peer_cred = q->peer_cred,
         };
+
         return p;
 }
 
@@ -483,36 +501,46 @@ DEFINE_TRIVIAL_REF_UNREF_FUNC(SocketPeer, socket_peer, socket_peer_free);
 
 int socket_acquire_peer(Socket *s, int fd, SocketPeer **ret) {
         _cleanup_(socket_peer_unrefp) SocketPeer *remote = NULL;
-        SocketPeer sa = {
+        SocketPeer key = {
                 .peer_salen = sizeof(union sockaddr_union),
+                .peer_cred = UCRED_INVALID,
         }, *i;
         int r;
 
-        assert(fd >= 0);
         assert(s);
+        assert(fd >= 0);
         assert(ret);
 
-        if (getpeername(fd, &sa.peer.sa, &sa.peer_salen) < 0)
+        if (getpeername(fd, &key.peer.sa, &key.peer_salen) < 0)
                 return log_unit_error_errno(UNIT(s), errno, "getpeername() failed: %m");
 
-        if (!IN_SET(sa.peer.sa.sa_family, AF_INET, AF_INET6, AF_VSOCK)) {
+        switch (key.peer.sa.sa_family) {
+        case AF_INET:
+        case AF_INET6:
+        case AF_VSOCK:
+                break;
+
+        case AF_UNIX:
+                r = getpeercred(fd, &key.peer_cred);
+                if (r < 0)
+                        return log_unit_error_errno(UNIT(s), r, "Failed to get peer credentials of socket: %m");
+                break;
+
+        default:
                 *ret = NULL;
                 return 0;
         }
 
-        i = set_get(s->peers_by_address, &sa);
+        i = set_get(s->peers_by_address, &key);
         if (i) {
                 *ret = socket_peer_ref(i);
                 return 1;
         }
 
-        remote = socket_peer_new();
+        remote = socket_peer_dup(&key);
         if (!remote)
                 return log_oom();
 
-        remote->peer = sa.peer;
-        remote->peer_salen = sa.peer_salen;
-
         r = set_ensure_put(&s->peers_by_address, &peer_address_hash_ops, remote);
         if (r < 0)
                 return log_unit_error_errno(UNIT(s), r, "Failed to insert peer info into hash table: %m");
@@ -540,10 +568,9 @@ static const char* listen_lookup(int family, int type) {
 }
 
 static void socket_dump(Unit *u, FILE *f, const char *prefix) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         const char *prefix2, *str;
 
-        assert(s);
         assert(f);
 
         prefix = strempty(prefix);
@@ -563,6 +590,7 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                 "%sTransparent: %s\n"
                 "%sBroadcast: %s\n"
                 "%sPassCredentials: %s\n"
+                "%sPassFileDescriptorsToExec: %s\n"
                 "%sPassSecurity: %s\n"
                 "%sPassPacketInfo: %s\n"
                 "%sTCPCongestion: %s\n"
@@ -583,6 +611,7 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                 prefix, yes_no(s->transparent),
                 prefix, yes_no(s->broadcast),
                 prefix, yes_no(s->pass_cred),
+                prefix, yes_no(s->pass_fds_to_exec),
                 prefix, yes_no(s->pass_sec),
                 prefix, yes_no(s->pass_pktinfo),
                 prefix, strna(s->tcp_congestion),
@@ -776,8 +805,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
                 if (!s->exec_command[c])
                         continue;
 
-                fprintf(f, "%s-> %s:\n",
-                        prefix, socket_exec_command_to_string(c));
+                fprintf(f, "%s%s %s:\n",
+                        prefix, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), socket_exec_command_to_string(c));
 
                 exec_command_dump_list(s->exec_command[c], f, prefix2);
         }
@@ -1274,6 +1303,9 @@ static int socket_symlink(Socket *s) {
 static int usbffs_write_descs(int fd, Service *s) {
         int r;
 
+        assert(fd >= 0);
+        assert(s);
+
         if (!s->usb_function_descriptors || !s->usb_function_strings)
                 return -EINVAL;
 
@@ -1339,12 +1371,17 @@ clear:
 }
 
 int socket_load_service_unit(Socket *s, int cfd, Unit **ret) {
+        int r;
+
         /* Figure out what the unit that will be used to handle the connections on the socket looks like.
          *
          * If cfd < 0, then we don't have a connection yet. In case of Accept=yes sockets, use a fake
          * instance name.
          */
 
+        assert(s);
+        assert(ret);
+
         if (UNIT_ISSET(s->service)) {
                 *ret = UNIT_DEREF(s->service);
                 return 0;
@@ -1355,7 +1392,6 @@ int socket_load_service_unit(Socket *s, int cfd, Unit **ret) {
 
         /* Build the instance name and load the unit */
         _cleanup_free_ char *prefix = NULL, *instance = NULL, *name = NULL;
-        int r;
 
         r = unit_name_to_prefix(UNIT(s)->id, &prefix);
         if (r < 0)
@@ -1385,50 +1421,26 @@ int socket_load_service_unit(Socket *s, int cfd, Unit **ret) {
 }
 
 static int socket_determine_selinux_label(Socket *s, char **ret) {
+        Unit *service;
         int r;
 
         assert(s);
         assert(ret);
 
-        Unit *service;
-        ExecCommand *c;
-        const char *exec_context;
-        _cleanup_free_ char *path = NULL;
-
-        r = socket_load_service_unit(s, -1, &service);
-        if (r == -ENODATA)
-                goto no_label;
+        r = socket_load_service_unit(s, /* cfd= */ -EBADF, &service);
+        if (r == -ENODATA) {
+                *ret = NULL;
+                return 0;
+        }
         if (r < 0)
                 return r;
 
-        exec_context = SERVICE(service)->exec_context.selinux_context;
-        if (exec_context) {
-                char *con;
-
-                con = strdup(exec_context);
-                if (!con)
-                        return -ENOMEM;
-
-                *ret = TAKE_PTR(con);
+        r = service_determine_exec_selinux_label(SERVICE(service), ret);
+        if (r == -ENODATA) {
+                *ret = NULL;
                 return 0;
         }
-
-        c = SERVICE(service)->exec_command[SERVICE_EXEC_START];
-        if (!c)
-                goto no_label;
-
-        r = chase(c->path, SERVICE(service)->exec_context.root_directory, CHASE_PREFIX_ROOT, &path, NULL);
-        if (r < 0)
-                goto no_label;
-
-        r = mac_selinux_get_create_label_from_exe(path, ret);
-        if (IN_SET(r, -EPERM, -EOPNOTSUPP))
-                goto no_label;
         return r;
-
-no_label:
-        *ret = NULL;
-        return 0;
 }
 
 static int socket_address_listen_do(
@@ -1794,6 +1806,7 @@ static int socket_check_open(Socket *s) {
 
 static void socket_set_state(Socket *s, SocketState state) {
         SocketState old_state;
+
         assert(s);
 
         if (s->state != state)
@@ -1802,18 +1815,7 @@ static void socket_set_state(Socket *s, SocketState state) {
         old_state = s->state;
         s->state = state;
 
-        if (!IN_SET(state,
-                    SOCKET_START_PRE,
-                    SOCKET_START_CHOWN,
-                    SOCKET_START_POST,
-                    SOCKET_STOP_PRE,
-                    SOCKET_STOP_PRE_SIGTERM,
-                    SOCKET_STOP_PRE_SIGKILL,
-                    SOCKET_STOP_POST,
-                    SOCKET_FINAL_SIGTERM,
-                    SOCKET_FINAL_SIGKILL,
-                    SOCKET_CLEANING)) {
-
+        if (!SOCKET_STATE_WITH_PROCESS(state)) {
                 s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source);
                 socket_unwatch_control_pid(s);
                 s->control_command = NULL;
@@ -1841,10 +1843,9 @@ static void socket_set_state(Socket *s, SocketState state) {
 }
 
 static int socket_coldplug(Unit *u) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(s);
         assert(s->state == SOCKET_DEAD);
 
         if (s->deserialized_state == s->state)
@@ -1852,17 +1853,7 @@ static int socket_coldplug(Unit *u) {
 
         if (pidref_is_set(&s->control_pid) &&
             pidref_is_unwaited(&s->control_pid) > 0 &&
-            IN_SET(s->deserialized_state,
-                   SOCKET_START_PRE,
-                   SOCKET_START_CHOWN,
-                   SOCKET_START_POST,
-                   SOCKET_STOP_PRE,
-                   SOCKET_STOP_PRE_SIGTERM,
-                   SOCKET_STOP_PRE_SIGKILL,
-                   SOCKET_STOP_POST,
-                   SOCKET_FINAL_SIGTERM,
-                   SOCKET_FINAL_SIGKILL,
-                   SOCKET_CLEANING)) {
+            SOCKET_STATE_WITH_PROCESS(s->deserialized_state)) {
 
                 r = unit_watch_pidref(UNIT(s), &s->control_pid, /* exclusive= */ false);
                 if (r < 0)
@@ -1911,11 +1902,9 @@ static int socket_coldplug(Unit *u) {
 }
 
 static int socket_spawn(Socket *s, ExecCommand *c, PidRef *ret_pid) {
-
         _cleanup_(exec_params_shallow_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
                         EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
-        pid_t pid;
         int r;
 
         assert(s);
@@ -1934,17 +1923,33 @@ static int socket_spawn(Socket *s, ExecCommand *c, PidRef *ret_pid) {
         if (r < 0)
                 return r;
 
+        /* Note that ExecStartPre= command doesn't inherit any FDs. It runs before we open listen FDs. */
+        if (s->pass_fds_to_exec) {
+                _cleanup_strv_free_ char **fd_names = NULL;
+                _cleanup_free_ int *fds = NULL;
+                int n_fds;
+
+                n_fds = socket_collect_fds(s, &fds);
+                if (n_fds < 0)
+                        return n_fds;
+
+                r = strv_extend_n(&fd_names, socket_fdname(s), n_fds);
+                if (r < 0)
+                        return r;
+
+                exec_params.flags |= EXEC_PASS_FDS;
+                exec_params.fds = TAKE_PTR(fds);
+                exec_params.fd_names = TAKE_PTR(fd_names);
+                exec_params.n_socket_fds = n_fds;
+        }
+
         r = exec_spawn(UNIT(s),
                        c,
                        &s->exec_context,
                        &exec_params,
                        s->exec_runtime,
                        &s->cgroup_context,
-                       &pid);
-        if (r < 0)
-                return r;
-
-        r = pidref_set_pid(&pidref, pid);
+                       &pidref);
         if (r < 0)
                 return r;
 
@@ -2052,6 +2057,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f);
 
 static void socket_enter_stop_post(Socket *s, SocketResult f) {
         int r;
+
         assert(s);
 
         if (s->result == SOCKET_SUCCESS)
@@ -2094,13 +2100,7 @@ static void socket_enter_signal(Socket *s, SocketState state, SocketResult f) {
         if (s->result == SOCKET_SUCCESS)
                 s->result = f;
 
-        r = unit_kill_context(
-                        UNIT(s),
-                        &s->kill_context,
-                        state_to_kill_operation(s, state),
-                        /* main_pid= */ NULL,
-                        &s->control_pid,
-                        /* main_pid_alien= */ false);
+        r = unit_kill_context(UNIT(s), state_to_kill_operation(s, state));
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
                 goto fail;
@@ -2134,6 +2134,7 @@ fail:
 
 static void socket_enter_stop_pre(Socket *s, SocketResult f) {
         int r;
+
         assert(s);
 
         if (s->result == SOCKET_SUCCESS)
@@ -2160,6 +2161,7 @@ static void socket_enter_stop_pre(Socket *s, SocketResult f) {
 
 static void socket_enter_listening(Socket *s) {
         int r;
+
         assert(s);
 
         if (!s->accept && s->flush_pending) {
@@ -2179,6 +2181,7 @@ static void socket_enter_listening(Socket *s) {
 
 static void socket_enter_start_post(Socket *s) {
         int r;
+
         assert(s);
 
         socket_unwatch_control_pid(s);
@@ -2235,6 +2238,7 @@ fail:
 
 static void socket_enter_start_pre(Socket *s) {
         int r;
+
         assert(s);
 
         socket_unwatch_control_pid(s);
@@ -2278,7 +2282,6 @@ static void socket_enter_running(Socket *s, int cfd_in) {
         /* Note that this call takes possession of the connection fd passed. It either has to assign it
          * somewhere or close it. */
         _cleanup_close_ int cfd = cfd_in;
-
         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
         int r;
 
@@ -2315,8 +2318,8 @@ static void socket_enter_running(Socket *s, int cfd_in) {
 
                 if (!pending) {
                         if (!UNIT_ISSET(s->service)) {
-                                r = log_unit_warning_errno(UNIT(s), SYNTHETIC_ERRNO(ENOENT),
-                                                           "Service to activate vanished, refusing activation.");
+                                log_unit_warning(UNIT(s),
+                                                 "Service to activate vanished, refusing activation.");
                                 goto fail;
                         }
 
@@ -2347,7 +2350,10 @@ static void socket_enter_running(Socket *s, int cfd_in) {
                         if (r > 0 && p->n_ref > s->max_connections_per_source) {
                                 _cleanup_free_ char *t = NULL;
 
-                                (void) sockaddr_pretty(&p->peer.sa, p->peer_salen, true, false, &t);
+                                if (p->peer.sa.sa_family == AF_UNIX)
+                                        (void) asprintf(&t, "UID " UID_FMT, p->peer_cred.uid);
+                                else
+                                        (void) sockaddr_pretty(&p->peer.sa, p->peer_salen, /* translate_ipv6= */ true, /* include_port= */ false, &t);
 
                                 log_unit_warning(UNIT(s),
                                                  "Too many incoming connections (%u) from source %s, dropping connection.",
@@ -2357,18 +2363,15 @@ static void socket_enter_running(Socket *s, int cfd_in) {
                 }
 
                 r = socket_load_service_unit(s, cfd, &service);
-                if (r < 0) {
-                        if (ERRNO_IS_DISCONNECT(r))
-                                return;
-
-                        log_unit_warning_errno(UNIT(s), r, "Failed to load connection service unit: %m");
+                if (ERRNO_IS_NEG_DISCONNECT(r))
+                        return;
+                if (r < 0 || UNIT_IS_LOAD_ERROR(service->load_state)) {
+                        log_unit_warning_errno(UNIT(s), r < 0 ? r : service->load_error,
+                                               "Failed to load connection service unit: %m");
                         goto fail;
                 }
-
-                r = unit_add_two_dependencies(UNIT(s), UNIT_BEFORE, UNIT_TRIGGERS, service,
-                                              false, UNIT_DEPENDENCY_IMPLICIT);
-                if (r < 0) {
-                        log_unit_warning_errno(UNIT(s), r, "Failed to add Before=/Triggers= dependencies on connection unit: %m");
+                if (service->load_state == UNIT_MASKED) {
+                        log_unit_warning(UNIT(s), "Connection service unit is masked, refusing.");
                         goto fail;
                 }
 
@@ -2383,7 +2386,10 @@ static void socket_enter_running(Socket *s, int cfd_in) {
                         goto fail;
                 }
 
-                TAKE_FD(cfd); /* We passed ownership of the fd to the service now. Forget it here. */
+                /* We passed ownership of the fd and socket peer to the service now. */
+                TAKE_FD(cfd);
+                TAKE_PTR(p);
+
                 s->n_connections++;
 
                 r = manager_add_job(UNIT(s)->manager, JOB_START, service, JOB_REPLACE, NULL, &error, NULL);
@@ -2405,13 +2411,9 @@ refuse:
         return;
 
 queue_error:
-        if (ERRNO_IS_RESOURCE(r))
-                log_unit_warning(UNIT(s), "Failed to queue service startup job: %s",
-                                 bus_error_message(&error, r));
-        else
-                log_unit_warning(UNIT(s), "Failed to queue service startup job (Maybe the service file is missing or not a %s unit?): %s",
-                                 cfd >= 0 ? "template" : "non-template",
-                                 bus_error_message(&error, r));
+        log_unit_warning_errno(UNIT(s), r, "Failed to queue service startup job%s: %s",
+                               cfd >= 0 && !ERRNO_IS_RESOURCE(r) ? " (Maybe the service is missing or is a template unit?)" : "",
+                               bus_error_message(&error, r));
 
 fail:
         socket_enter_stop_pre(s, SOCKET_FAILURE_RESOURCES);
@@ -2444,11 +2446,9 @@ static void socket_run_next(Socket *s) {
 }
 
 static int socket_start(Unit *u) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(s);
-
         /* We cannot fulfill this request right now, try again later
          * please! */
         if (IN_SET(s->state,
@@ -2496,16 +2496,15 @@ static int socket_start(Unit *u) {
         s->result = SOCKET_SUCCESS;
         exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
 
-        u->reset_accounting = true;
+        if (s->cgroup_runtime)
+                s->cgroup_runtime->reset_accounting = true;
 
         socket_enter_start_pre(s);
         return 1;
 }
 
 static int socket_stop(Unit *u) {
-        Socket *s = SOCKET(u);
-
-        assert(s);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
         /* Already on it */
         if (IN_SET(s->state,
@@ -2540,10 +2539,9 @@ static int socket_stop(Unit *u) {
 }
 
 static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(u);
         assert(f);
         assert(fds);
 
@@ -2595,10 +2593,9 @@ static int socket_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int socket_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(u);
         assert(key);
         assert(value);
 
@@ -2836,9 +2833,7 @@ static int socket_deserialize_item(Unit *u, const char *key, const char *value,
 }
 
 static void socket_distribute_fds(Unit *u, FDSet *fds) {
-        Socket *s = SOCKET(u);
-
-        assert(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
         LIST_FOREACH(port, p, s->ports) {
                 int fd;
@@ -2860,15 +2855,15 @@ static void socket_distribute_fds(Unit *u, FDSet *fds) {
 }
 
 static UnitActiveState socket_active_state(Unit *u) {
-        assert(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
-        return state_translation_table[SOCKET(u)->state];
+        return state_translation_table[s->state];
 }
 
 static const char *socket_sub_state_to_string(Unit *u) {
-        assert(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
-        return socket_state_to_string(SOCKET(u)->state);
+        return socket_state_to_string(s->state);
 }
 
 int socket_port_to_address(const SocketPort *p, char **ret) {
@@ -2906,7 +2901,6 @@ int socket_port_to_address(const SocketPort *p, char **ret) {
 }
 
 const char* socket_port_type_to_string(SocketPort *p) {
-
         assert(p);
 
         switch (p->type) {
@@ -2968,9 +2962,7 @@ SocketType socket_port_type_from_string(const char *s) {
 }
 
 static bool socket_may_gc(Unit *u) {
-        Socket *s = SOCKET(u);
-
-        assert(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
         return s->n_connections == 0;
 }
@@ -3108,10 +3100,9 @@ fail:
 }
 
 static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         SocketResult f;
 
-        assert(s);
         assert(pid >= 0);
 
         if (pid != s->control_pid.pid)
@@ -3215,9 +3206,8 @@ static void socket_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 }
 
 static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Socket *s = SOCKET(userdata);
+        Socket *s = ASSERT_PTR(SOCKET(userdata));
 
-        assert(s);
         assert(s->timer_event_source == source);
 
         switch (s->state) {
@@ -3289,12 +3279,11 @@ static int socket_dispatch_timer(sd_event_source *source, usec_t usec, void *use
         return 0;
 }
 
-int socket_collect_fds(Socket *s, int **fds) {
-        size_t k = 0, n = 0;
-        int *rfds;
+int socket_collect_fds(Socket *s, int **ret) {
+        size_t n = 0, k = 0;
 
         assert(s);
-        assert(fds);
+        assert(ret);
 
         /* Called from the service code for requesting our fds */
 
@@ -3304,25 +3293,25 @@ int socket_collect_fds(Socket *s, int **fds) {
                 n += p->n_auxiliary_fds;
         }
 
-        if (n <= 0) {
-                *fds = NULL;
+        if (n == 0) {
+                *ret = NULL;
                 return 0;
         }
 
-        rfds = new(int, n);
-        if (!rfds)
+        int *fds = new(int, n);
+        if (!fds)
                 return -ENOMEM;
 
         LIST_FOREACH(port, p, s->ports) {
                 if (p->fd >= 0)
-                        rfds[k++] = p->fd;
-                for (size_t i = 0; i < p->n_auxiliary_fds; ++i)
-                        rfds[k++] = p->auxiliary_fds[i];
+                        fds[k++] = p->fd;
+                FOREACH_ARRAY(i, p->auxiliary_fds, p->n_auxiliary_fds)
+                        fds[k++] = *i;
         }
 
         assert(k == n);
 
-        *fds = rfds;
+        *ret = fds;
         return (int) n;
 }
 
@@ -3353,9 +3342,8 @@ void socket_connection_unref(Socket *s) {
 }
 
 static void socket_trigger_notify(Unit *u, Unit *other) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
-        assert(u);
         assert(other);
 
         /* Filter out invocations with bogus state */
@@ -3390,8 +3378,24 @@ static void socket_trigger_notify(Unit *u, Unit *other) {
                 socket_set_state(s, SOCKET_RUNNING);
 }
 
+static void socket_handoff_timestamp(
+                Unit *u,
+                const struct ucred *ucred,
+                const dual_timestamp *ts) {
+
+        Socket *s = ASSERT_PTR(SOCKET(u));
+
+        assert(ucred);
+        assert(ts);
+
+        if (s->control_pid.pid == ucred->pid && s->control_command) {
+                exec_status_handoff(&s->control_command->exec_status, ucred, ts);
+                unit_add_to_dbus_queue(u);
+        }
+}
+
 static int socket_get_timeout(Unit *u, usec_t *timeout) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         usec_t t;
         int r;
 
@@ -3423,11 +3427,10 @@ static PidRef *socket_control_pid(Unit *u) {
 }
 
 static int socket_clean(Unit *u, ExecCleanMask mask) {
+        Socket *s = ASSERT_PTR(SOCKET(u));
         _cleanup_strv_free_ char **l = NULL;
-        Socket *s = SOCKET(u);
         int r;
 
-        assert(s);
         assert(mask != 0);
 
         if (s->state != SOCKET_DEAD)
@@ -3467,19 +3470,15 @@ fail:
 }
 
 static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
-        Socket *s = SOCKET(u);
-
-        assert(s);
+        Socket *s = ASSERT_PTR(SOCKET(u));
 
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
 static int socket_can_start(Unit *u) {
-        Socket *s = SOCKET(u);
+        Socket *s = ASSERT_PTR(SOCKET(u));
         int r;
 
-        assert(s);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
@@ -3494,7 +3493,7 @@ static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
         [SOCKET_EXEC_START_CHOWN] = "ExecStartChown",
         [SOCKET_EXEC_START_POST]  = "ExecStartPost",
         [SOCKET_EXEC_STOP_PRE]    = "ExecStopPre",
-        [SOCKET_EXEC_STOP_POST]   = "ExecStopPost"
+        [SOCKET_EXEC_STOP_POST]   = "ExecStopPost",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(socket_exec_command, SocketExecCommand);
@@ -3508,7 +3507,7 @@ static const char* const socket_result_table[_SOCKET_RESULT_MAX] = {
         [SOCKET_FAILURE_CORE_DUMP]               = "core-dump",
         [SOCKET_FAILURE_START_LIMIT_HIT]         = "start-limit-hit",
         [SOCKET_FAILURE_TRIGGER_LIMIT_HIT]       = "trigger-limit-hit",
-        [SOCKET_FAILURE_SERVICE_START_LIMIT_HIT] = "service-start-limit-hit"
+        [SOCKET_FAILURE_SERVICE_START_LIMIT_HIT] = "service-start-limit-hit",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(socket_result, SocketResult);
@@ -3552,6 +3551,7 @@ const UnitVTable socket_vtable = {
         .cgroup_context_offset = offsetof(Socket, cgroup_context),
         .kill_context_offset = offsetof(Socket, kill_context),
         .exec_runtime_offset = offsetof(Socket, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Socket, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -3596,6 +3596,8 @@ const UnitVTable socket_vtable = {
 
         .reset_failed = socket_reset_failed,
 
+        .notify_handoff_timestamp = socket_handoff_timestamp,
+
         .control_pid = socket_control_pid,
 
         .bus_set_property = bus_socket_set_property,
diff --git a/src/core/socket.h b/src/core/socket.h
index 0983e8c..5e3929c 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -92,6 +92,7 @@ struct Socket {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         /* For Accept=no sockets refers to the one service we'll
          * activate. For Accept=yes sockets is either NULL, or filled
@@ -128,6 +129,7 @@ struct Socket {
         bool transparent;
         bool broadcast;
         bool pass_cred;
+        bool pass_fds_to_exec;
         bool pass_sec;
         bool pass_pktinfo;
         SocketTimestamping timestamping;
@@ -170,7 +172,7 @@ int socket_acquire_peer(Socket *s, int fd, SocketPeer **p);
 DEFINE_TRIVIAL_CLEANUP_FUNC(SocketPeer*, socket_peer_unref);
 
 /* Called from the service code when collecting fds */
-int socket_collect_fds(Socket *s, int **fds);
+int socket_collect_fds(Socket *s, int **ret);
 
 /* Called from the service code when a per-connection service ended */
 void socket_connection_unref(Socket *s);
diff --git a/src/core/swap.c b/src/core/swap.c
index 682c2b9..c4d2ba8 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -30,15 +30,15 @@
 #include "virt.h"
 
 static const UnitActiveState state_translation_table[_SWAP_STATE_MAX] = {
-        [SWAP_DEAD] = UNIT_INACTIVE,
-        [SWAP_ACTIVATING] = UNIT_ACTIVATING,
-        [SWAP_ACTIVATING_DONE] = UNIT_ACTIVE,
-        [SWAP_ACTIVE] = UNIT_ACTIVE,
-        [SWAP_DEACTIVATING] = UNIT_DEACTIVATING,
+        [SWAP_DEAD]                 = UNIT_INACTIVE,
+        [SWAP_ACTIVATING]           = UNIT_ACTIVATING,
+        [SWAP_ACTIVATING_DONE]      = UNIT_ACTIVE,
+        [SWAP_ACTIVE]               = UNIT_ACTIVE,
+        [SWAP_DEACTIVATING]         = UNIT_DEACTIVATING,
         [SWAP_DEACTIVATING_SIGTERM] = UNIT_DEACTIVATING,
         [SWAP_DEACTIVATING_SIGKILL] = UNIT_DEACTIVATING,
-        [SWAP_FAILED] = UNIT_FAILED,
-        [SWAP_CLEANING] = UNIT_MAINTENANCE,
+        [SWAP_FAILED]               = UNIT_FAILED,
+        [SWAP_CLEANING]             = UNIT_MAINTENANCE,
 };
 
 static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
@@ -68,9 +68,7 @@ static const char *swap_sub_state_to_string(Unit *u) {
 }
 
 static bool swap_may_gc(Unit *u) {
-        Swap *s = SWAP(u);
-
-        assert(s);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
         if (s->from_proc_swaps)
                 return false;
@@ -134,10 +132,9 @@ static int swap_set_devnode(Swap *s, const char *devnode) {
 }
 
 static void swap_init(Unit *u) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
-        assert(s);
-        assert(UNIT(s)->load_state == UNIT_STUB);
+        assert(u->load_state == UNIT_STUB);
 
         s->timeout_usec = u->manager->defaults.timeout_start_usec;
 
@@ -152,18 +149,11 @@ static void swap_init(Unit *u) {
 
 static void swap_unwatch_control_pid(Swap *s) {
         assert(s);
-
-        if (!pidref_is_set(&s->control_pid))
-                return;
-
-        unit_unwatch_pidref(UNIT(s), &s->control_pid);
-        pidref_done(&s->control_pid);
+        unit_unwatch_pidref_done(UNIT(s), &s->control_pid);
 }
 
 static void swap_done(Unit *u) {
-        Swap *s = SWAP(u);
-
-        assert(s);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
         swap_unset_proc_swaps(s);
         swap_set_devnode(s, NULL);
@@ -173,6 +163,7 @@ static void swap_done(Unit *u) {
         s->parameters_fragment.options = mfree(s->parameters_fragment.options);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
         s->control_command = NULL;
 
@@ -255,6 +246,7 @@ static int swap_verify(Swap *s) {
         _cleanup_free_ char *e = NULL;
         int r;
 
+        assert(s);
         assert(UNIT(s)->load_state == UNIT_LOADED);
 
         r = unit_name_from_path(s->what, ".swap", &e);
@@ -321,7 +313,7 @@ static int swap_add_extras(Swap *s) {
                         return r;
         }
 
-        r = unit_require_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT);
+        r = unit_add_mounts_for(UNIT(s), s->what, UNIT_DEPENDENCY_IMPLICIT, UNIT_MOUNT_REQUIRES);
         if (r < 0)
                 return r;
 
@@ -353,25 +345,22 @@ static int swap_add_extras(Swap *s) {
 }
 
 static int swap_load(Unit *u) {
-        Swap *s = SWAP(u);
-        int r, q = 0;
+        Swap *s = ASSERT_PTR(SWAP(u));
+        int r;
 
-        assert(s);
         assert(u->load_state == UNIT_STUB);
 
         /* Load a .swap file */
-        bool fragment_optional = s->from_proc_swaps;
-        r = unit_load_fragment_and_dropin(u, !fragment_optional);
+        r = unit_load_fragment_and_dropin(u, /* fragment_required = */ !s->from_proc_swaps);
 
         /* Add in some extras, and do so either when we successfully loaded something or when /proc/swaps is
          * already active. */
         if (u->load_state == UNIT_LOADED || s->from_proc_swaps)
-                q = swap_add_extras(s);
+                RET_GATHER(r, swap_add_extras(s));
 
         if (r < 0)
                 return r;
-        if (q < 0)
-                return q;
+
         if (u->load_state != UNIT_LOADED)
                 return 0;
 
@@ -385,11 +374,11 @@ static int swap_setup_unit(
                 int priority,
                 bool set_flags) {
 
+        _cleanup_(unit_freep) Unit *new_unit = NULL;
         _cleanup_free_ char *e = NULL;
-        bool delete = false;
-        Unit *u = NULL;
+        Unit *u;
+        Swap *s;
         int r;
-        SwapParameters *p;
 
         assert(m);
         assert(what);
@@ -397,70 +386,61 @@ static int swap_setup_unit(
 
         r = unit_name_from_path(what, ".swap", &e);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to generate unit name from path: %m");
+                return log_error_errno(r, "Failed to generate unit name from path: %m");
 
         u = manager_get_unit(m, e);
-        if (u &&
-            SWAP(u)->from_proc_swaps &&
-            !path_equal(SWAP(u)->parameters_proc_swaps.what, what_proc_swaps))
-                return log_error_errno(SYNTHETIC_ERRNO(EEXIST),
-                                       "Swap %s appeared twice with different device paths %s and %s",
-                                       e, SWAP(u)->parameters_proc_swaps.what, what_proc_swaps);
-
-        if (!u) {
-                delete = true;
+        if (u) {
+                s = ASSERT_PTR(SWAP(u));
+
+                if (s->from_proc_swaps &&
+                    !path_equal(s->parameters_proc_swaps.what, what_proc_swaps))
+                        return log_unit_error_errno(u, SYNTHETIC_ERRNO(EEXIST),
+                                                    "Swap appeared twice with different device paths %s and %s, refusing.",
+                                                    s->parameters_proc_swaps.what, what_proc_swaps);
+        } else {
+                r = unit_new_for_name(m, sizeof(Swap), e, &new_unit);
+                if (r < 0)
+                        return log_warning_errno(r, "Failed to load swap unit '%s': %m", e);
 
-                r = unit_new_for_name(m, sizeof(Swap), e, &u);
-                if (r < 0) {
-                        log_unit_warning_errno(u, r, "Failed to load swap unit: %m");
-                        goto fail;
-                }
+                u = new_unit;
+                s = ASSERT_PTR(SWAP(u));
 
-                SWAP(u)->what = strdup(what);
-                if (!SWAP(u)->what) {
-                        r = log_oom();
-                        goto fail;
-                }
+                s->what = strdup(what);
+                if (!s->what)
+                        return log_oom();
 
                 unit_add_to_load_queue(u);
-        } else
-                delete = false;
+        }
 
-        p = &SWAP(u)->parameters_proc_swaps;
+        SwapParameters *p = &s->parameters_proc_swaps;
 
         if (!p->what) {
                 p->what = strdup(what_proc_swaps);
-                if (!p->what) {
-                        r = log_oom();
-                        goto fail;
-                }
+                if (!p->what)
+                        return log_oom();
         }
 
-        /* The unit is definitely around now, mark it as loaded if it was previously referenced but could not be
-         * loaded. After all we can load it now, from the data in /proc/swaps. */
-        if (IN_SET(u->load_state, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR)) {
+        /* The unit is definitely around now, mark it as loaded if it was previously referenced but
+         * could not be loaded. After all we can load it now, from the data in /proc/swaps. */
+        if (UNIT_IS_LOAD_ERROR(u->load_state)) {
                 u->load_state = UNIT_LOADED;
                 u->load_error = 0;
         }
 
         if (set_flags) {
-                SWAP(u)->is_active = true;
-                SWAP(u)->just_activated = !SWAP(u)->from_proc_swaps;
+                s->is_active = true;
+                s->just_activated = !s->from_proc_swaps;
         }
 
-        SWAP(u)->from_proc_swaps = true;
+        s->from_proc_swaps = true;
 
         p->priority = priority;
         p->priority_set = true;
 
         unit_add_to_dbus_queue(u);
-        return 0;
+        TAKE_PTR(new_unit);
 
-fail:
-        if (delete)
-                unit_free(u);
-
-        return r;
+        return 0;
 }
 
 static void swap_process_new(Manager *m, const char *device, int prio, bool set_flags) {
@@ -541,11 +521,10 @@ static void swap_set_state(Swap *s, SwapState state) {
 }
 
 static int swap_coldplug(Unit *u) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
         SwapState new_state = SWAP_DEAD;
         int r;
 
-        assert(s);
         assert(s->state == SWAP_DEAD);
 
         if (s->deserialized_state != s->state)
@@ -569,20 +548,25 @@ static int swap_coldplug(Unit *u) {
                         return r;
         }
 
-        if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED))
+        if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) {
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
+        }
 
         swap_set_state(s, new_state);
         return 0;
 }
 
 static void swap_dump(Unit *u, FILE *f, const char *prefix) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
         SwapParameters *p;
+        const char *prefix2;
 
-        assert(s);
         assert(f);
 
+        prefix = strempty(prefix);
+        prefix2 = strjoina(prefix, "\t");
+
         if (s->from_proc_swaps)
                 p = &s->parameters_proc_swaps;
         else if (s->from_fragment)
@@ -628,14 +612,23 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
         exec_context_dump(&s->exec_context, f, prefix);
         kill_context_dump(&s->kill_context, f, prefix);
         cgroup_context_dump(UNIT(s), f, prefix);
+
+        for (SwapExecCommand c = 0; c < _SWAP_EXEC_COMMAND_MAX; c++) {
+                if (!s->exec_command[c].argv)
+                        continue;
+
+                fprintf(f, "%s%s %s:\n",
+                        prefix, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), swap_exec_command_to_string(c));
+
+                exec_command_dump(s->exec_command + c, f, prefix2);
+        }
+
 }
 
 static int swap_spawn(Swap *s, ExecCommand *c, PidRef *ret_pid) {
-
         _cleanup_(exec_params_shallow_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(
                         EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN);
         _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
-        pid_t pid;
         int r;
 
         assert(s);
@@ -660,11 +653,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, PidRef *ret_pid) {
                        &exec_params,
                        s->exec_runtime,
                        &s->cgroup_context,
-                       &pid);
-        if (r < 0)
-                return r;
-
-        r = pidref_set_pid(&pidref, pid);
+                       &pidref);
         if (r < 0)
                 return r;
 
@@ -734,13 +723,7 @@ static void swap_enter_signal(Swap *s, SwapState state, SwapResult f) {
         if (s->result == SWAP_SUCCESS)
                 s->result = f;
 
-        r = unit_kill_context(
-                        UNIT(s),
-                        &s->kill_context,
-                        state_to_kill_operation(s, state),
-                        /* main_pid= */ NULL,
-                        &s->control_pid,
-                        /* main_pid_alien= */ false);
+        r = unit_kill_context(UNIT(s), state_to_kill_operation(s, state));
         if (r < 0) {
                 log_unit_warning_errno(UNIT(s), r, "Failed to kill processes: %m");
                 goto fail;
@@ -870,7 +853,9 @@ static void swap_cycle_clear(Swap *s) {
 
         s->result = SWAP_SUCCESS;
         exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
-        UNIT(s)->reset_accounting = true;
+
+        if (s->cgroup_runtime)
+                s->cgroup_runtime->reset_accounting = true;
 }
 
 static int swap_start(Unit *u) {
@@ -913,9 +898,7 @@ static int swap_start(Unit *u) {
 }
 
 static int swap_stop(Unit *u) {
-        Swap *s = SWAP(u);
-
-        assert(s);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
         switch (s->state) {
 
@@ -949,9 +932,8 @@ static int swap_stop(Unit *u) {
 }
 
 static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
-        assert(s);
         assert(f);
         assert(fds);
 
@@ -966,9 +948,8 @@ static int swap_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int swap_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
-        assert(s);
         assert(fds);
 
         if (streq(key, "state")) {
@@ -1009,10 +990,9 @@ static int swap_deserialize_item(Unit *u, const char *key, const char *value, FD
 }
 
 static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
         SwapResult f;
 
-        assert(s);
         assert(pid >= 0);
 
         if (pid != s->control_pid.pid)
@@ -1086,9 +1066,8 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 }
 
 static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
-        Swap *s = SWAP(userdata);
+        Swap *s = ASSERT_PTR(SWAP(userdata));
 
-        assert(s);
         assert(s->timer_event_source == source);
 
         switch (s->state) {
@@ -1261,12 +1240,10 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
         return swap_process_proc_swaps(m);
 }
 
-static Unit *swap_following(Unit *u) {
-        Swap *s = SWAP(u);
+static Unit* swap_following(Unit *u) {
+        Swap *s = ASSERT_PTR(SWAP(u));
         Swap *first = NULL;
 
-        assert(s);
-
         /* If the user configured the swap through /etc/fstab or
          * a device unit, follow that. */
 
@@ -1298,16 +1275,15 @@ static Unit *swap_following(Unit *u) {
         return UNIT(first);
 }
 
-static int swap_following_set(Unit *u, Set **_set) {
-        Swap *s = SWAP(u);
+static int swap_following_set(Unit *u, Set **ret) {
+        Swap *s = ASSERT_PTR(SWAP(u));
         _cleanup_set_free_ Set *set = NULL;
         int r;
 
-        assert(s);
-        assert(_set);
+        assert(ret);
 
         if (LIST_JUST_US(same_devnode, s)) {
-                *_set = NULL;
+                *ret = NULL;
                 return 0;
         }
 
@@ -1321,7 +1297,7 @@ static int swap_following_set(Unit *u, Set **_set) {
                         return r;
         }
 
-        *_set = TAKE_PTR(set);
+        *ret = TAKE_PTR(set);
         return 1;
 }
 
@@ -1358,7 +1334,7 @@ static void swap_enumerate(Manager *m) {
                 /* Dispatch this before we dispatch SIGCHLD, so that
                  * we always get the events from /proc/swaps before
                  * the SIGCHLD of /sbin/swapon. */
-                r = sd_event_source_set_priority(m->swap_event_source, SD_EVENT_PRIORITY_NORMAL-10);
+                r = sd_event_source_set_priority(m->swap_event_source, EVENT_PRIORITY_SWAP_TABLE);
                 if (r < 0) {
                         log_error_errno(r, "Failed to change /proc/swaps priority: %m");
                         goto fail;
@@ -1422,28 +1398,22 @@ int swap_process_device_new(Manager *m, sd_device *dev) {
 
 int swap_process_device_remove(Manager *m, sd_device *dev) {
         const char *dn;
-        int r;
         Swap *s;
+        int r;
 
         r = sd_device_get_devname(dev, &dn);
         if (r < 0)
                 return 0;
 
-        while ((s = hashmap_get(m->swaps_by_devnode, dn))) {
-                int q;
-
-                q = swap_set_devnode(s, NULL);
-                if (q < 0)
-                        r = q;
-        }
+        r = 0;
+        while ((s = hashmap_get(m->swaps_by_devnode, dn)))
+                RET_GATHER(r, swap_set_devnode(s, NULL));
 
         return r;
 }
 
 static void swap_reset_failed(Unit *u) {
-        Swap *s = SWAP(u);
-
-        assert(s);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
         if (s->state == SWAP_FAILED)
                 swap_set_state(s, SWAP_DEAD);
@@ -1452,14 +1422,27 @@ static void swap_reset_failed(Unit *u) {
         s->clean_result = SWAP_SUCCESS;
 }
 
+static void swap_handoff_timestamp(
+                Unit *u,
+                const struct ucred *ucred,
+                const dual_timestamp *ts) {
+
+        Swap *s = ASSERT_PTR(SWAP(u));
+
+        assert(ucred);
+        assert(ts);
+
+        if (s->control_pid.pid == ucred->pid && s->control_command) {
+                exec_status_handoff(&s->control_command->exec_status, ucred, ts);
+                unit_add_to_dbus_queue(u);
+        }
+}
+
 static int swap_get_timeout(Unit *u, usec_t *timeout) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
         usec_t t;
         int r;
 
-        assert(s);
-        assert(u);
-
         if (!s->timer_event_source)
                 return 0;
 
@@ -1493,11 +1476,10 @@ static PidRef* swap_control_pid(Unit *u) {
 }
 
 static int swap_clean(Unit *u, ExecCleanMask mask) {
+        Swap *s = ASSERT_PTR(SWAP(u));
         _cleanup_strv_free_ char **l = NULL;
-        Swap *s = SWAP(u);
         int r;
 
-        assert(s);
         assert(mask != 0);
 
         if (s->state != SWAP_DEAD)
@@ -1537,19 +1519,15 @@ fail:
 }
 
 static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
-        Swap *s = SWAP(u);
-
-        assert(s);
+        Swap *s = ASSERT_PTR(SWAP(u));
 
         return exec_context_get_clean_mask(&s->exec_context, ret);
 }
 
 static int swap_can_start(Unit *u) {
-        Swap *s = SWAP(u);
+        Swap *s = ASSERT_PTR(SWAP(u));
         int r;
 
-        assert(s);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
@@ -1605,6 +1583,7 @@ const UnitVTable swap_vtable = {
         .cgroup_context_offset = offsetof(Swap, cgroup_context),
         .kill_context_offset = offsetof(Swap, kill_context),
         .exec_runtime_offset = offsetof(Swap, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Swap, cgroup_runtime),
 
         .sections =
                 "Unit\0"
@@ -1645,6 +1624,8 @@ const UnitVTable swap_vtable = {
 
         .reset_failed = swap_reset_failed,
 
+        .notify_handoff_timestamp = swap_handoff_timestamp,
+
         .control_pid = swap_control_pid,
 
         .bus_set_property = bus_swap_set_property,
diff --git a/src/core/swap.h b/src/core/swap.h
index ef20f0f..d9bbd37 100644
--- a/src/core/swap.h
+++ b/src/core/swap.h
@@ -70,6 +70,7 @@ struct Swap {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         SwapState state, deserialized_state;
 
diff --git a/src/core/system.conf.in b/src/core/system.conf.in
index 05eb681..1c08aa4 100644
--- a/src/core/system.conf.in
+++ b/src/core/system.conf.in
@@ -26,7 +26,7 @@
 #ShowStatus=yes
 #CrashChangeVT=no
 #CrashShell=no
-#CrashReboot=no
+#CrashAction=freeze
 #CtrlAltDelBurstAction=reboot-force
 #CPUAffinity=
 #NUMAPolicy=default
@@ -39,6 +39,7 @@
 #WatchdogDevice=
 #CapabilityBoundingSet=
 #NoNewPrivileges=no
+#ProtectSystem=auto
 #SystemCallArchitectures=
 #TimerSlackNSec=
 #StatusUnitFormat={{STATUS_UNIT_FORMAT_DEFAULT_STR}}
diff --git a/src/core/taint.c b/src/core/taint.c
new file mode 100644
index 0000000..969b37f
--- /dev/null
+++ b/src/core/taint.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/utsname.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "clock-util.h"
+#include "errno-util.h"
+#include "fileio.h"
+#include "fs-util.h"
+#include "log.h"
+#include "os-util.h"
+#include "path-util.h"
+#include "strv.h"
+#include "taint.h"
+#include "uid-range.h"
+
+static int short_uid_gid_range(UIDRangeUsernsMode mode) {
+        _cleanup_(uid_range_freep) UIDRange *p = NULL;
+        int r;
+
+        /* Returns > 0 (i.e. taint systemd) if the UID/GID range assigned to this environment doesn't at
+         * least cover 0…65534 (root to nobody). Returns 0 if it does or if the check is unsupported. */
+
+        r = uid_range_load_userns(/* path= */ NULL, mode, &p);
+        if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+                return false;
+        if (r < 0)
+                return log_debug_errno(r, "Failed to load uid_map or gid_map: %m");
+
+        return !uid_range_covers(p, 0, 65535);
+}
+
+char* taint_string(void) {
+        const char *stage[12] = {};
+        size_t n = 0;
+
+        /* Returns a "taint string", e.g. "local-hwclock:var-run-bad". Only things that are detected at
+         * runtime should be tagged here. For stuff that is known during compilation, emit a warning in the
+         * configuration phase. */
+
+        _cleanup_free_ char *bin = NULL, *usr_sbin = NULL, *var_run = NULL;
+
+        if (readlink_malloc("/bin", &bin) < 0 || !PATH_IN_SET(bin, "usr/bin", "/usr/bin"))
+                stage[n++] = "unmerged-usr";
+
+        /* Note that the check is different from default_PATH(), as we want to taint on uncanonical symlinks
+         * too. */
+        if (readlink_malloc("/usr/sbin", &usr_sbin) < 0 || !PATH_IN_SET(usr_sbin, "bin", "/usr/bin"))
+                stage[n++] = "unmerged-bin";
+
+        if (readlink_malloc("/var/run", &var_run) < 0 || !PATH_IN_SET(var_run, "../run", "/run"))
+                stage[n++] = "var-run-bad";
+
+        if (cg_all_unified() == 0)
+                stage[n++] = "cgroupsv1";
+
+        if (clock_is_localtime(NULL) > 0)
+                stage[n++] = "local-hwclock";
+
+        if (os_release_support_ended(NULL, /* quiet= */ true, NULL) > 0)
+                stage[n++] = "support-ended";
+
+        struct utsname uts;
+        assert_se(uname(&uts) >= 0);
+        if (strverscmp_improved(uts.release, KERNEL_BASELINE_VERSION) < 0)
+                stage[n++] = "old-kernel";
+
+        _cleanup_free_ char *overflowuid = NULL, *overflowgid = NULL;
+        if (read_one_line_file("/proc/sys/kernel/overflowuid", &overflowuid) >= 0 &&
+            !streq(overflowuid, "65534"))
+                stage[n++] = "overflowuid-not-65534";
+        if (read_one_line_file("/proc/sys/kernel/overflowgid", &overflowgid) >= 0 &&
+            !streq(overflowgid, "65534"))
+                stage[n++] = "overflowgid-not-65534";
+
+        if (short_uid_gid_range(UID_RANGE_USERNS_INSIDE) > 0)
+                stage[n++] = "short-uid-range";
+        if (short_uid_gid_range(GID_RANGE_USERNS_INSIDE) > 0)
+                stage[n++] = "short-gid-range";
+
+        assert(n < ELEMENTSOF(stage));  /* The last slot must stay NULL, it is the terminator */
+
+        return strv_join((char**) stage, ":");
+}
diff --git a/src/core/taint.h b/src/core/taint.h
new file mode 100644
index 0000000..2e514e3
--- /dev/null
+++ b/src/core/taint.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+char* taint_string(void);
diff --git a/src/core/target.c b/src/core/target.c
index 8f2a331..15866e9 100644
--- a/src/core/target.c
+++ b/src/core/target.c
@@ -11,12 +11,13 @@
 #include "unit.h"
 
 static const UnitActiveState state_translation_table[_TARGET_STATE_MAX] = {
-        [TARGET_DEAD] = UNIT_INACTIVE,
-        [TARGET_ACTIVE] = UNIT_ACTIVE
+        [TARGET_DEAD]   = UNIT_INACTIVE,
+        [TARGET_ACTIVE] = UNIT_ACTIVE,
 };
 
 static void target_set_state(Target *t, TargetState state) {
         TargetState old_state;
+
         assert(t);
 
         if (t->state != state)
@@ -26,10 +27,8 @@ static void target_set_state(Target *t, TargetState state) {
         t->state = state;
 
         if (state != old_state)
-                log_debug("%s changed %s -> %s",
-                          UNIT(t)->id,
-                          target_state_to_string(old_state),
-                          target_state_to_string(state));
+                log_unit_debug(UNIT(t), "Changed %s -> %s",
+                               target_state_to_string(old_state), target_state_to_string(state));
 
         unit_notify(UNIT(t), state_translation_table[old_state], state_translation_table[state], /* reload_success = */ true);
 }
@@ -56,8 +55,8 @@ static int target_add_default_dependencies(Target *t) {
         if (n_others < 0)
                 return n_others;
 
-        for (int i = 0; i < n_others; i++) {
-                r = unit_add_default_target_dependency(others[i], UNIT(t));
+        FOREACH_ARRAY(i, others, n_others) {
+                r = unit_add_default_target_dependency(*i, UNIT(t));
                 if (r < 0)
                         return r;
         }
@@ -70,11 +69,9 @@ static int target_add_default_dependencies(Target *t) {
 }
 
 static int target_load(Unit *u) {
-        Target *t = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
         int r;
 
-        assert(t);
-
         r = unit_load_fragment_and_dropin(u, true);
         if (r < 0)
                 return r;
@@ -87,9 +84,8 @@ static int target_load(Unit *u) {
 }
 
 static int target_coldplug(Unit *u) {
-        Target *t = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        assert(t);
         assert(t->state == TARGET_DEAD);
 
         if (t->deserialized_state != t->state)
@@ -99,10 +95,10 @@ static int target_coldplug(Unit *u) {
 }
 
 static void target_dump(Unit *u, FILE *f, const char *prefix) {
-        Target *t = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        assert(t);
         assert(f);
+        assert(prefix);
 
         fprintf(f,
                 "%sTarget State: %s\n",
@@ -110,10 +106,9 @@ static void target_dump(Unit *u, FILE *f, const char *prefix) {
 }
 
 static int target_start(Unit *u) {
-        Target *t = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
         int r;
 
-        assert(t);
         assert(t->state == TARGET_DEAD);
 
         r = unit_acquire_invocation_id(u);
@@ -125,9 +120,8 @@ static int target_start(Unit *u) {
 }
 
 static int target_stop(Unit *u) {
-        Target *t = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        assert(t);
         assert(t->state == TARGET_ACTIVE);
 
         target_set_state(t, TARGET_DEAD);
@@ -135,21 +129,18 @@ static int target_stop(Unit *u) {
 }
 
 static int target_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Target *s = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        assert(s);
         assert(f);
         assert(fds);
 
-        (void) serialize_item(f, "state", target_state_to_string(s->state));
+        (void) serialize_item(f, "state", target_state_to_string(t->state));
         return 0;
 }
 
 static int target_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Target *s = TARGET(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        assert(s);
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -159,26 +150,26 @@ static int target_deserialize_item(Unit *u, const char *key, const char *value,
 
                 state = target_state_from_string(value);
                 if (state < 0)
-                        log_debug("Failed to parse state value %s", value);
+                        log_unit_debug(u, "Failed to parse state: %s", value);
                 else
-                        s->deserialized_state = state;
+                        t->deserialized_state = state;
 
         } else
-                log_debug("Unknown serialization key '%s'", key);
+                log_unit_debug(u, "Unknown serialization key: %s", key);
 
         return 0;
 }
 
 static UnitActiveState target_active_state(Unit *u) {
-        assert(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        return state_translation_table[TARGET(u)->state];
+        return state_translation_table[t->state];
 }
 
 static const char *target_sub_state_to_string(Unit *u) {
-        assert(u);
+        Target *t = ASSERT_PTR(TARGET(u));
 
-        return target_state_to_string(TARGET(u)->state);
+        return target_state_to_string(t->state);
 }
 
 const UnitVTable target_vtable = {
@@ -213,4 +204,6 @@ const UnitVTable target_vtable = {
                         [JOB_DONE]       = "Stopped target %s.",
                 },
         },
+
+        .notify_supervisor = true,
 };
diff --git a/src/core/timer.c b/src/core/timer.c
index 3c41a25..d7ce473 100644
--- a/src/core/timer.c
+++ b/src/core/timer.c
@@ -25,19 +25,18 @@
 #include "virt.h"
 
 static const UnitActiveState state_translation_table[_TIMER_STATE_MAX] = {
-        [TIMER_DEAD] = UNIT_INACTIVE,
+        [TIMER_DEAD]    = UNIT_INACTIVE,
         [TIMER_WAITING] = UNIT_ACTIVE,
         [TIMER_RUNNING] = UNIT_ACTIVE,
         [TIMER_ELAPSED] = UNIT_ACTIVE,
-        [TIMER_FAILED] = UNIT_FAILED
+        [TIMER_FAILED]  = UNIT_FAILED,
 };
 
 static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata);
 
 static void timer_init(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         t->next_elapse_monotonic_or_boottime = USEC_INFINITY;
@@ -58,9 +57,7 @@ void timer_free_values(Timer *t) {
 }
 
 static void timer_done(Unit *u) {
-        Timer *t = TIMER(u);
-
-        assert(t);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
         timer_free_values(t);
 
@@ -141,7 +138,7 @@ static int timer_setup_persistent(Timer *t) {
 
         if (MANAGER_IS_SYSTEM(UNIT(t)->manager)) {
 
-                r = unit_require_mounts_for(UNIT(t), "/var/lib/systemd/timers", UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(UNIT(t), "/var/lib/systemd/timers", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
 
@@ -192,19 +189,18 @@ static uint64_t timer_get_fixed_delay_hash(Timer *t) {
         }
 
         siphash24_init(&state, hash_key);
-        siphash24_compress(&machine_id, sizeof(sd_id128_t), &state);
+        siphash24_compress_typesafe(machine_id, &state);
         siphash24_compress_boolean(MANAGER_IS_SYSTEM(UNIT(t)->manager), &state);
-        siphash24_compress(&uid, sizeof(uid_t), &state);
+        siphash24_compress_typesafe(uid, &state);
         siphash24_compress_string(UNIT(t)->id, &state);
 
         return siphash24_finalize(&state);
 }
 
 static int timer_load(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         int r;
 
-        assert(u);
         assert(u->load_state == UNIT_STUB);
 
         r = unit_load_fragment_and_dropin(u, true);
@@ -231,9 +227,12 @@ static int timer_load(Unit *u) {
 }
 
 static void timer_dump(Unit *u, FILE *f, const char *prefix) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         Unit *trigger;
 
+        assert(f);
+        assert(prefix);
+
         trigger = UNIT_TRIGGER(u);
 
         fprintf(f,
@@ -279,6 +278,7 @@ static void timer_dump(Unit *u, FILE *f, const char *prefix) {
 
 static void timer_set_state(Timer *t, TimerState state) {
         TimerState old_state;
+
         assert(t);
 
         if (t->state != state)
@@ -303,9 +303,8 @@ static void timer_set_state(Timer *t, TimerState state) {
 static void timer_enter_waiting(Timer *t, bool time_change);
 
 static int timer_coldplug(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(t);
         assert(t->state == TIMER_DEAD);
 
         if (t->deserialized_state == t->state)
@@ -634,10 +633,9 @@ fail:
 }
 
 static int timer_start(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         int r;
 
-        assert(t);
         assert(IN_SET(t->state, TIMER_DEAD, TIMER_FAILED));
 
         r = unit_test_trigger_loaded(u);
@@ -682,9 +680,8 @@ static int timer_start(Unit *u) {
 }
 
 static int timer_stop(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(t);
         assert(IN_SET(t->state, TIMER_WAITING, TIMER_RUNNING, TIMER_ELAPSED));
 
         timer_enter_dead(t, TIMER_SUCCESS);
@@ -692,9 +689,8 @@ static int timer_stop(Unit *u) {
 }
 
 static int timer_serialize(Unit *u, FILE *f, FDSet *fds) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(u);
         assert(f);
         assert(fds);
 
@@ -711,9 +707,8 @@ static int timer_serialize(Unit *u, FILE *f, FDSet *fds) {
 }
 
 static int timer_deserialize_item(Unit *u, const char *key, const char *value, FDSet *fds) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(u);
         assert(key);
         assert(value);
         assert(fds);
@@ -747,21 +742,19 @@ static int timer_deserialize_item(Unit *u, const char *key, const char *value, F
 }
 
 static UnitActiveState timer_active_state(Unit *u) {
-        assert(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        return state_translation_table[TIMER(u)->state];
+        return state_translation_table[t->state];
 }
 
 static const char *timer_sub_state_to_string(Unit *u) {
-        assert(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        return timer_state_to_string(TIMER(u)->state);
+        return timer_state_to_string(t->state);
 }
 
 static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata) {
-        Timer *t = TIMER(userdata);
-
-        assert(t);
+        Timer *t = ASSERT_PTR(TIMER(userdata));
 
         if (t->state != TIMER_WAITING)
                 return 0;
@@ -772,9 +765,8 @@ static int timer_dispatch(sd_event_source *s, uint64_t usec, void *userdata) {
 }
 
 static void timer_trigger_notify(Unit *u, Unit *other) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(u);
         assert(other);
 
         /* Filter out invocations with bogus state */
@@ -812,9 +804,7 @@ static void timer_trigger_notify(Unit *u, Unit *other) {
 }
 
 static void timer_reset_failed(Unit *u) {
-        Timer *t = TIMER(u);
-
-        assert(t);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
         if (t->state == TIMER_FAILED)
                 timer_set_state(t, TIMER_DEAD);
@@ -823,11 +813,9 @@ static void timer_reset_failed(Unit *u) {
 }
 
 static void timer_time_change(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         usec_t ts;
 
-        assert(u);
-
         if (t->state != TIMER_WAITING)
                 return;
 
@@ -849,9 +837,7 @@ static void timer_time_change(Unit *u) {
 }
 
 static void timer_timezone_change(Unit *u) {
-        Timer *t = TIMER(u);
-
-        assert(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
         if (t->state != TIMER_WAITING)
                 return;
@@ -866,10 +852,9 @@ static void timer_timezone_change(Unit *u) {
 }
 
 static int timer_clean(Unit *u, ExecCleanMask mask) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         int r;
 
-        assert(t);
         assert(mask != 0);
 
         if (t->state != TIMER_DEAD)
@@ -892,9 +877,8 @@ static int timer_clean(Unit *u, ExecCleanMask mask) {
 }
 
 static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
 
-        assert(t);
         assert(ret);
 
         *ret = t->persistent ? EXEC_CLEAN_STATE : 0;
@@ -902,11 +886,9 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
 }
 
 static int timer_can_start(Unit *u) {
-        Timer *t = TIMER(u);
+        Timer *t = ASSERT_PTR(TIMER(u));
         int r;
 
-        assert(t);
-
         r = unit_test_start_limit(u);
         if (r < 0) {
                 timer_enter_dead(t, TIMER_FAILURE_START_LIMIT_HIT);
@@ -917,9 +899,8 @@ static int timer_can_start(Unit *u) {
 }
 
 static void activation_details_timer_serialize(ActivationDetails *details, FILE *f) {
-        ActivationDetailsTimer *t = ACTIVATION_DETAILS_TIMER(details);
+        ActivationDetailsTimer *t = ASSERT_PTR(ACTIVATION_DETAILS_TIMER(details));
 
-        assert(details);
         assert(f);
         assert(t);
 
@@ -950,10 +931,9 @@ static int activation_details_timer_deserialize(const char *key, const char *val
 }
 
 static int activation_details_timer_append_env(ActivationDetails *details, char ***strv) {
-        ActivationDetailsTimer *t = ACTIVATION_DETAILS_TIMER(details);
+        ActivationDetailsTimer *t = ASSERT_PTR(ACTIVATION_DETAILS_TIMER(details));
         int r;
 
-        assert(details);
         assert(strv);
         assert(t);
 
@@ -972,10 +952,9 @@ static int activation_details_timer_append_env(ActivationDetails *details, char
 }
 
 static int activation_details_timer_append_pair(ActivationDetails *details, char ***strv) {
-        ActivationDetailsTimer *t = ACTIVATION_DETAILS_TIMER(details);
+        ActivationDetailsTimer *t = ASSERT_PTR(ACTIVATION_DETAILS_TIMER(details));
         int r;
 
-        assert(details);
         assert(strv);
         assert(t);
 
@@ -1014,7 +993,7 @@ static const char* const timer_base_table[_TIMER_BASE_MAX] = {
         [TIMER_STARTUP]       = "OnStartupSec",
         [TIMER_UNIT_ACTIVE]   = "OnUnitActiveSec",
         [TIMER_UNIT_INACTIVE] = "OnUnitInactiveSec",
-        [TIMER_CALENDAR]      = "OnCalendar"
+        [TIMER_CALENDAR]      = "OnCalendar",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(timer_base, TimerBase);
diff --git a/src/core/transaction.c b/src/core/transaction.c
index a81c40f..ab6e699 100644
--- a/src/core/transaction.c
+++ b/src/core/transaction.c
@@ -446,10 +446,10 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
          * the graph over 'before' edges in the actual job execution order. We traverse over both unit
          * ordering dependencies and we test with job_compare() whether it is the 'before' edge in the job
          * execution ordering. */
-        for (size_t d = 0; d < ELEMENTSOF(directions); d++) {
+        FOREACH_ELEMENT(d, directions) {
                 Unit *u;
 
-                UNIT_FOREACH_DEPENDENCY(u, j->unit, directions[d]) {
+                UNIT_FOREACH_DEPENDENCY(u, j->unit, *d) {
                         Job *o;
 
                         /* Is there a job for this unit? */
@@ -463,7 +463,7 @@ static int transaction_verify_order_one(Transaction *tr, Job *j, Job *from, unsi
                         }
 
                         /* Cut traversing if the job j is not really *before* o. */
-                        if (job_compare(j, o, directions[d]) >= 0)
+                        if (job_compare(j, o, *d) >= 0)
                                 continue;
 
                         r = transaction_verify_order_one(tr, o, j, generation, e);
@@ -964,7 +964,7 @@ int transaction_add_job_and_dependencies(
 
         if (type != JOB_STOP) {
                 r = bus_unit_validate_load_state(unit, e);
-                /* The time-based cache allows to start new units without daemon-reload, but if they are
+                /* The time-based cache allows new units to be started without daemon-reload, but if they are
                  * already referenced (because of dependencies or ordering) then we have to force a load of
                  * the fragment. As an optimization, check first if anything in the usual paths was modified
                  * since the last time the cache was loaded. Also check if the last time an attempt to load
diff --git a/src/core/unit-printf.c b/src/core/unit-printf.c
index 9f95984..f25e2e3 100644
--- a/src/core/unit-printf.c
+++ b/src/core/unit-printf.c
@@ -4,6 +4,7 @@
 #include "cgroup-util.h"
 #include "format-util.h"
 #include "macro.h"
+#include "sd-path.h"
 #include "specifier.h"
 #include "string-util.h"
 #include "strv.h"
@@ -86,68 +87,46 @@ static void bad_specifier(const Unit *u, char specifier) {
 
 static int specifier_cgroup(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
         const Unit *u = ASSERT_PTR(userdata);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
         bad_specifier(u, specifier);
 
-        if (u->cgroup_path) {
-                char *n;
-
-                n = strdup(u->cgroup_path);
-                if (!n)
-                        return -ENOMEM;
-
-                *ret = n;
-                return 0;
-        }
+        if (crt && crt->cgroup_path)
+                return strdup_to(ret, crt->cgroup_path);
 
         return unit_default_cgroup_path(u, ret);
 }
 
 static int specifier_cgroup_root(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
         const Unit *u = ASSERT_PTR(userdata);
-        char *n;
 
         bad_specifier(u, specifier);
 
-        n = strdup(u->manager->cgroup_root);
-        if (!n)
-                return -ENOMEM;
-
-        *ret = n;
-        return 0;
+        return strdup_to(ret, u->manager->cgroup_root);
 }
 
 static int specifier_cgroup_slice(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
         const Unit *u = ASSERT_PTR(userdata), *slice;
-        char *n;
 
         bad_specifier(u, specifier);
 
         slice = UNIT_GET_SLICE(u);
         if (slice) {
-                if (slice->cgroup_path)
-                        n = strdup(slice->cgroup_path);
-                else
-                        return unit_default_cgroup_path(slice, ret);
-        } else
-                n = strdup(u->manager->cgroup_root);
-        if (!n)
-                return -ENOMEM;
+                CGroupRuntime *crt = unit_get_cgroup_runtime(slice);
 
-        *ret = n;
-        return 0;
+                if (crt && crt->cgroup_path)
+                        return strdup_to(ret, crt->cgroup_path);
+
+                return unit_default_cgroup_path(slice, ret);
+        }
+
+        return strdup_to(ret, u->manager->cgroup_root);
 }
 
 static int specifier_special_directory(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
         const Unit *u = ASSERT_PTR(userdata);
-        char *n;
-
-        n = strdup(u->manager->prefix[PTR_TO_UINT(data)]);
-        if (!n)
-                return -ENOMEM;
 
-        *ret = n;
-        return 0;
+        return strdup_to(ret, u->manager->prefix[PTR_TO_UINT(data)]);
 }
 
 static int specifier_credentials_dir(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
@@ -164,6 +143,14 @@ static int specifier_credentials_dir(char specifier, const void *data, const cha
         return 0;
 }
 
+static int specifier_shared_data_dir(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
+        const Unit *u = ASSERT_PTR(userdata);
+
+        assert(ret);
+
+        return sd_path_lookup(MANAGER_IS_SYSTEM(u->manager) ? SD_PATH_SYSTEM_SHARED : SD_PATH_USER_SHARED, NULL, ret);
+}
+
 int unit_name_printf(const Unit *u, const char* format, char **ret) {
         /*
          * This will use the passed string as format string and replace the following specifiers (which should all be
@@ -208,6 +195,7 @@ int unit_full_printf_full(const Unit *u, const char *format, size_t max_length,
          *
          * %C: the cache directory root (e.g. /var/cache or $XDG_CACHE_HOME)
          * %d: the credentials directory ($CREDENTIALS_DIRECTORY)
+         * %D: the shared data root (e.g. /usr/share or $XDG_DATA_HOME)
          * %E: the configuration directory root (e.g. /etc or $XDG_CONFIG_HOME)
          * %L: the log directory root (e.g. /var/log or $XDG_STATE_HOME/log)
          * %S: the state directory root (e.g. /var/lib or $XDG_STATE_HOME)
@@ -245,6 +233,7 @@ int unit_full_printf_full(const Unit *u, const char *format, size_t max_length,
 
                 { 'C', specifier_special_directory,        UINT_TO_PTR(EXEC_DIRECTORY_CACHE) },
                 { 'd', specifier_credentials_dir,          NULL },
+                { 'D', specifier_shared_data_dir,          NULL },
                 { 'E', specifier_special_directory,        UINT_TO_PTR(EXEC_DIRECTORY_CONFIGURATION) },
                 { 'L', specifier_special_directory,        UINT_TO_PTR(EXEC_DIRECTORY_LOGS) },
                 { 'S', specifier_special_directory,        UINT_TO_PTR(EXEC_DIRECTORY_STATE) },
diff --git a/src/core/unit-serialize.c b/src/core/unit-serialize.c
index fe4221c..175e327 100644
--- a/src/core/unit-serialize.c
+++ b/src/core/unit-serialize.c
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include "bpf-restrict-ifaces.h"
 #include "bpf-socket-bind.h"
 #include "bus-util.h"
 #include "dbus.h"
@@ -7,29 +8,11 @@
 #include "fileio.h"
 #include "format-util.h"
 #include "parse-util.h"
-#include "restrict-ifaces.h"
 #include "serialize.h"
 #include "string-table.h"
 #include "unit-serialize.h"
 #include "user-util.h"
 
-static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
-        _cleanup_free_ char *s = NULL;
-        int r;
-
-        assert(f);
-        assert(key);
-
-        if (mask == 0)
-                return 0;
-
-        r = cg_mask_to_string(mask, &s);
-        if (r < 0)
-                return log_error_errno(r, "Failed to format cgroup mask: %m");
-
-        return serialize_item(f, key, s);
-}
-
 /* Make sure out values fit in the bitfield. */
 assert_cc(_UNIT_MARKER_MAX <= sizeof(((Unit){}).markers) * 8);
 
@@ -69,40 +52,6 @@ static int deserialize_markers(Unit *u, const char *value) {
         }
 }
 
-static const char* const ip_accounting_metric_field_table[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
-        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
-        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
-        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(ip_accounting_metric_field, CGroupIPAccountingMetric);
-
-static const char* const io_accounting_metric_field_base_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-base",
-        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-base",
-        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-base",
-        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-base",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_base, CGroupIOAccountingMetric);
-
-static const char* const io_accounting_metric_field_last_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-last",
-        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-last",
-        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-last",
-        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_last, CGroupIOAccountingMetric);
-
-static const char* const memory_accounting_metric_field_last_table[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1] = {
-        [CGROUP_MEMORY_PEAK]      = "memory-accounting-peak",
-        [CGROUP_MEMORY_SWAP_PEAK] = "memory-accounting-swap-peak",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(memory_accounting_metric_field_last, CGroupMemoryAccountingMetric);
-
 int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
         int r;
 
@@ -158,48 +107,7 @@ int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
         (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_ratelimit_interval);
         (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_ratelimit_burst);
 
-        (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
-        if (u->cpu_usage_last != NSEC_INFINITY)
-                (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
-
-        if (u->managed_oom_kill_last > 0)
-                (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, u->managed_oom_kill_last);
-
-        if (u->oom_kill_last > 0)
-                (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, u->oom_kill_last);
-
-        for (CGroupIOAccountingMetric im = 0; im < _CGROUP_IO_ACCOUNTING_METRIC_MAX; im++) {
-                (void) serialize_item_format(f, io_accounting_metric_field_base_to_string(im), "%" PRIu64, u->io_accounting_base[im]);
-
-                if (u->io_accounting_last[im] != UINT64_MAX)
-                        (void) serialize_item_format(f, io_accounting_metric_field_last_to_string(im), "%" PRIu64, u->io_accounting_last[im]);
-        }
-
-        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++) {
-                uint64_t v;
-
-                r = unit_get_memory_accounting(u, metric, &v);
-                if (r >= 0)
-                        (void) serialize_item_format(f, memory_accounting_metric_field_last_to_string(metric), "%" PRIu64, v);
-        }
-
-        if (u->cgroup_path)
-                (void) serialize_item(f, "cgroup", u->cgroup_path);
-
-        (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized);
-        (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
-        (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
-        (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
-
-        (void) bpf_serialize_socket_bind(u, f, fds);
-
-        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", u->ip_bpf_ingress_installed);
-        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", u->ip_bpf_egress_installed);
-        (void) bpf_program_serialize_attachment(f, fds, "bpf-device-control-installed", u->bpf_device_control_installed);
-        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", u->ip_bpf_custom_ingress_installed);
-        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", u->ip_bpf_custom_egress_installed);
-
-        (void) serialize_restrict_network_interfaces(u, f, fds);
+        (void) cgroup_runtime_serialize(u, f, fds);
 
         if (uid_is_valid(u->ref_uid))
                 (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
@@ -214,14 +122,6 @@ int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
 
         bus_track_serialize(u->bus_track, f, "ref");
 
-        for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
-                uint64_t v;
-
-                r = unit_get_ip_accounting(u, m, &v);
-                if (r >= 0)
-                        (void) serialize_item_format(f, ip_accounting_metric_field_to_string(m), "%" PRIu64, v);
-        }
-
         if (!switching_root) {
                 if (u->job) {
                         fputs("job\n", f);
@@ -297,7 +197,6 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
 
         for (;;) {
                 _cleanup_free_ char *l  = NULL;
-                ssize_t m;
                 size_t k;
                 char *v;
 
@@ -380,76 +279,7 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                 else if (MATCH_DESERIALIZE("exported-log-rate-limit-burst", l, v, parse_boolean, u->exported_log_ratelimit_burst))
                         continue;
 
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cpu-usage-base", l, v, safe_atou64, u->cpu_usage_base) ||
-                         MATCH_DESERIALIZE_IMMEDIATE("cpuacct-usage-base", l, v, safe_atou64, u->cpu_usage_base))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cpu-usage-last", l, v, safe_atou64, u->cpu_usage_last))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("managed-oom-kill-last", l, v, safe_atou64, u->managed_oom_kill_last))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("oom-kill-last", l, v, safe_atou64, u->oom_kill_last))
-                        continue;
-
-                else if (streq(l, "cgroup")) {
-                        r = unit_set_cgroup_path(u, v);
-                        if (r < 0)
-                                log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v);
-
-                        (void) unit_watch_cgroup(u);
-                        (void) unit_watch_cgroup_memory(u);
-
-                        continue;
-
-                } else if (MATCH_DESERIALIZE("cgroup-realized", l, v, parse_boolean, u->cgroup_realized))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-realized-mask", l, v, cg_mask_from_string, u->cgroup_realized_mask))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-enabled-mask", l, v, cg_mask_from_string, u->cgroup_enabled_mask))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-invalidated-mask", l, v, cg_mask_from_string, u->cgroup_invalidated_mask))
-                        continue;
-
-                else if (STR_IN_SET(l, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
-                        int fd;
-
-                        fd = deserialize_fd(fds, v);
-                        if (fd >= 0)
-                                (void) bpf_socket_bind_add_initial_link_fd(u, fd);
-                        continue;
-
-                } else if (streq(l, "ip-bpf-ingress-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_ingress_installed);
-                         continue;
-                } else if (streq(l, "ip-bpf-egress-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_egress_installed);
-                         continue;
-                } else if (streq(l, "bpf-device-control-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->bpf_device_control_installed);
-                         continue;
-
-                } else if (streq(l, "ip-bpf-custom-ingress-installed")) {
-                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_ingress_installed);
-                         continue;
-                } else if (streq(l, "ip-bpf-custom-egress-installed")) {
-                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_egress_installed);
-                         continue;
-
-                } else if (streq(l, "restrict-ifaces-bpf-fd")) {
-                        int fd;
-
-                        fd = deserialize_fd(fds, v);
-                        if (fd >= 0)
-                                (void) restrict_network_interfaces_add_initial_link_fd(u, fd);
-
-                        continue;
-
-                } else if (streq(l, "ref-uid")) {
+                else if (streq(l, "ref-uid")) {
                         uid_t uid;
 
                         r = parse_uid(v, &uid);
@@ -499,55 +329,6 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                         continue;
                 }
 
-                m = memory_accounting_metric_field_last_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse memory accounting last value %s, ignoring.", v);
-                        else
-                                u->memory_accounting_last[m] = c;
-                        continue;
-                }
-
-                /* Check if this is an IP accounting metric serialization field */
-                m = ip_accounting_metric_field_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
-                        else
-                                u->ip_accounting_extra[m] = c;
-                        continue;
-                }
-
-                m = io_accounting_metric_field_base_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IO accounting base value %s, ignoring.", v);
-                        else
-                                u->io_accounting_base[m] = c;
-                        continue;
-                }
-
-                m = io_accounting_metric_field_last_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IO accounting last value %s, ignoring.", v);
-                        else
-                                u->io_accounting_last[m] = c;
-                        continue;
-                }
-
                 r = exec_shared_runtime_deserialize_compat(u, l, v, fds);
                 if (r < 0) {
                         log_unit_warning(u, "Failed to deserialize runtime parameter '%s', ignoring.", l);
@@ -556,6 +337,13 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                         /* Returns positive if key was handled by the call */
                         continue;
 
+                r = cgroup_runtime_deserialize_one(u, l, v, fds);
+                if (r < 0) {
+                        log_unit_warning(u, "Failed to deserialize cgroup runtime parameter '%s', ignoring.", l);
+                        continue;
+                } else if (r > 0)
+                        continue; /* was handled */
+
                 if (UNIT_VTABLE(u)->deserialize_item) {
                         r = UNIT_VTABLE(u)->deserialize_item(u, l, v, fds);
                         if (r < 0)
@@ -574,7 +362,9 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
         /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings
          * applied after we are done. For that we invalidate anything already realized, so that we can
          * realize it again. */
-        if (u->cgroup_realized) {
+        CGroupRuntime *crt;
+        crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_realized) {
                 unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
                 unit_invalidate_cgroup_bpf(u);
         }
@@ -661,8 +451,8 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         prefix2 = strjoina(prefix, "\t");
 
         fprintf(f,
-                "%s-> Unit %s:\n",
-                prefix, u->id);
+                "%s%s Unit %s:\n",
+                prefix, special_glyph(SPECIAL_GLYPH_ARROW_RIGHT), u->id);
 
         SET_FOREACH(t, u->aliases)
                 fprintf(f, "%s\tAlias: %s\n", prefix, t);
@@ -707,23 +497,25 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         }
 
         if (UNIT_HAS_CGROUP_CONTEXT(u)) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
                 fprintf(f,
                         "%s\tSlice: %s\n"
                         "%s\tCGroup: %s\n"
                         "%s\tCGroup realized: %s\n",
                         prefix, strna(unit_slice_name(u)),
-                        prefix, strna(u->cgroup_path),
-                        prefix, yes_no(u->cgroup_realized));
+                        prefix, strna(crt ? crt->cgroup_path : NULL),
+                        prefix, yes_no(crt ? crt->cgroup_realized : false));
 
-                if (u->cgroup_realized_mask != 0) {
+                if (crt && crt->cgroup_realized_mask != 0) {
                         _cleanup_free_ char *s = NULL;
-                        (void) cg_mask_to_string(u->cgroup_realized_mask, &s);
+                        (void) cg_mask_to_string(crt->cgroup_realized_mask, &s);
                         fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s));
                 }
 
-                if (u->cgroup_enabled_mask != 0) {
+                if (crt && crt->cgroup_enabled_mask != 0) {
                         _cleanup_free_ char *s = NULL;
-                        (void) cg_mask_to_string(u->cgroup_enabled_mask, &s);
+                        (void) cg_mask_to_string(crt->cgroup_enabled_mask, &s);
                         fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s));
                 }
 
@@ -831,21 +623,26 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
                 }
         }
 
-        if (!hashmap_isempty(u->requires_mounts_for)) {
-                UnitDependencyInfo di;
-                const char *path;
+        for (UnitMountDependencyType type = 0; type < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX; type++)
+                if (!hashmap_isempty(u->mounts_for[type])) {
+                        UnitDependencyInfo di;
+                        const char *path;
 
-                HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for) {
-                        bool space = false;
+                        HASHMAP_FOREACH_KEY(di.data, path, u->mounts_for[type]) {
+                                bool space = false;
 
-                        fprintf(f, "%s\tRequiresMountsFor: %s (", prefix, path);
+                                fprintf(f,
+                                        "%s\t%s: %s (",
+                                        prefix,
+                                        unit_mount_dependency_type_to_string(type),
+                                        path);
 
-                        print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
-                        print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
+                                print_unit_dependency_mask(f, "origin", di.origin_mask, &space);
+                                print_unit_dependency_mask(f, "destination", di.destination_mask, &space);
 
-                        fputs(")\n", f);
+                                fputs(")\n", f);
+                        }
                 }
-        }
 
         if (u->load_state == UNIT_LOADED) {
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 2fc9f5a..2d40618 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -67,27 +67,29 @@
 #endif
 
 /* Thresholds for logging at INFO level about resource consumption */
-#define MENTIONWORTHY_CPU_NSEC (1 * NSEC_PER_SEC)
-#define MENTIONWORTHY_IO_BYTES (1024 * 1024ULL)
-#define MENTIONWORTHY_IP_BYTES (0ULL)
+#define MENTIONWORTHY_CPU_NSEC     (1 * NSEC_PER_SEC)
+#define MENTIONWORTHY_MEMORY_BYTES (64 * U64_MB)
+#define MENTIONWORTHY_IO_BYTES     (1 * U64_MB)
+#define MENTIONWORTHY_IP_BYTES     UINT64_C(0)
 
-/* Thresholds for logging at INFO level about resource consumption */
-#define NOTICEWORTHY_CPU_NSEC (10*60 * NSEC_PER_SEC) /* 10 minutes */
-#define NOTICEWORTHY_IO_BYTES (10 * 1024 * 1024ULL)  /* 10 MB */
-#define NOTICEWORTHY_IP_BYTES (128 * 1024 * 1024ULL) /* 128 MB */
+/* Thresholds for logging at NOTICE level about resource consumption */
+#define NOTICEWORTHY_CPU_NSEC     (10 * NSEC_PER_MINUTE)
+#define NOTICEWORTHY_MEMORY_BYTES (512 * U64_MB)
+#define NOTICEWORTHY_IO_BYTES     (10 * U64_MB)
+#define NOTICEWORTHY_IP_BYTES     (128 * U64_MB)
 
 const UnitVTable * const unit_vtable[_UNIT_TYPE_MAX] = {
-        [UNIT_SERVICE] = &service_vtable,
-        [UNIT_SOCKET] = &socket_vtable,
-        [UNIT_TARGET] = &target_vtable,
-        [UNIT_DEVICE] = &device_vtable,
-        [UNIT_MOUNT] = &mount_vtable,
+        [UNIT_SERVICE]   = &service_vtable,
+        [UNIT_SOCKET]    = &socket_vtable,
+        [UNIT_TARGET]    = &target_vtable,
+        [UNIT_DEVICE]    = &device_vtable,
+        [UNIT_MOUNT]     = &mount_vtable,
         [UNIT_AUTOMOUNT] = &automount_vtable,
-        [UNIT_SWAP] = &swap_vtable,
-        [UNIT_TIMER] = &timer_vtable,
-        [UNIT_PATH] = &path_vtable,
-        [UNIT_SLICE] = &slice_vtable,
-        [UNIT_SCOPE] = &scope_vtable,
+        [UNIT_SWAP]      = &swap_vtable,
+        [UNIT_TIMER]     = &timer_vtable,
+        [UNIT_PATH]      = &path_vtable,
+        [UNIT_SLICE]     = &slice_vtable,
+        [UNIT_SCOPE]     = &scope_vtable,
 };
 
 Unit* unit_new(Manager *m, size_t size) {
@@ -107,29 +109,13 @@ Unit* unit_new(Manager *m, size_t size) {
         u->unit_file_preset = -1;
         u->on_failure_job_mode = JOB_REPLACE;
         u->on_success_job_mode = JOB_FAIL;
-        u->cgroup_control_inotify_wd = -1;
-        u->cgroup_memory_inotify_wd = -1;
         u->job_timeout = USEC_INFINITY;
         u->job_running_timeout = USEC_INFINITY;
         u->ref_uid = UID_INVALID;
         u->ref_gid = GID_INVALID;
-        u->cpu_usage_last = NSEC_INFINITY;
-
-        unit_reset_memory_accounting_last(u);
 
-        unit_reset_io_accounting_last(u);
-
-        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
         u->failure_action_exit_status = u->success_action_exit_status = -1;
 
-        u->ip_accounting_ingress_map_fd = -EBADF;
-        u->ip_accounting_egress_map_fd = -EBADF;
-
-        u->ipv4_allow_map_fd = -EBADF;
-        u->ipv6_allow_map_fd = -EBADF;
-        u->ipv4_deny_map_fd = -EBADF;
-        u->ipv6_deny_map_fd = -EBADF;
-
         u->last_section_private = -1;
 
         u->start_ratelimit = (const RateLimit) {
@@ -137,7 +123,13 @@ Unit* unit_new(Manager *m, size_t size) {
                 m->defaults.start_limit_burst,
         };
 
-        u->auto_start_stop_ratelimit = (const RateLimit) { .interval = 10 * USEC_PER_SEC, .burst = 16 };
+        u->auto_start_stop_ratelimit = (const RateLimit) {
+                .interval = 10 * USEC_PER_SEC,
+                .burst = 16
+        };
+
+        unit_reset_memory_accounting_last(u);
+        unit_reset_io_accounting_last(u);
 
         return u;
 }
@@ -251,12 +243,12 @@ int unit_add_name(Unit *u, const char *text) {
         if (unit_name_is_valid(text, UNIT_NAME_TEMPLATE)) {
                 if (!u->instance)
                         return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
-                                                    "instance is not set when adding name '%s': %m", text);
+                                                    "Instance is not set when adding name '%s'.", text);
 
                 r = unit_name_replace_instance(text, u->instance, &name);
                 if (r < 0)
                         return log_unit_debug_errno(u, r,
-                                                    "failed to build instance name from '%s': %m", text);
+                                                    "Failed to build instance name from '%s': %m", text);
         } else {
                 name = strdup(text);
                 if (!name)
@@ -268,47 +260,47 @@ int unit_add_name(Unit *u, const char *text) {
 
         if (hashmap_contains(u->manager->units, name))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EEXIST),
-                                            "unit already exist when adding name '%s': %m", name);
+                                            "Unit already exists when adding name '%s'.", name);
 
         if (!unit_name_is_valid(name, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
-                                            "name '%s' is invalid: %m", name);
+                                            "Name '%s' is invalid.", name);
 
         t = unit_name_to_type(name);
         if (t < 0)
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
-                                            "failed to derive unit type from name '%s': %m", name);
+                                            "Failed to derive unit type from name '%s'.", name);
 
         if (u->type != _UNIT_TYPE_INVALID && t != u->type)
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
-                                            "unit type is illegal: u->type(%d) and t(%d) for name '%s': %m",
+                                            "Unit type is illegal: u->type(%d) and t(%d) for name '%s'.",
                                             u->type, t, name);
 
         r = unit_name_to_instance(name, &instance);
         if (r < 0)
-                return log_unit_debug_errno(u, r, "failed to extract instance from name '%s': %m", name);
+                return log_unit_debug_errno(u, r, "Failed to extract instance from name '%s': %m", name);
 
         if (instance && !unit_type_may_template(t))
-                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL), "templates are not allowed for name '%s': %m", name);
+                return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL), "Templates are not allowed for name '%s'.", name);
 
         /* Ensure that this unit either has no instance, or that the instance matches. */
         if (u->type != _UNIT_TYPE_INVALID && !streq_ptr(u->instance, instance))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EINVAL),
-                                            "cannot add name %s, the instances don't match (\"%s\" != \"%s\").",
+                                            "Cannot add name %s, the instances don't match (\"%s\" != \"%s\").",
                                             name, instance, u->instance);
 
         if (u->id && !unit_type_may_alias(t))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EEXIST),
-                                            "cannot add name %s, aliases are not allowed for %s units.",
+                                            "Cannot add name %s, aliases are not allowed for %s units.",
                                             name, unit_type_to_string(t));
 
         if (hashmap_size(u->manager->units) >= MANAGER_MAX_NAMES)
-                return log_unit_warning_errno(u, SYNTHETIC_ERRNO(E2BIG), "cannot add name, manager has too many units: %m");
+                return log_unit_warning_errno(u, SYNTHETIC_ERRNO(E2BIG), "Cannot add name, manager has too many units.");
 
         /* Add name to the global hashmap first, because that's easier to undo */
         r = hashmap_put(u->manager->units, name, u);
         if (r < 0)
-                return log_unit_debug_errno(u, r, "add unit to hashmap failed for name '%s': %m", text);
+                return log_unit_debug_errno(u, r, "Add unit to hashmap failed for name '%s': %m", text);
 
         if (u->id) {
                 r = unit_add_alias(u, name); /* unit_add_alias() takes ownership of the name on success */
@@ -475,7 +467,7 @@ bool unit_may_gc(Unit *u) {
                 break;
 
         case COLLECT_INACTIVE_OR_FAILED:
-                if (!IN_SET(state, UNIT_INACTIVE, UNIT_FAILED))
+                if (!UNIT_IS_INACTIVE_OR_FAILED(state))
                         return false;
 
                 break;
@@ -488,16 +480,11 @@ bool unit_may_gc(Unit *u) {
         if (unit_success_failure_handler_has_jobs(u))
                 return false;
 
-        if (u->cgroup_path) {
-                /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
-                 * around. Units with active processes should never be collected. */
-
-                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-                if (r < 0)
-                        log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", empty_to_root(u->cgroup_path));
-                if (r <= 0)
-                        return false;
-        }
+        /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
+         * around. Units with active processes should never be collected. */
+        r = unit_cgroup_is_empty(u);
+        if (r <= 0 && r != -ENXIO)
+                return false; /* ENXIO means: currently not realized */
 
         if (!UNIT_VTABLE(u)->may_gc)
                 return true;
@@ -689,38 +676,39 @@ static void unit_remove_transient(Unit *u) {
         }
 }
 
-static void unit_free_requires_mounts_for(Unit *u) {
+static void unit_free_mounts_for(Unit *u) {
         assert(u);
 
-        for (;;) {
-                _cleanup_free_ char *path = NULL;
+        for (UnitMountDependencyType t = 0; t < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX; ++t) {
+                for (;;) {
+                        _cleanup_free_ char *path = NULL;
+
+                        path = hashmap_steal_first_key(u->mounts_for[t]);
+                        if (!path)
+                                break;
 
-                path = hashmap_steal_first_key(u->requires_mounts_for);
-                if (!path)
-                        break;
-                else {
                         char s[strlen(path) + 1];
 
                         PATH_FOREACH_PREFIX_MORE(s, path) {
                                 char *y;
                                 Set *x;
 
-                                x = hashmap_get2(u->manager->units_requiring_mounts_for, s, (void**) &y);
+                                x = hashmap_get2(u->manager->units_needing_mounts_for[t], s, (void**) &y);
                                 if (!x)
                                         continue;
 
                                 (void) set_remove(x, u);
 
                                 if (set_isempty(x)) {
-                                        (void) hashmap_remove(u->manager->units_requiring_mounts_for, y);
+                                        assert_se(hashmap_remove(u->manager->units_needing_mounts_for[t], y));
                                         free(y);
                                         set_free(x);
                                 }
                         }
                 }
-        }
 
-        u->requires_mounts_for = hashmap_free(u->requires_mounts_for);
+                u->mounts_for[t] = hashmap_free(u->mounts_for[t]);
+        }
 }
 
 static void unit_done(Unit *u) {
@@ -769,7 +757,7 @@ Unit* unit_free(Unit *u) {
         u->deserialized_refs = strv_free(u->deserialized_refs);
         u->pending_freezer_invocation = sd_bus_message_unref(u->pending_freezer_invocation);
 
-        unit_free_requires_mounts_for(u);
+        unit_free_mounts_for(u);
 
         SET_FOREACH(t, u->aliases)
                 hashmap_remove_value(u->manager->units, t, u);
@@ -801,12 +789,6 @@ Unit* unit_free(Unit *u) {
         if (u->on_console)
                 manager_unref_console(u->manager);
 
-        fdset_free(u->initial_socket_bind_link_fds);
-#if BPF_FRAMEWORK
-        bpf_link_free(u->ipv4_socket_bind_link);
-        bpf_link_free(u->ipv6_socket_bind_link);
-#endif
-
         unit_release_cgroup(u);
 
         if (!MANAGER_IS_RELOADING(u->manager))
@@ -863,16 +845,6 @@ Unit* unit_free(Unit *u) {
 
         bpf_firewall_close(u);
 
-        hashmap_free(u->bpf_foreign_by_key);
-
-        bpf_program_free(u->bpf_device_control_installed);
-
-#if BPF_FRAMEWORK
-        bpf_link_free(u->restrict_ifaces_ingress_bpf_link);
-        bpf_link_free(u->restrict_ifaces_egress_bpf_link);
-#endif
-        fdset_free(u->initial_restric_ifaces_link_fds);
-
         condition_free_list(u->conditions);
         condition_free_list(u->asserts);
 
@@ -902,32 +874,6 @@ FreezerState unit_freezer_state(Unit *u) {
         return u->freezer_state;
 }
 
-int unit_freezer_state_kernel(Unit *u, FreezerState *ret) {
-        char *values[1] = {};
-        int r;
-
-        assert(u);
-
-        r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
-                                   STRV_MAKE("frozen"), values);
-        if (r < 0)
-                return r;
-
-        r = _FREEZER_STATE_INVALID;
-
-        if (values[0])  {
-                if (streq(values[0], "0"))
-                        r = FREEZER_RUNNING;
-                else if (streq(values[0], "1"))
-                        r = FREEZER_FROZEN;
-        }
-
-        free(values[0]);
-        *ret = r;
-
-        return 0;
-}
-
 UnitActiveState unit_active_state(Unit *u) {
         assert(u);
 
@@ -1277,20 +1223,24 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
 
         /* Unlike unit_add_dependency() or friends, this always returns 0 on success. */
 
-        if (c->working_directory && !c->working_directory_missing_ok) {
-                r = unit_require_mounts_for(u, c->working_directory, UNIT_DEPENDENCY_FILE);
+        if (c->working_directory) {
+                r = unit_add_mounts_for(
+                                u,
+                                c->working_directory,
+                                UNIT_DEPENDENCY_FILE,
+                                c->working_directory_missing_ok ? UNIT_MOUNT_WANTS : UNIT_MOUNT_REQUIRES);
                 if (r < 0)
                         return r;
         }
 
         if (c->root_directory) {
-                r = unit_require_mounts_for(u, c->root_directory, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(u, c->root_directory, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
                 if (r < 0)
                         return r;
         }
 
         if (c->root_image) {
-                r = unit_require_mounts_for(u, c->root_image, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(u, c->root_image, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
                 if (r < 0)
                         return r;
         }
@@ -1299,14 +1249,14 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
                 if (!u->manager->prefix[dt])
                         continue;
 
-                for (size_t i = 0; i < c->directories[dt].n_items; i++) {
+                FOREACH_ARRAY(i, c->directories[dt].items, c->directories[dt].n_items) {
                         _cleanup_free_ char *p = NULL;
 
-                        p = path_join(u->manager->prefix[dt], c->directories[dt].items[i].path);
+                        p = path_join(u->manager->prefix[dt], i->path);
                         if (!p)
                                 return -ENOMEM;
 
-                        r = unit_require_mounts_for(u, p, UNIT_DEPENDENCY_FILE);
+                        r = unit_add_mounts_for(u, p, UNIT_DEPENDENCY_FILE, UNIT_MOUNT_REQUIRES);
                         if (r < 0)
                                 return r;
                 }
@@ -1326,16 +1276,11 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
         }
 
         if (c->private_tmp) {
-
-                /* FIXME: for now we make a special case for /tmp and add a weak dependency on
-                 * tmp.mount so /tmp being masked is supported. However there's no reason to treat
-                 * /tmp specifically and masking other mount units should be handled more
-                 * gracefully too, see PR#16894. */
-                r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_WANTS, "tmp.mount", true, UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(u, "/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
                 if (r < 0)
                         return r;
 
-                r = unit_require_mounts_for(u, "/var/tmp", UNIT_DEPENDENCY_FILE);
+                r = unit_add_mounts_for(u, "/var/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
                 if (r < 0)
                         return r;
 
@@ -1366,23 +1311,26 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
          * is run first. */
 
         if (c->log_namespace) {
-                _cleanup_free_ char *socket_unit = NULL, *varlink_socket_unit = NULL;
-
-                r = unit_name_build_from_type("systemd-journald", c->log_namespace, UNIT_SOCKET, &socket_unit);
-                if (r < 0)
-                        return r;
+                static const struct {
+                        const char *template;
+                        UnitType type;
+                } deps[] = {
+                        { "systemd-journald",         UNIT_SOCKET,  },
+                        { "systemd-journald-varlink", UNIT_SOCKET,  },
+                        { "systemd-journald-sync",    UNIT_SERVICE, },
+                };
 
-                r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, socket_unit, true, UNIT_DEPENDENCY_FILE);
-                if (r < 0)
-                        return r;
+                FOREACH_ELEMENT(i, deps) {
+                        _cleanup_free_ char *unit = NULL;
 
-                r = unit_name_build_from_type("systemd-journald-varlink", c->log_namespace, UNIT_SOCKET, &varlink_socket_unit);
-                if (r < 0)
-                        return r;
+                        r = unit_name_build_from_type(i->template, c->log_namespace, i->type, &unit);
+                        if (r < 0)
+                                return r;
 
-                r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, varlink_socket_unit, true, UNIT_DEPENDENCY_FILE);
-                if (r < 0)
-                        return r;
+                        r = unit_add_two_dependencies_by_name(u, UNIT_AFTER, UNIT_REQUIRES, unit, true, UNIT_DEPENDENCY_FILE);
+                        if (r < 0)
+                                return r;
+                }
         } else {
                 r = unit_add_dependency_by_name(u, UNIT_AFTER, SPECIAL_JOURNALD_SOCKET, true, UNIT_DEPENDENCY_FILE);
                 if (r < 0)
@@ -1515,6 +1463,7 @@ int unit_add_default_target_dependency(Unit *u, Unit *target) {
 
 static int unit_add_slice_dependencies(Unit *u) {
         Unit *slice;
+
         assert(u);
 
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
@@ -1526,8 +1475,12 @@ static int unit_add_slice_dependencies(Unit *u) {
         UnitDependencyMask mask = u->type == UNIT_SLICE ? UNIT_DEPENDENCY_IMPLICIT : UNIT_DEPENDENCY_FILE;
 
         slice = UNIT_GET_SLICE(u);
-        if (slice)
+        if (slice) {
+                if (!IN_SET(slice->freezer_state, FREEZER_RUNNING, FREEZER_THAWING))
+                        u->freezer_state = FREEZER_FROZEN_BY_PARENT;
+
                 return unit_add_two_dependencies(u, UNIT_AFTER, UNIT_REQUIRES, slice, true, mask);
+        }
 
         if (unit_has_name(u, SPECIAL_ROOT_SLICE))
                 return 0;
@@ -1536,51 +1489,72 @@ static int unit_add_slice_dependencies(Unit *u) {
 }
 
 static int unit_add_mount_dependencies(Unit *u) {
-        UnitDependencyInfo di;
-        const char *path;
         bool changed = false;
         int r;
 
         assert(u);
 
-        HASHMAP_FOREACH_KEY(di.data, path, u->requires_mounts_for) {
-                char prefix[strlen(path) + 1];
+        for (UnitMountDependencyType t = 0; t < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX; ++t) {
+                UnitDependencyInfo di;
+                const char *path;
 
-                PATH_FOREACH_PREFIX_MORE(prefix, path) {
-                        _cleanup_free_ char *p = NULL;
-                        Unit *m;
+                HASHMAP_FOREACH_KEY(di.data, path, u->mounts_for[t]) {
 
-                        r = unit_name_from_path(prefix, ".mount", &p);
-                        if (r == -EINVAL)
-                                continue; /* If the path cannot be converted to a mount unit name, then it's
-                                           * not manageable as a unit by systemd, and hence we don't need a
-                                           * dependency on it. Let's thus silently ignore the issue. */
-                        if (r < 0)
-                                return r;
+                        char prefix[strlen(ASSERT_PTR(path)) + 1];
 
-                        m = manager_get_unit(u->manager, p);
-                        if (!m) {
-                                /* Make sure to load the mount unit if it exists. If so the dependencies on
-                                 * this unit will be added later during the loading of the mount unit. */
-                                (void) manager_load_unit_prepare(u->manager, p, NULL, NULL, &m);
-                                continue;
-                        }
-                        if (m == u)
-                                continue;
+                        PATH_FOREACH_PREFIX_MORE(prefix, path) {
+                                _cleanup_free_ char *p = NULL;
+                                Unit *m;
 
-                        if (m->load_state != UNIT_LOADED)
-                                continue;
+                                r = unit_name_from_path(prefix, ".mount", &p);
+                                if (r == -EINVAL)
+                                        continue; /* If the path cannot be converted to a mount unit name,
+                                                   * then it's not manageable as a unit by systemd, and
+                                                   * hence we don't need a dependency on it. Let's thus
+                                                   * silently ignore the issue. */
+                                if (r < 0)
+                                        return r;
 
-                        r = unit_add_dependency(u, UNIT_AFTER, m, true, di.origin_mask);
-                        if (r < 0)
-                                return r;
-                        changed = changed || r > 0;
+                                m = manager_get_unit(u->manager, p);
+                                if (!m) {
+                                        /* Make sure to load the mount unit if it exists. If so the
+                                         * dependencies on this unit will be added later during the loading
+                                         * of the mount unit. */
+                                        (void) manager_load_unit_prepare(
+                                                        u->manager,
+                                                        p,
+                                                        /* path= */NULL,
+                                                        /* e= */NULL,
+                                                        &m);
+                                        continue;
+                                }
+                                if (m == u)
+                                        continue;
 
-                        if (m->fragment_path) {
-                                r = unit_add_dependency(u, UNIT_REQUIRES, m, true, di.origin_mask);
+                                if (m->load_state != UNIT_LOADED)
+                                        continue;
+
+                                r = unit_add_dependency(
+                                                u,
+                                                UNIT_AFTER,
+                                                m,
+                                                /* add_reference= */ true,
+                                                di.origin_mask);
                                 if (r < 0)
                                         return r;
                                 changed = changed || r > 0;
+
+                                if (m->fragment_path) {
+                                        r = unit_add_dependency(
+                                                        u,
+                                                        unit_mount_dependency_type_to_dependency_type(t),
+                                                        m,
+                                                        /* add_reference= */ true,
+                                                        di.origin_mask);
+                                        if (r < 0)
+                                                return r;
+                                        changed = changed || r > 0;
+                                }
                         }
                 }
         }
@@ -1959,6 +1933,10 @@ int unit_start(Unit *u, ActivationDetails *details) {
                 return unit_start(following, details);
         }
 
+        /* Check to make sure the unit isn't frozen */
+        if (u->freezer_state != FREEZER_RUNNING)
+                return -EDEADLK;
+
         /* Check our ability to start early so that failure conditions don't cause us to enter a busy loop. */
         if (UNIT_VTABLE(u)->can_start) {
                 r = UNIT_VTABLE(u)->can_start(u);
@@ -1975,7 +1953,6 @@ int unit_start(Unit *u, ActivationDetails *details) {
          * waits for a holdoff timer to elapse before it will start again. */
 
         unit_add_to_dbus_queue(u);
-        unit_cgroup_freezer_action(u, FREEZER_THAW);
 
         if (!u->activation_details) /* Older details object wins */
                 u->activation_details = activation_details_ref(details);
@@ -2010,6 +1987,7 @@ bool unit_can_isolate(Unit *u) {
  *         -EBADR:    This unit type does not support stopping.
  *         -EALREADY: Unit is already stopped.
  *         -EAGAIN:   An operation is already in progress. Retry later.
+ *         -EDEADLK:  Unit is frozen.
  */
 int unit_stop(Unit *u) {
         UnitActiveState state;
@@ -2027,11 +2005,14 @@ int unit_stop(Unit *u) {
                 return unit_stop(following);
         }
 
+        /* Check to make sure the unit isn't frozen */
+        if (u->freezer_state != FREEZER_RUNNING)
+                return -EDEADLK;
+
         if (!UNIT_VTABLE(u)->stop)
                 return -EBADR;
 
         unit_add_to_dbus_queue(u);
-        unit_cgroup_freezer_action(u, FREEZER_THAW);
 
         return UNIT_VTABLE(u)->stop(u);
 }
@@ -2056,6 +2037,7 @@ bool unit_can_stop(Unit *u) {
  *         -EBADR:    This unit type does not support reloading.
  *         -ENOEXEC:  Unit is not started.
  *         -EAGAIN:   An operation is already in progress. Retry later.
+ *         -EDEADLK:  Unit is frozen.
  */
 int unit_reload(Unit *u) {
         UnitActiveState state;
@@ -2082,6 +2064,10 @@ int unit_reload(Unit *u) {
                 return unit_reload(following);
         }
 
+        /* Check to make sure the unit isn't frozen */
+        if (u->freezer_state != FREEZER_RUNNING)
+                return -EDEADLK;
+
         unit_add_to_dbus_queue(u);
 
         if (!UNIT_VTABLE(u)->reload) {
@@ -2090,8 +2076,6 @@ int unit_reload(Unit *u) {
                 return 0;
         }
 
-        unit_cgroup_freezer_action(u, FREEZER_THAW);
-
         return UNIT_VTABLE(u)->reload(u);
 }
 
@@ -2238,16 +2222,16 @@ static void retroactively_start_dependencies(Unit *u) {
         UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_RETROACTIVE_START_REPLACE) /* Requires= + BindsTo= */
                 if (!unit_has_dependency(u, UNIT_ATOM_AFTER, other) &&
                     !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
-                        manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL, NULL);
+                        (void) manager_add_job(u->manager, JOB_START, other, JOB_REPLACE, NULL, NULL, NULL);
 
         UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_RETROACTIVE_START_FAIL) /* Wants= */
                 if (!unit_has_dependency(u, UNIT_ATOM_AFTER, other) &&
                     !UNIT_IS_ACTIVE_OR_ACTIVATING(unit_active_state(other)))
-                        manager_add_job(u->manager, JOB_START, other, JOB_FAIL, NULL, NULL, NULL);
+                        (void) manager_add_job(u->manager, JOB_START, other, JOB_FAIL, NULL, NULL, NULL);
 
         UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_RETROACTIVE_STOP_ON_START) /* Conflicts= (and inverse) */
                 if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
-                        manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
+                        (void) manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
 }
 
 static void retroactively_stop_dependencies(Unit *u) {
@@ -2259,7 +2243,7 @@ static void retroactively_stop_dependencies(Unit *u) {
         /* Pull down units which are bound to us recursively if enabled */
         UNIT_FOREACH_DEPENDENCY(other, u, UNIT_ATOM_RETROACTIVE_STOP_ON_STOP) /* BoundBy= */
                 if (!UNIT_IS_INACTIVE_OR_DEACTIVATING(unit_active_state(other)))
-                        manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
+                        (void) manager_add_job(u->manager, JOB_STOP, other, JOB_REPLACE, NULL, NULL, NULL);
 }
 
 void unit_start_on_failure(
@@ -2291,7 +2275,7 @@ void unit_start_on_failure(
                         log_unit_warning_errno(
                                         u, r, "Failed to enqueue %s job, ignoring: %s",
                                         dependency_name, bus_error_message(&error, r));
-                n_jobs ++;
+                n_jobs++;
         }
 
         if (n_jobs >= 0)
@@ -2318,273 +2302,179 @@ static int raise_level(int log_level, bool condition_info, bool condition_notice
 }
 
 static int unit_log_resources(Unit *u) {
-        struct iovec iovec[1 + 2 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + _CGROUP_IO_ACCOUNTING_METRIC_MAX + 4];
-        bool any_traffic = false, have_ip_accounting = false, any_io = false, have_io_accounting = false;
-        _cleanup_free_ char *igress = NULL, *egress = NULL, *rr = NULL, *wr = NULL;
-        int log_level = LOG_DEBUG; /* May be raised if resources consumed over a threshold */
-        size_t n_message_parts = 0, n_iovec = 0;
-        char* message_parts[1 + 2 + 2 + 2 + 1], *t;
-        nsec_t nsec = NSEC_INFINITY;
-        uint64_t memory_peak = UINT64_MAX, memory_swap_peak = UINT64_MAX;
-        int r;
-        const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
-                [CGROUP_IP_INGRESS_BYTES]   = "IP_METRIC_INGRESS_BYTES",
-                [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
-                [CGROUP_IP_EGRESS_BYTES]    = "IP_METRIC_EGRESS_BYTES",
-                [CGROUP_IP_EGRESS_PACKETS]  = "IP_METRIC_EGRESS_PACKETS",
-        };
-        const char* const io_fields[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
-                [CGROUP_IO_READ_BYTES]       = "IO_METRIC_READ_BYTES",
-                [CGROUP_IO_WRITE_BYTES]      = "IO_METRIC_WRITE_BYTES",
-                [CGROUP_IO_READ_OPERATIONS]  = "IO_METRIC_READ_OPERATIONS",
-                [CGROUP_IO_WRITE_OPERATIONS] = "IO_METRIC_WRITE_OPERATIONS",
+
+        static const struct {
+                const char *journal_field;
+                const char *message_suffix;
+        } memory_fields[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1] = {
+                [CGROUP_MEMORY_PEAK]         = { "MEMORY_PEAK",                "memory peak"         },
+                [CGROUP_MEMORY_SWAP_PEAK]    = { "MEMORY_SWAP_PEAK",           "memory swap peak"    },
+        }, ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+                [CGROUP_IP_INGRESS_BYTES]    = { "IP_METRIC_INGRESS_BYTES",    "incoming IP traffic" },
+                [CGROUP_IP_EGRESS_BYTES]     = { "IP_METRIC_EGRESS_BYTES",     "outgoing IP traffic" },
+                [CGROUP_IP_INGRESS_PACKETS]  = { "IP_METRIC_INGRESS_PACKETS",  NULL                  },
+                [CGROUP_IP_EGRESS_PACKETS]   = { "IP_METRIC_EGRESS_PACKETS",   NULL                  },
+        }, io_fields[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+                [CGROUP_IO_READ_BYTES]       = { "IO_METRIC_READ_BYTES",       "read from disk"      },
+                [CGROUP_IO_WRITE_BYTES]      = { "IO_METRIC_WRITE_BYTES",      "written to disk"     },
+                [CGROUP_IO_READ_OPERATIONS]  = { "IO_METRIC_READ_OPERATIONS",  NULL                  },
+                [CGROUP_IO_WRITE_OPERATIONS] = { "IO_METRIC_WRITE_OPERATIONS", NULL                  },
         };
 
+        struct iovec *iovec = NULL;
+        size_t n_iovec = 0;
+        _cleanup_free_ char *message = NULL, *t = NULL;
+        nsec_t cpu_nsec = NSEC_INFINITY;
+        int log_level = LOG_DEBUG; /* May be raised if resources consumed over a threshold */
+
         assert(u);
 
+        CLEANUP_ARRAY(iovec, n_iovec, iovec_array_free);
+
+        iovec = new(struct iovec, 1 + (_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1) +
+                                  _CGROUP_IP_ACCOUNTING_METRIC_MAX + _CGROUP_IO_ACCOUNTING_METRIC_MAX + 4);
+        if (!iovec)
+                return log_oom();
+
         /* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
          * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
          * information and the complete data in structured fields. */
 
-        (void) unit_get_cpu_usage(u, &nsec);
-        if (nsec != NSEC_INFINITY) {
+        (void) unit_get_cpu_usage(u, &cpu_nsec);
+        if (cpu_nsec != NSEC_INFINITY) {
                 /* Format the CPU time for inclusion in the structured log message */
-                if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
-                        r = log_oom();
-                        goto finish;
-                }
-                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+                if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, cpu_nsec) < 0)
+                        return log_oom();
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(TAKE_PTR(t));
 
                 /* Format the CPU time for inclusion in the human language message string */
-                t = strjoin("consumed ", FORMAT_TIMESPAN(nsec / NSEC_PER_USEC, USEC_PER_MSEC), " CPU time");
-                if (!t) {
-                        r = log_oom();
-                        goto finish;
-                }
-
-                message_parts[n_message_parts++] = t;
+                if (strextendf_with_separator(&message, ", ",
+                                              "Consumed %s CPU time",
+                                              FORMAT_TIMESPAN(cpu_nsec / NSEC_PER_USEC, USEC_PER_MSEC)) < 0)
+                        return log_oom();
 
                 log_level = raise_level(log_level,
-                                        nsec > MENTIONWORTHY_CPU_NSEC,
-                                        nsec > NOTICEWORTHY_CPU_NSEC);
+                                        cpu_nsec > MENTIONWORTHY_CPU_NSEC,
+                                        cpu_nsec > NOTICEWORTHY_CPU_NSEC);
         }
 
-        (void) unit_get_memory_accounting(u, CGROUP_MEMORY_PEAK, &memory_peak);
-        if (memory_peak != UINT64_MAX) {
-                /* Format peak memory for inclusion in the structured log message */
-                if (asprintf(&t, "MEMORY_PEAK=%" PRIu64, memory_peak) < 0) {
-                        r = log_oom();
-                        goto finish;
-                }
-                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++) {
+                uint64_t value = UINT64_MAX;
 
-                /* Format peak memory for inclusion in the human language message string */
-                t = strjoin(FORMAT_BYTES(memory_peak), " memory peak");
-                if (!t) {
-                        r = log_oom();
-                        goto finish;
-                }
-                message_parts[n_message_parts++] = t;
-        }
+                assert(memory_fields[metric].journal_field);
+                assert(memory_fields[metric].message_suffix);
 
-        (void) unit_get_memory_accounting(u, CGROUP_MEMORY_SWAP_PEAK, &memory_swap_peak);
-        if (memory_swap_peak != UINT64_MAX) {
-                /* Format peak swap memory for inclusion in the structured log message */
-                if (asprintf(&t, "MEMORY_SWAP_PEAK=%" PRIu64, memory_swap_peak) < 0) {
-                        r = log_oom();
-                        goto finish;
-                }
-                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+                (void) unit_get_memory_accounting(u, metric, &value);
+                if (value == UINT64_MAX)
+                        continue;
 
-                /* Format peak swap memory for inclusion in the human language message string */
-                t = strjoin(FORMAT_BYTES(memory_swap_peak), " memory swap peak");
-                if (!t) {
-                        r = log_oom();
-                        goto finish;
-                }
-                message_parts[n_message_parts++] = t;
+                if (asprintf(&t, "%s=%" PRIu64, memory_fields[metric].journal_field, value) < 0)
+                        return log_oom();
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(TAKE_PTR(t));
+
+                /* If value is 0, we don't log it in the MESSAGE= field. */
+                if (value == 0)
+                        continue;
+
+                if (strextendf_with_separator(&message, ", ", "%s %s",
+                                              FORMAT_BYTES(value), memory_fields[metric].message_suffix) < 0)
+                        return log_oom();
+
+                log_level = raise_level(log_level,
+                                        value > MENTIONWORTHY_MEMORY_BYTES,
+                                        value > NOTICEWORTHY_MEMORY_BYTES);
         }
 
         for (CGroupIOAccountingMetric k = 0; k < _CGROUP_IO_ACCOUNTING_METRIC_MAX; k++) {
                 uint64_t value = UINT64_MAX;
 
-                assert(io_fields[k]);
+                assert(io_fields[k].journal_field);
 
                 (void) unit_get_io_accounting(u, k, k > 0, &value);
                 if (value == UINT64_MAX)
                         continue;
 
-                have_io_accounting = true;
-                if (value > 0)
-                        any_io = true;
-
                 /* Format IO accounting data for inclusion in the structured log message */
-                if (asprintf(&t, "%s=%" PRIu64, io_fields[k], value) < 0) {
-                        r = log_oom();
-                        goto finish;
-                }
-                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+                if (asprintf(&t, "%s=%" PRIu64, io_fields[k].journal_field, value) < 0)
+                        return log_oom();
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(TAKE_PTR(t));
+
+                /* If value is 0, we don't log it in the MESSAGE= field. */
+                if (value == 0)
+                        continue;
 
                 /* Format the IO accounting data for inclusion in the human language message string, but only
                  * for the bytes counters (and not for the operations counters) */
-                if (k == CGROUP_IO_READ_BYTES) {
-                        assert(!rr);
-                        rr = strjoin("read ", strna(FORMAT_BYTES(value)), " from disk");
-                        if (!rr) {
-                                r = log_oom();
-                                goto finish;
-                        }
-                } else if (k == CGROUP_IO_WRITE_BYTES) {
-                        assert(!wr);
-                        wr = strjoin("written ", strna(FORMAT_BYTES(value)), " to disk");
-                        if (!wr) {
-                                r = log_oom();
-                                goto finish;
-                        }
-                }
+                if (io_fields[k].message_suffix) {
+                        if (strextendf_with_separator(&message, ", ", "%s %s",
+                                                      FORMAT_BYTES(value), io_fields[k].message_suffix) < 0)
+                                return log_oom();
 
-                if (IN_SET(k, CGROUP_IO_READ_BYTES, CGROUP_IO_WRITE_BYTES))
                         log_level = raise_level(log_level,
                                                 value > MENTIONWORTHY_IO_BYTES,
                                                 value > NOTICEWORTHY_IO_BYTES);
-        }
-
-        if (have_io_accounting) {
-                if (any_io) {
-                        if (rr)
-                                message_parts[n_message_parts++] = TAKE_PTR(rr);
-                        if (wr)
-                                message_parts[n_message_parts++] = TAKE_PTR(wr);
-
-                } else {
-                        char *k;
-
-                        k = strdup("no IO");
-                        if (!k) {
-                                r = log_oom();
-                                goto finish;
-                        }
-
-                        message_parts[n_message_parts++] = k;
                 }
         }
 
         for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
                 uint64_t value = UINT64_MAX;
 
-                assert(ip_fields[m]);
+                assert(ip_fields[m].journal_field);
 
                 (void) unit_get_ip_accounting(u, m, &value);
                 if (value == UINT64_MAX)
                         continue;
 
-                have_ip_accounting = true;
-                if (value > 0)
-                        any_traffic = true;
-
                 /* Format IP accounting data for inclusion in the structured log message */
-                if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
-                        r = log_oom();
-                        goto finish;
-                }
-                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
-
-                /* Format the IP accounting data for inclusion in the human language message string, but only for the
-                 * bytes counters (and not for the packets counters) */
-                if (m == CGROUP_IP_INGRESS_BYTES) {
-                        assert(!igress);
-                        igress = strjoin("received ", strna(FORMAT_BYTES(value)), " IP traffic");
-                        if (!igress) {
-                                r = log_oom();
-                                goto finish;
-                        }
-                } else if (m == CGROUP_IP_EGRESS_BYTES) {
-                        assert(!egress);
-                        egress = strjoin("sent ", strna(FORMAT_BYTES(value)), " IP traffic");
-                        if (!egress) {
-                                r = log_oom();
-                                goto finish;
-                        }
-                }
+                if (asprintf(&t, "%s=%" PRIu64, ip_fields[m].journal_field, value) < 0)
+                        return log_oom();
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(TAKE_PTR(t));
+
+                /* If value is 0, we don't log it in the MESSAGE= field. */
+                if (value == 0)
+                        continue;
+
+                /* Format the IP accounting data for inclusion in the human language message string, but only
+                 * for the bytes counters (and not for the packets counters) */
+                if (ip_fields[m].message_suffix) {
+                        if (strextendf_with_separator(&message, ", ", "%s %s",
+                                                      FORMAT_BYTES(value), ip_fields[m].message_suffix) < 0)
+                                return log_oom();
 
-                if (IN_SET(m, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
                         log_level = raise_level(log_level,
                                                 value > MENTIONWORTHY_IP_BYTES,
                                                 value > NOTICEWORTHY_IP_BYTES);
-        }
-
-        /* This check is here because it is the earliest point following all possible log_level assignments. If
-         * log_level is assigned anywhere after this point, move this check. */
-        if (!unit_log_level_test(u, log_level)) {
-                r = 0;
-                goto finish;
-        }
-
-        if (have_ip_accounting) {
-                if (any_traffic) {
-                        if (igress)
-                                message_parts[n_message_parts++] = TAKE_PTR(igress);
-                        if (egress)
-                                message_parts[n_message_parts++] = TAKE_PTR(egress);
-
-                } else {
-                        char *k;
-
-                        k = strdup("no IP traffic");
-                        if (!k) {
-                                r = log_oom();
-                                goto finish;
-                        }
-
-                        message_parts[n_message_parts++] = k;
                 }
         }
 
+        /* This check is here because it is the earliest point following all possible log_level assignments.
+         * (If log_level is assigned anywhere after this point, move this check.) */
+        if (!unit_log_level_test(u, log_level))
+                return 0;
+
         /* Is there any accounting data available at all? */
         if (n_iovec == 0) {
-                r = 0;
-                goto finish;
-        }
-
-        if (n_message_parts == 0)
-                t = strjoina("MESSAGE=", u->id, ": Completed.");
-        else {
-                _cleanup_free_ char *joined = NULL;
-
-                message_parts[n_message_parts] = NULL;
-
-                joined = strv_join(message_parts, ", ");
-                if (!joined) {
-                        r = log_oom();
-                        goto finish;
-                }
-
-                joined[0] = ascii_toupper(joined[0]);
-                t = strjoina("MESSAGE=", u->id, ": ", joined, ".");
+                assert(!message);
+                return 0;
         }
 
-        /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
-         * and hence don't increase n_iovec for them */
-        iovec[n_iovec] = IOVEC_MAKE_STRING(t);
-        iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
-
-        t = strjoina(u->manager->unit_log_field, u->id);
-        iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
-
-        t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
-        iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+        t = strjoin("MESSAGE=", u->id, ": ", message ?: "Completed", ".");
+        if (!t)
+                return log_oom();
+        iovec[n_iovec++] = IOVEC_MAKE_STRING(TAKE_PTR(t));
 
-        log_unit_struct_iovec(u, log_level, iovec, n_iovec + 4);
-        r = 0;
+        if (!set_iovec_string_field(iovec, &n_iovec, "MESSAGE_ID=", SD_MESSAGE_UNIT_RESOURCES_STR))
+                return log_oom();
 
-finish:
-        free_many_charp(message_parts, n_message_parts);
+        if (!set_iovec_string_field(iovec, &n_iovec, u->manager->unit_log_field, u->id))
+                return log_oom();
 
-        for (size_t i = 0; i < n_iovec; i++)
-                free(iovec[i].iov_base);
+        if (!set_iovec_string_field(iovec, &n_iovec, u->manager->invocation_log_field, u->invocation_id_string))
+                return log_oom();
 
-        return r;
+        log_unit_struct_iovec(u, log_level, iovec, n_iovec);
 
+        return 0;
 }
 
 static void unit_update_on_console(Unit *u) {
@@ -2796,12 +2686,14 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
 
                         unit_emit_audit_start(u);
                         manager_send_unit_plymouth(m, u);
+                        manager_send_unit_supervisor(m, u, /* active= */ true);
                 }
 
                 if (UNIT_IS_INACTIVE_OR_FAILED(ns) && !UNIT_IS_INACTIVE_OR_FAILED(os)) {
                         /* This unit just stopped/failed. */
 
                         unit_emit_audit_stop(u, ns);
+                        manager_send_unit_supervisor(m, u, /* active= */ false);
                         unit_log_resources(u);
                 }
 
@@ -2859,7 +2751,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
         }
 }
 
-int unit_watch_pidref(Unit *u, PidRef *pid, bool exclusive) {
+int unit_watch_pidref(Unit *u, const PidRef *pid, bool exclusive) {
         _cleanup_(pidref_freep) PidRef *pid_dup = NULL;
         int r;
 
@@ -2943,7 +2835,7 @@ int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
         return unit_watch_pidref(u, &pidref, exclusive);
 }
 
-void unit_unwatch_pidref(Unit *u, PidRef *pid) {
+void unit_unwatch_pidref(Unit *u, const PidRef *pid) {
         assert(u);
         assert(pidref_is_set(pid));
 
@@ -3005,6 +2897,16 @@ void unit_unwatch_all_pids(Unit *u) {
         u->pids = set_free(u->pids);
 }
 
+void unit_unwatch_pidref_done(Unit *u, PidRef *pidref) {
+        assert(u);
+        /* Convenience wrapper: unwatch the PID for this unit, then pidref_done() it. No-op if the pidref is unset. */
+        if (!pidref_is_set(pidref))
+                return;
+
+        unit_unwatch_pidref(u, pidref);
+        pidref_done(pidref); /* Only after unwatching: unit_unwatch_pidref() asserts that the pidref is still set */
+}
+
 static void unit_tidy_watch_pids(Unit *u) {
         PidRef *except1, *except2, *e;
 
@@ -3030,7 +2932,7 @@ static int on_rewatch_pids_event(sd_event_source *s, void *userdata) {
         assert(s);
 
         unit_tidy_watch_pids(u);
-        unit_watch_all_pids(u);
+        (void) unit_watch_all_pids(u);
 
         /* If the PID set is empty now, then let's finish this off. */
         unit_synthesize_cgroup_empty_event(u);
@@ -3043,7 +2945,8 @@ int unit_enqueue_rewatch_pids(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3063,7 +2966,7 @@ int unit_enqueue_rewatch_pids(Unit *u) {
                 if (r < 0)
                         return log_error_errno(r, "Failed to allocate event source for tidying watched PIDs: %m");
 
-                r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
+                r = sd_event_source_set_priority(s, EVENT_PRIORITY_REWATCH_PIDS);
                 if (r < 0)
                         return log_error_errno(r, "Failed to adjust priority of event source for tidying watched PIDs: %m");
 
@@ -3288,8 +3191,8 @@ int unit_add_dependency(
         if (u->manager && FLAGS_SET(u->manager->test_run_flags, MANAGER_TEST_RUN_IGNORE_DEPENDENCIES))
                 return 0;
 
-        /* Note that ordering a device unit after a unit is permitted since it allows to start its job
-         * running timeout at a specific time. */
+        /* Note that ordering a device unit after a unit is permitted since it allows its job running
+         * timeout to be started at a specific time. */
         if (FLAGS_SET(a, UNIT_ATOM_BEFORE) && other->type == UNIT_DEVICE) {
                 log_unit_warning(u, "Dependency Before=%s ignored (.device units cannot be delayed)", other->id);
                 return 0;
@@ -3529,8 +3432,11 @@ int unit_set_slice(Unit *u, Unit *slice) {
                 return 0;
 
         /* Disallow slice changes if @u is already bound to cgroups */
-        if (UNIT_GET_SLICE(u) && u->cgroup_realized)
-                return -EBUSY;
+        if (UNIT_GET_SLICE(u)) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (crt && crt->cgroup_realized)
+                        return -EBUSY;
+        }
 
         /* Remove any slices assigned prior; we should only have one UNIT_IN_SLICE dependency */
         if (UNIT_GET_SLICE(u))
@@ -4019,28 +3925,25 @@ void unit_notify_cgroup_oom(Unit *u, bool managed_oom) {
                 UNIT_VTABLE(u)->notify_cgroup_oom(u, managed_oom);
 }
 
-static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
-        _cleanup_set_free_ Set *pid_set = NULL;
+static int unit_pid_set(Unit *u, Set **pid_set) {
         int r;
 
-        pid_set = set_new(NULL);
-        if (!pid_set)
-                return NULL;
+        assert(u);
+        assert(pid_set);
+
+        set_clear(*pid_set); /* This updates input. */
 
         /* Exclude the main/control pids from being killed via the cgroup */
-        if (main_pid > 0) {
-                r = set_put(pid_set, PID_TO_PTR(main_pid));
-                if (r < 0)
-                        return NULL;
-        }
 
-        if (control_pid > 0) {
-                r = set_put(pid_set, PID_TO_PTR(control_pid));
-                if (r < 0)
-                        return NULL;
-        }
+        PidRef *pid;
+        FOREACH_ARGUMENT(pid, unit_main_pid(u), unit_control_pid(u))
+                if (pidref_is_set(pid)) {
+                        r = set_ensure_put(pid_set, NULL, PID_TO_PTR(pid->pid));
+                        if (r < 0)
+                                return r;
+                }
 
-        return TAKE_PTR(pid_set);
+        return 0;
 }
 
 static int kill_common_log(const PidRef *pid, int signo, void *userdata) {
@@ -4074,13 +3977,55 @@ static int kill_or_sigqueue(PidRef* pidref, int signo, int code, int value) {
         }
 }
 
+static int unit_kill_one(
+                Unit *u,
+                PidRef *pidref,
+                const char *type, /* Role of the process as it appears in the messages below, e.g. "control" or "main" */
+                int signo,
+                int code,
+                int value,
+                sd_bus_error *ret_error) { /* Optional: if NULL, a failure is only logged, not reported to the client */
+        /* Sends a signal to one process of the unit. Returns 1 if it was signalled, 0 if there was nothing to signal. */
+        int r;
+
+        assert(u);
+        assert(type);
+
+        if (!pidref_is_set(pidref))
+                return 0; /* No process referenced: nothing to do, and not treated as an error */
+
+        _cleanup_free_ char *comm = NULL;
+        (void) pidref_get_comm(pidref, &comm); /* Result deliberately ignored; comm only decorates messages via strna() */
+
+        r = kill_or_sigqueue(pidref, signo, code, value);
+        if (r == -ESRCH)
+                return 0; /* -ESRCH is reported to the caller as "nothing killed" rather than as a failure */
+        if (r < 0) {
+                /* Report this failure to the logs and, if ret_error was passed, to the client as well */
+                if (ret_error)
+                        sd_bus_error_set_errnof(
+                                        ret_error, r,
+                                        "Failed to send signal SIG%s to %s process " PID_FMT " (%s): %m",
+                                        signal_to_string(signo), type, pidref->pid, strna(comm));
+
+                return log_unit_warning_errno(
+                                u, r,
+                                "Failed to send signal SIG%s to %s process " PID_FMT " (%s) on client request: %m",
+                                signal_to_string(signo), type, pidref->pid, strna(comm));
+        }
+
+        log_unit_info(u, "Sent signal SIG%s to %s process " PID_FMT " (%s) on client request.",
+                      signal_to_string(signo), type, pidref->pid, strna(comm));
+        return 1; /* killed */
+}
+
 int unit_kill(
                 Unit *u,
                 KillWho who,
                 int signo,
                 int code,
                 int value,
-                sd_bus_error *error) {
+                sd_bus_error *ret_error) {
 
         PidRef *main_pid, *control_pid;
         bool killed = false;
@@ -4100,110 +4045,71 @@ int unit_kill(
         control_pid = unit_control_pid(u);
 
         if (!UNIT_HAS_CGROUP_CONTEXT(u) && !main_pid && !control_pid)
-                return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit type does not support process killing.");
+                return sd_bus_error_setf(ret_error, SD_BUS_ERROR_NOT_SUPPORTED, "Unit type does not support process killing.");
 
         if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL)) {
                 if (!main_pid)
-                        return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no main processes", unit_type_to_string(u->type));
+                        return sd_bus_error_setf(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no main processes", unit_type_to_string(u->type));
                 if (!pidref_is_set(main_pid))
-                        return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No main process to kill");
+                        return sd_bus_error_set_const(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "No main process to kill");
         }
 
         if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL)) {
                 if (!control_pid)
-                        return sd_bus_error_setf(error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no control processes", unit_type_to_string(u->type));
+                        return sd_bus_error_setf(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "%s units have no control processes", unit_type_to_string(u->type));
                 if (!pidref_is_set(control_pid))
-                        return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No control process to kill");
+                        return sd_bus_error_set_const(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "No control process to kill");
         }
 
-        if (pidref_is_set(control_pid) &&
-            IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL, KILL_ALL, KILL_ALL_FAIL)) {
-                _cleanup_free_ char *comm = NULL;
-                (void) pidref_get_comm(control_pid, &comm);
-
-                r = kill_or_sigqueue(control_pid, signo, code, value);
-                if (r < 0) {
-                        ret = r;
-
-                        /* Report this failure both to the logs and to the client */
-                        sd_bus_error_set_errnof(
-                                        error, r,
-                                        "Failed to send signal SIG%s to control process " PID_FMT " (%s): %m",
-                                        signal_to_string(signo), control_pid->pid, strna(comm));
-                        log_unit_warning_errno(
-                                        u, r,
-                                        "Failed to send signal SIG%s to control process " PID_FMT " (%s) on client request: %m",
-                                        signal_to_string(signo), control_pid->pid, strna(comm));
-                } else {
-                        log_unit_info(u, "Sent signal SIG%s to control process " PID_FMT " (%s) on client request.",
-                                      signal_to_string(signo), control_pid->pid, strna(comm));
-                        killed = true;
-                }
+        if (IN_SET(who, KILL_CONTROL, KILL_CONTROL_FAIL, KILL_ALL, KILL_ALL_FAIL)) {
+                r = unit_kill_one(u, control_pid, "control", signo, code, value, ret_error);
+                RET_GATHER(ret, r);
+                killed = killed || r > 0;
         }
 
-        if (pidref_is_set(main_pid) &&
-            IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL, KILL_ALL, KILL_ALL_FAIL)) {
-                _cleanup_free_ char *comm = NULL;
-                (void) pidref_get_comm(main_pid, &comm);
-
-                r = kill_or_sigqueue(main_pid, signo, code, value);
-                if (r < 0) {
-                        if (ret == 0) {
-                                ret = r;
-
-                                sd_bus_error_set_errnof(
-                                                error, r,
-                                                "Failed to send signal SIG%s to main process " PID_FMT " (%s): %m",
-                                                signal_to_string(signo), main_pid->pid, strna(comm));
-                        }
-
-                        log_unit_warning_errno(
-                                        u, r,
-                                        "Failed to send signal SIG%s to main process " PID_FMT " (%s) on client request: %m",
-                                        signal_to_string(signo), main_pid->pid, strna(comm));
-
-                } else {
-                        log_unit_info(u, "Sent signal SIG%s to main process " PID_FMT " (%s) on client request.",
-                                      signal_to_string(signo), main_pid->pid, strna(comm));
-                        killed = true;
-                }
+        if (IN_SET(who, KILL_MAIN, KILL_MAIN_FAIL, KILL_ALL, KILL_ALL_FAIL)) {
+                r = unit_kill_one(u, main_pid, "main", signo, code, value, ret >= 0 ? ret_error : NULL);
+                RET_GATHER(ret, r);
+                killed = killed || r > 0;
         }
 
         /* Note: if we shall enqueue rather than kill we won't do this via the cgroup mechanism, since it
          * doesn't really make much sense (and given that enqueued values are a relatively expensive
          * resource, and we shouldn't allow us to be subjects for such allocation sprees) */
-        if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && u->cgroup_path && code == SI_USER) {
-                _cleanup_set_free_ Set *pid_set = NULL;
+        if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && code == SI_USER) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
-                /* Exclude the main/control pids from being killed via the cgroup */
-                pid_set = unit_pid_set(main_pid ? main_pid->pid : 0, control_pid ? control_pid->pid : 0);
-                if (!pid_set)
-                        return log_oom();
+                if (crt && crt->cgroup_path) {
+                        _cleanup_set_free_ Set *pid_set = NULL;
 
-                r = cg_kill_recursive(u->cgroup_path, signo, 0, pid_set, kill_common_log, u);
-                if (r < 0) {
-                        if (!IN_SET(r, -ESRCH, -ENOENT)) {
-                                if (ret == 0) {
-                                        ret = r;
+                        /* Exclude the main/control pids from being killed via the cgroup */
+                        r = unit_pid_set(u, &pid_set);
+                        if (r < 0)
+                                return log_oom();
 
+                        r = cg_kill_recursive(crt->cgroup_path, signo, 0, pid_set, kill_common_log, u);
+                        if (r < 0 && !IN_SET(r, -ESRCH, -ENOENT)) {
+                                if (ret >= 0)
                                         sd_bus_error_set_errnof(
-                                                        error, r,
+                                                        ret_error, r,
                                                         "Failed to send signal SIG%s to auxiliary processes: %m",
                                                         signal_to_string(signo));
-                                }
 
                                 log_unit_warning_errno(
                                                 u, r,
                                                 "Failed to send signal SIG%s to auxiliary processes on client request: %m",
                                                 signal_to_string(signo));
+
+                                RET_GATHER(ret, r);
                         }
-                } else
-                        killed = true;
+
+                        killed = killed || r >= 0;
+                }
         }
 
         /* If the "fail" versions of the operation are requested, then complain if the set of processes we killed is empty */
-        if (ret == 0 && !killed && IN_SET(who, KILL_ALL_FAIL, KILL_CONTROL_FAIL, KILL_MAIN_FAIL))
-                return sd_bus_error_set_const(error, BUS_ERROR_NO_SUCH_PROCESS, "No matching processes to kill");
+        if (ret >= 0 && !killed && IN_SET(who, KILL_ALL_FAIL, KILL_CONTROL_FAIL, KILL_MAIN_FAIL))
+                return sd_bus_error_set_const(ret_error, BUS_ERROR_NO_SUCH_PROCESS, "No matching processes to kill");
 
         return ret;
 }
@@ -4316,6 +4222,21 @@ static int user_from_unit_name(Unit *u, char **ret) {
         return 0;
 }
 
+static int unit_verify_contexts(const Unit *u, const ExecContext *ec) {
+        assert(u);
+        /* Refuses unsupported ExecContext setting combinations with a logged error; a NULL ec passes. */
+        if (!ec)
+                return 0;
+
+        if (MANAGER_IS_USER(u->manager) && ec->dynamic_user)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "DynamicUser= enabled for user unit, which is not supported. Refusing.");
+
+        if (ec->dynamic_user && ec->working_directory_home)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "WorkingDirectory=~ is not allowed under DynamicUser=yes. Refusing.");
+
+        return 0;
+}
+
 int unit_patch_contexts(Unit *u) {
         CGroupContext *cc;
         ExecContext *ec;
@@ -4337,16 +4258,14 @@ int unit_patch_contexts(Unit *u) {
                                         return -ENOMEM;
                         }
 
-                if (MANAGER_IS_USER(u->manager) &&
-                    !ec->working_directory) {
-
+                if (MANAGER_IS_USER(u->manager) && !ec->working_directory) {
                         r = get_home_dir(&ec->working_directory);
                         if (r < 0)
                                 return r;
 
-                        /* Allow user services to run, even if the
-                         * home directory is missing */
-                        ec->working_directory_missing_ok = true;
+                        if (!ec->working_directory_home)
+                                /* If home directory is implied by us, allow it to be missing. */
+                                ec->working_directory_missing_ok = true;
                 }
 
                 if (ec->private_devices)
@@ -4390,8 +4309,8 @@ int unit_patch_contexts(Unit *u) {
                         ec->restrict_suid_sgid = true;
                 }
 
-                for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
-                        exec_directory_sort(ec->directories + dt);
+                FOREACH_ARRAY(d, ec->directories, _EXEC_DIRECTORY_TYPE_MAX)
+                        exec_directory_sort(d);
         }
 
         cc = unit_get_cgroup_context(u);
@@ -4441,7 +4360,7 @@ int unit_patch_contexts(Unit *u) {
                 }
         }
 
-        return 0;
+        return unit_verify_contexts(u, ec);
 }
 
 ExecContext *unit_get_exec_context(const Unit *u) {
@@ -4458,7 +4377,7 @@ ExecContext *unit_get_exec_context(const Unit *u) {
         return (ExecContext*) ((uint8_t*) u + offset);
 }
 
-KillContext *unit_get_kill_context(Unit *u) {
+KillContext *unit_get_kill_context(const Unit *u) {
         size_t offset;
         assert(u);
 
@@ -4472,7 +4391,7 @@ KillContext *unit_get_kill_context(Unit *u) {
         return (KillContext*) ((uint8_t*) u + offset);
 }
 
-CGroupContext *unit_get_cgroup_context(Unit *u) {
+CGroupContext *unit_get_cgroup_context(const Unit *u) {
         size_t offset;
 
         if (u->type < 0)
@@ -4485,7 +4404,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) {
         return (CGroupContext*) ((uint8_t*) u + offset);
 }
 
-ExecRuntime *unit_get_exec_runtime(Unit *u) {
+ExecRuntime *unit_get_exec_runtime(const Unit *u) {
         size_t offset;
 
         if (u->type < 0)
@@ -4498,6 +4417,19 @@ ExecRuntime *unit_get_exec_runtime(Unit *u) {
         return *(ExecRuntime**) ((uint8_t*) u + offset);
 }
 
+CGroupRuntime *unit_get_cgroup_runtime(const Unit *u) {
+        size_t offset;
+        /* Returns the unit's CGroupRuntime, or NULL if u->type is negative or the vtable offset is 0. */
+        if (u->type < 0)
+                return NULL;
+
+        offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
+        if (offset <= 0)
+                return NULL;
+        /* The slot at that byte offset inside the unit holds a pointer, which may itself still be NULL. */
+        return *(CGroupRuntime**) ((uint8_t*) u + offset);
+}
+
 static const char* unit_drop_in_dir(Unit *u, UnitWriteFlags flags) {
         assert(u);
 
@@ -4820,26 +4752,57 @@ static int operation_to_signal(
         }
 }
 
-int unit_kill_context(
+static int unit_kill_context_one(
                 Unit *u,
-                KillContext *c,
-                KillOperation k,
-                PidRef* main_pid,
-                PidRef* control_pid,
-                bool main_pid_alien) {
+                const PidRef *pidref,
+                const char *type,
+                bool is_alien,
+                int sig,
+                bool send_sighup,
+                cg_kill_log_func_t log_func) {
 
+        int r;
+
+        assert(u);
+        assert(type);
+
+        /* Sends 'sig' to a single process of the unit; 'type' is only used in the warning message. Returns
+         * > 0 if it makes sense to wait for SIGCHLD for the process, == 0 if not. */
+        if (!pidref_is_set(pidref))
+                return 0;
+
+        if (log_func)
+                log_func(pidref, sig, u);
+        /* -ESRCH is not an error here: nothing is logged, but no SIGHUP is sent either. */
+        r = pidref_kill_and_sigcont(pidref, sig);
+        if (r == -ESRCH)
+                return !is_alien;
+        if (r < 0) {
+                _cleanup_free_ char *comm = NULL;
+
+                (void) pidref_get_comm(pidref, &comm);
+                return log_unit_warning_errno(u, r, "Failed to kill %s process " PID_FMT " (%s), ignoring: %m", type, pidref->pid, strna(comm));
+        }
+        /* Best effort: a failure to deliver the additional SIGHUP is deliberately ignored. */
+        if (send_sighup)
+                (void) pidref_kill(pidref, SIGHUP);
+
+        return !is_alien;
+}
+
+int unit_kill_context(Unit *u, KillOperation k) {
         bool wait_for_exit = false, send_sighup;
         cg_kill_log_func_t log_func = NULL;
         int sig, r;
 
         assert(u);
-        assert(c);
 
         /* Kill the processes belonging to this unit, in preparation for shutting the unit down.  Returns > 0
          * if we killed something worth waiting for, 0 otherwise. Do not confuse with unit_kill_common()
          * which is used for user-requested killing of unit processes. */
 
-        if (c->kill_mode == KILL_NONE)
+        KillContext *c = unit_get_kill_context(u);
+        if (!c || c->kill_mode == KILL_NONE)
                 return 0;
 
         bool noteworthy;
@@ -4852,61 +4815,33 @@ int unit_kill_context(
                 IN_SET(k, KILL_TERMINATE, KILL_TERMINATE_AND_LOG) &&
                 sig != SIGHUP;
 
-        if (pidref_is_set(main_pid)) {
-                if (log_func)
-                        log_func(main_pid, sig, u);
-
-                r = pidref_kill_and_sigcont(main_pid, sig);
-                if (r < 0 && r != -ESRCH) {
-                        _cleanup_free_ char *comm = NULL;
-                        (void) pidref_get_comm(main_pid, &comm);
+        bool is_alien;
+        PidRef *main_pid = unit_main_pid_full(u, &is_alien);
+        r = unit_kill_context_one(u, main_pid, "main", is_alien, sig, send_sighup, log_func);
+        wait_for_exit = wait_for_exit || r > 0;
 
-                        log_unit_warning_errno(u, r, "Failed to kill main process " PID_FMT " (%s), ignoring: %m", main_pid->pid, strna(comm));
-                } else {
-                        if (!main_pid_alien)
-                                wait_for_exit = true;
+        r = unit_kill_context_one(u, unit_control_pid(u), "control", /* is_alien = */ false, sig, send_sighup, log_func);
+        wait_for_exit = wait_for_exit || r > 0;
 
-                        if (r != -ESRCH && send_sighup)
-                                (void) pidref_kill(main_pid, SIGHUP);
-                }
-        }
-
-        if (pidref_is_set(control_pid)) {
-                if (log_func)
-                        log_func(control_pid, sig, u);
-
-                r = pidref_kill_and_sigcont(control_pid, sig);
-                if (r < 0 && r != -ESRCH) {
-                        _cleanup_free_ char *comm = NULL;
-                        (void) pidref_get_comm(control_pid, &comm);
-
-                        log_unit_warning_errno(u, r, "Failed to kill control process " PID_FMT " (%s), ignoring: %m", control_pid->pid, strna(comm));
-                } else {
-                        wait_for_exit = true;
-
-                        if (r != -ESRCH && send_sighup)
-                                (void) pidref_kill(control_pid, SIGHUP);
-                }
-        }
-
-        if (u->cgroup_path &&
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_path &&
             (c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL))) {
                 _cleanup_set_free_ Set *pid_set = NULL;
 
                 /* Exclude the main/control pids from being killed via the cgroup */
-                pid_set = unit_pid_set(main_pid ? main_pid->pid : 0, control_pid ? control_pid->pid : 0);
-                if (!pid_set)
-                        return -ENOMEM;
+                r = unit_pid_set(u, &pid_set);
+                if (r < 0)
+                        return r;
 
                 r = cg_kill_recursive(
-                                u->cgroup_path,
+                                crt->cgroup_path,
                                 sig,
                                 CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
                                 pid_set,
                                 log_func, u);
                 if (r < 0) {
                         if (!IN_SET(r, -EAGAIN, -ESRCH, -ENOENT))
-                                log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                                log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 } else if (r > 0) {
 
@@ -4922,14 +4857,12 @@ int unit_kill_context(
                                 wait_for_exit = true;
 
                         if (send_sighup) {
-                                set_free(pid_set);
-
-                                pid_set = unit_pid_set(main_pid ? main_pid->pid : 0, control_pid ? control_pid->pid : 0);
-                                if (!pid_set)
-                                        return -ENOMEM;
+                                r = unit_pid_set(u, &pid_set);
+                                if (r < 0)
+                                        return r;
 
                                 (void) cg_kill_recursive(
-                                                u->cgroup_path,
+                                                crt->cgroup_path,
                                                 SIGHUP,
                                                 CGROUP_IGNORE_SELF,
                                                 pid_set,
@@ -4942,11 +4875,16 @@ int unit_kill_context(
         return wait_for_exit;
 }
 
-int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) {
+int unit_add_mounts_for(Unit *u, const char *path, UnitDependencyMask mask, UnitMountDependencyType type) {
+        Hashmap **unit_map, **manager_map;
         int r;
 
         assert(u);
         assert(path);
+        assert(type >= 0 && type < _UNIT_MOUNT_DEPENDENCY_TYPE_MAX);
+
+        unit_map = &u->mounts_for[type];
+        manager_map = &u->manager->units_needing_mounts_for[type];
 
         /* Registers a unit for requiring a certain path and all its prefixes. We keep a hashtable of these
          * paths in the unit (from the path to the UnitDependencyInfo structure indicating how to the
@@ -4956,7 +4894,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask)
         if (!path_is_absolute(path))
                 return -EINVAL;
 
-        if (hashmap_contains(u->requires_mounts_for, path)) /* Exit quickly if the path is already covered. */
+        if (hashmap_contains(*unit_map, path)) /* Exit quickly if the path is already covered. */
                 return 0;
 
         /* Use the canonical form of the path as the stored key. We call path_is_normalized()
@@ -4975,7 +4913,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask)
                 .origin_mask = mask
         };
 
-        r = hashmap_ensure_put(&u->requires_mounts_for, &path_hash_ops, p, di.data);
+        r = hashmap_ensure_put(unit_map, &path_hash_ops, p, di.data);
         if (r < 0)
                 return r;
         assert(r > 0);
@@ -4985,11 +4923,11 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask)
         PATH_FOREACH_PREFIX_MORE(prefix, path) {
                 Set *x;
 
-                x = hashmap_get(u->manager->units_requiring_mounts_for, prefix);
+                x = hashmap_get(*manager_map, prefix);
                 if (!x) {
                         _cleanup_free_ char *q = NULL;
 
-                        r = hashmap_ensure_allocated(&u->manager->units_requiring_mounts_for, &path_hash_ops);
+                        r = hashmap_ensure_allocated(manager_map, &path_hash_ops);
                         if (r < 0)
                                 return r;
 
@@ -5001,7 +4939,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask)
                         if (!x)
                                 return -ENOMEM;
 
-                        r = hashmap_put(u->manager->units_requiring_mounts_for, q, x);
+                        r = hashmap_put(*manager_map, q, x);
                         if (r < 0) {
                                 set_free(x);
                                 return r;
@@ -5035,8 +4973,7 @@ int unit_setup_exec_runtime(Unit *u) {
         if (*rt)
                 return 0;
 
-        ec = unit_get_exec_context(u);
-        assert(ec);
+        ec = ASSERT_PTR(unit_get_exec_context(u));
 
         r = unit_get_transitive_dependency_set(u, UNIT_ATOM_JOINS_NAMESPACE_OF, &units);
         if (r < 0)
@@ -5073,6 +5010,21 @@ int unit_setup_exec_runtime(Unit *u) {
         return r;
 }
 
+CGroupRuntime *unit_setup_cgroup_runtime(Unit *u) {
+        size_t offset;
+
+        assert(u);
+        /* Returns the unit's CGroupRuntime, creating it with cgroup_runtime_new() if not set up yet. */
+        offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
+        assert(offset > 0);
+
+        CGroupRuntime **rt = (CGroupRuntime**) ((uint8_t*) u + offset);
+        if (*rt)
+                return *rt;
+        /* The new object is stored in the unit; unit_fork_helper_process() maps a NULL result to -ENOMEM. */
+        return (*rt = cgroup_runtime_new());
+}
+
 bool unit_type_supported(UnitType t) {
         static int8_t cache[_UNIT_TYPE_MAX] = {}; /* -1: disabled, 1: enabled: 0: don't know */
         int r;
@@ -5178,12 +5130,14 @@ PidRef* unit_control_pid(Unit *u) {
         return NULL;
 }
 
-PidRef* unit_main_pid(Unit *u) {
+PidRef* unit_main_pid_full(Unit *u, bool *ret_is_alien) {
         assert(u);
 
         if (UNIT_VTABLE(u)->main_pid)
-                return UNIT_VTABLE(u)->main_pid(u);
+                return UNIT_VTABLE(u)->main_pid(u, ret_is_alien);
 
+        if (ret_is_alien)
+                *ret_is_alien = false;
         return NULL;
 }
 
@@ -5393,7 +5347,6 @@ int unit_acquire_invocation_id(Unit *u) {
 }
 
 int unit_set_exec_params(Unit *u, ExecParameters *p) {
-        const char *confirm_spawn;
         int r;
 
         assert(u);
@@ -5406,19 +5359,17 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
 
         p->runtime_scope = u->manager->runtime_scope;
 
-        confirm_spawn = manager_get_confirm_spawn(u->manager);
-        if (confirm_spawn) {
-                p->confirm_spawn = strdup(confirm_spawn);
-                if (!p->confirm_spawn)
-                        return -ENOMEM;
-        }
+        r = strdup_to(&p->confirm_spawn, manager_get_confirm_spawn(u->manager));
+        if (r < 0)
+                return r;
 
         p->cgroup_supported = u->manager->cgroup_supported;
         p->prefix = u->manager->prefix;
         SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
 
         /* Copy parameters from unit */
-        p->cgroup_path = u->cgroup_path;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        p->cgroup_path = crt ? crt->cgroup_path : NULL;
         SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 
         p->received_credentials_directory = u->manager->received_credentials_directory;
@@ -5428,17 +5379,18 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
 
         p->fallback_smack_process_label = u->manager->defaults.smack_process_label;
 
-        if (u->manager->restrict_fs && p->bpf_outer_map_fd < 0) {
-                int fd = lsm_bpf_map_restrict_fs_fd(u);
+        if (u->manager->restrict_fs && p->bpf_restrict_fs_map_fd < 0) {
+                int fd = bpf_restrict_fs_map_fd(u);
                 if (fd < 0)
                         return fd;
 
-                p->bpf_outer_map_fd = fd;
+                p->bpf_restrict_fs_map_fd = fd;
         }
 
         p->user_lookup_fd = u->manager->user_lookup_fds[1];
+        p->handoff_timestamp_fd = u->manager->handoff_timestamp_fds[1];
 
-        p->cgroup_id = u->cgroup_id;
+        p->cgroup_id = crt ? crt->cgroup_id : 0;
         p->invocation_id = u->invocation_id;
         sd_id128_to_string(p->invocation_id, p->invocation_id_string);
         p->unit_id = strdup(u->id);
@@ -5460,6 +5412,10 @@ int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) {
 
         (void) unit_realize_cgroup(u);
 
+        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         r = safe_fork(name, FORK_REOPEN_LOG|FORK_DEATHSIG_SIGTERM, &pid);
         if (r < 0)
                 return r;
@@ -5482,10 +5438,10 @@ int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) {
         (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE);
         (void) ignore_signals(SIGPIPE);
 
-        if (u->cgroup_path) {
-                r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+        if (crt->cgroup_path) {
+                r = cg_attach_everywhere(u->manager->cgroup_supported, crt->cgroup_path, 0, NULL, NULL);
                 if (r < 0) {
-                        log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", empty_to_root(u->cgroup_path));
+                        log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", empty_to_root(crt->cgroup_path));
                         _exit(EXIT_CGROUP);
                 }
         }
@@ -5880,9 +5836,10 @@ int unit_prepare_exec(Unit *u) {
 
         (void) unit_realize_cgroup(u);
 
-        if (u->reset_accounting) {
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->reset_accounting) {
                 (void) unit_reset_accounting(u);
-                u->reset_accounting = false;
+                crt->reset_accounting = false;
         }
 
         unit_export_state_files(u);
@@ -5942,11 +5899,13 @@ int unit_warn_leftover_processes(Unit *u, cg_kill_log_func_t log_func) {
 
         (void) unit_pick_cgroup_path(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         return cg_kill_recursive(
-                        u->cgroup_path,
+                        crt->cgroup_path,
                         /* sig= */ 0,
                         /* flags= */ 0,
                         /* set= */ NULL,
@@ -5976,7 +5935,7 @@ bool unit_needs_console(Unit *u) {
         return exec_context_may_touch_console(ec);
 }
 
-int unit_pid_attachable(Unit *u, PidRef *pid, sd_bus_error *error) {
+int unit_pid_attachable(Unit *u, const PidRef *pid, sd_bus_error *error) {
         int r;
 
         assert(u);
@@ -6213,19 +6172,98 @@ bool unit_can_isolate_refuse_manual(Unit *u) {
         return unit_can_isolate(u) && !u->refuse_manual_start;
 }
 
+void unit_next_freezer_state(Unit *u, FreezerAction action, FreezerState *ret, FreezerState *ret_target) {
+        Unit *slice;
+        FreezerState curr, parent, next, tgt;
+
+        assert(u);
+        assert(IN_SET(action, FREEZER_FREEZE, FREEZER_PARENT_FREEZE,
+                              FREEZER_THAW, FREEZER_PARENT_THAW));
+        assert(ret);
+        assert(ret_target);
+
+        /* This function determines the correct freezer state transition for a unit given the action
+         * being requested, taking the freezer state of the unit's slice into account (no slice counts as
+         * FREEZER_RUNNING). It stores the next state in *ret and the "target" in *ret_target; the latter
+         * is either FREEZER_FROZEN or FREEZER_RUNNING, i.e. the state we ultimately want to achieve. */
+
+        curr = u->freezer_state;
+        slice = UNIT_GET_SLICE(u);
+        if (slice)
+                parent = slice->freezer_state;
+        else
+                parent = FREEZER_RUNNING;
+
+        if (action == FREEZER_FREEZE) {
+                /* We always "promote" a freeze initiated by parent into a normal freeze */
+                if (IN_SET(curr, FREEZER_FROZEN, FREEZER_FROZEN_BY_PARENT))
+                        next = FREEZER_FROZEN;
+                else
+                        next = FREEZER_FREEZING;
+        } else if (action == FREEZER_THAW) {
+                /* Thawing is the most complicated operation here, because we can't thaw a unit
+                 * if its parent is frozen. So we instead "demote" a normal freeze into a freeze
+                 * initiated by parent if the parent is frozen */
+                if (IN_SET(curr, FREEZER_RUNNING, FREEZER_THAWING, FREEZER_FREEZING_BY_PARENT, FREEZER_FROZEN_BY_PARENT))
+                        next = curr;
+                else if (curr == FREEZER_FREEZING) {
+                        if (IN_SET(parent, FREEZER_RUNNING, FREEZER_THAWING))
+                                next = FREEZER_THAWING;
+                        else
+                                next = FREEZER_FREEZING_BY_PARENT;
+                } else {
+                        assert(curr == FREEZER_FROZEN);
+                        if (IN_SET(parent, FREEZER_RUNNING, FREEZER_THAWING))
+                                next = FREEZER_THAWING;
+                        else
+                                next = FREEZER_FROZEN_BY_PARENT;
+                }
+        } else if (action == FREEZER_PARENT_FREEZE) {
+                /* We need to avoid accidentally demoting units frozen manually */
+                if (IN_SET(curr, FREEZER_FREEZING, FREEZER_FROZEN, FREEZER_FROZEN_BY_PARENT))
+                        next = curr;
+                else
+                        next = FREEZER_FREEZING_BY_PARENT;
+        } else {
+                assert(action == FREEZER_PARENT_THAW);
+
+                /* We don't want to thaw units from a parent if they were frozen
+                 * manually, so for such units this action is a no-op */
+                if (IN_SET(curr, FREEZER_RUNNING, FREEZER_FREEZING, FREEZER_FROZEN))
+                        next = curr;
+                else
+                        next = FREEZER_THAWING;
+        }
+
+        tgt = freezer_state_finish(next);
+        if (tgt == FREEZER_FROZEN_BY_PARENT)
+                tgt = FREEZER_FROZEN;
+        assert(IN_SET(tgt, FREEZER_RUNNING, FREEZER_FROZEN));
+
+        *ret = next;
+        *ret_target = tgt;
+}
+
 bool unit_can_freeze(Unit *u) {
         assert(u);
 
+        if (unit_has_name(u, SPECIAL_ROOT_SLICE) || unit_has_name(u, SPECIAL_INIT_SCOPE))
+                return false;
+
         if (UNIT_VTABLE(u)->can_freeze)
                 return UNIT_VTABLE(u)->can_freeze(u);
 
-        return UNIT_VTABLE(u)->freeze;
+        return UNIT_VTABLE(u)->freezer_action;
 }
 
 void unit_frozen(Unit *u) {
         assert(u);
 
-        u->freezer_state = FREEZER_FROZEN;
+        u->freezer_state = u->freezer_state == FREEZER_FREEZING_BY_PARENT
+                           ? FREEZER_FROZEN_BY_PARENT
+                           : FREEZER_FROZEN;
+
+        log_unit_debug(u, "Unit now %s.", freezer_state_to_string(u->freezer_state));
 
         bus_unit_send_pending_freezer_message(u, false);
 }
@@ -6235,19 +6273,19 @@ void unit_thawed(Unit *u) {
 
         u->freezer_state = FREEZER_RUNNING;
 
+        log_unit_debug(u, "Unit thawed.");
+
         bus_unit_send_pending_freezer_message(u, false);
 }
 
-static int unit_freezer_action(Unit *u, FreezerAction action) {
+int unit_freezer_action(Unit *u, FreezerAction action) {
         UnitActiveState s;
-        int (*method)(Unit*);
         int r;
 
         assert(u);
         assert(IN_SET(action, FREEZER_FREEZE, FREEZER_THAW));
 
-        method = action == FREEZER_FREEZE ? UNIT_VTABLE(u)->freeze : UNIT_VTABLE(u)->thaw;
-        if (!method || !cg_freezer_supported())
+        if (!cg_freezer_supported() || !unit_can_freeze(u))
                 return -EOPNOTSUPP;
 
         if (u->job)
@@ -6260,36 +6298,21 @@ static int unit_freezer_action(Unit *u, FreezerAction action) {
         if (s != UNIT_ACTIVE)
                 return -EHOSTDOWN;
 
-        if ((IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING) && action == FREEZER_FREEZE) ||
-            (u->freezer_state == FREEZER_THAWING && action == FREEZER_THAW))
+        if (action == FREEZER_FREEZE && IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_FREEZING_BY_PARENT))
                 return -EALREADY;
+        if (action == FREEZER_THAW && u->freezer_state == FREEZER_THAWING)
+                return -EALREADY;
+        if (action == FREEZER_THAW && IN_SET(u->freezer_state, FREEZER_FREEZING_BY_PARENT, FREEZER_FROZEN_BY_PARENT))
+                return -ECHILD;
 
-        r = method(u);
+        r = UNIT_VTABLE(u)->freezer_action(u, action);
         if (r <= 0)
                 return r;
 
-        assert(IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_THAWING));
-
+        assert(IN_SET(u->freezer_state, FREEZER_FREEZING, FREEZER_FREEZING_BY_PARENT, FREEZER_THAWING));
         return 1;
 }
 
-int unit_freeze(Unit *u) {
-        return unit_freezer_action(u, FREEZER_FREEZE);
-}
-
-int unit_thaw(Unit *u) {
-        return unit_freezer_action(u, FREEZER_THAW);
-}
-
-/* Wrappers around low-level cgroup freezer operations common for service and scope units */
-int unit_freeze_vtable_common(Unit *u) {
-        return unit_cgroup_freezer_action(u, FREEZER_FREEZE);
-}
-
-int unit_thaw_vtable_common(Unit *u) {
-        return unit_cgroup_freezer_action(u, FREEZER_THAW);
-}
-
 Condition *unit_find_failed_condition(Unit *u) {
         Condition *failed_trigger = NULL;
         bool has_succeeded_trigger = false;
@@ -6310,7 +6333,7 @@ Condition *unit_find_failed_condition(Unit *u) {
 }
 
 static const char* const collect_mode_table[_COLLECT_MODE_MAX] = {
-        [COLLECT_INACTIVE] = "inactive",
+        [COLLECT_INACTIVE]           = "inactive",
         [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed",
 };
 
@@ -6460,7 +6483,7 @@ int unit_compare_priority(Unit *a, Unit *b) {
 }
 
 const ActivationDetailsVTable * const activation_details_vtable[_UNIT_TYPE_MAX] = {
-        [UNIT_PATH] = &activation_details_path_vtable,
+        [UNIT_PATH]  = &activation_details_path_vtable,
         [UNIT_TIMER] = &activation_details_timer_vtable,
 };
 
@@ -6596,11 +6619,7 @@ int activation_details_append_pair(ActivationDetails *details, char ***strv) {
                 return 0;
 
         if (!isempty(details->trigger_unit_name)) {
-                r = strv_extend(strv, "trigger_unit");
-                if (r < 0)
-                        return r;
-
-                r = strv_extend(strv, details->trigger_unit_name);
+                r = strv_extend_many(strv, "trigger_unit", details->trigger_unit_name);
                 if (r < 0)
                         return r;
         }
@@ -6615,3 +6634,24 @@ int activation_details_append_pair(ActivationDetails *details, char ***strv) {
 }
 
 DEFINE_TRIVIAL_REF_UNREF_FUNC(ActivationDetails, activation_details, activation_details_free);
+
+static const char* const unit_mount_dependency_type_table[_UNIT_MOUNT_DEPENDENCY_TYPE_MAX] = {
+        [UNIT_MOUNT_WANTS]    = "WantsMountsFor",
+        [UNIT_MOUNT_REQUIRES] = "RequiresMountsFor",
+};
+/* Presumably generates the to/from-string helpers for the table above — confirm in the macro's definition. */
+DEFINE_STRING_TABLE_LOOKUP(unit_mount_dependency_type, UnitMountDependencyType);
+
+UnitDependency unit_mount_dependency_type_to_dependency_type(UnitMountDependencyType t) {
+        switch (t) {
+        /* Maps each mount dependency type to its UnitDependency counterpart; other values must not occur. */
+        case UNIT_MOUNT_WANTS:
+                return UNIT_WANTS;
+
+        case UNIT_MOUNT_REQUIRES:
+                return UNIT_REQUIRES;
+
+        default:
+                assert_not_reached();
+        }
+}
diff --git a/src/core/unit.h b/src/core/unit.h
index 60bc2e3..b135fec 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 #pragma once
 
+#include <errno.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -8,6 +9,14 @@
 
 #include "sd-id128.h"
 
+/* Mount dependency kinds. Circular dependency with manager.h, needs to be defined before local includes */
+typedef enum UnitMountDependencyType {
+        UNIT_MOUNT_WANTS,                       /* WantsMountsFor= */
+        UNIT_MOUNT_REQUIRES,                    /* RequiresMountsFor= */
+        _UNIT_MOUNT_DEPENDENCY_TYPE_MAX,        /* number of kinds; sizes per-kind arrays such as Unit.mounts_for[] */
+        _UNIT_MOUNT_DEPENDENCY_TYPE_INVALID = -EINVAL,
+} UnitMountDependencyType;
+
 #include "bpf-program.h"
 #include "cgroup.h"
 #include "condition.h"
@@ -55,7 +64,11 @@ static inline bool UNIT_IS_INACTIVE_OR_FAILED(UnitActiveState t) {
 }
 
 static inline bool UNIT_IS_LOAD_COMPLETE(UnitLoadState t) {
-        return t >= 0 && t < _UNIT_LOAD_STATE_MAX && t != UNIT_STUB && t != UNIT_MERGED;
+        return t >= 0 && t < _UNIT_LOAD_STATE_MAX && !IN_SET(t, UNIT_STUB, UNIT_MERGED);
+}
+
+static inline bool UNIT_IS_LOAD_ERROR(UnitLoadState t) {
+        return IN_SET(t, UNIT_NOT_FOUND, UNIT_BAD_SETTING, UNIT_ERROR);
 }
 
 /* Stores the 'reason' a dependency was created as a bit mask, i.e. due to which configuration source it came to be. We
@@ -199,6 +212,7 @@ struct UnitRef {
         LIST_FIELDS(UnitRef, refs_by_target);
 };
 
+/* The generic, dynamic definition of the unit */
 typedef struct Unit {
         Manager *manager;
 
@@ -216,9 +230,9 @@ typedef struct Unit {
          * Hashmap(UnitDependency → Hashmap(Unit* → UnitDependencyInfo)) */
         Hashmap *dependencies;
 
-        /* Similar, for RequiresMountsFor= path dependencies. The key is the path, the value the
-         * UnitDependencyInfo type */
-        Hashmap *requires_mounts_for;
+        /* Similar, for RequiresMountsFor= and WantsMountsFor= path dependencies. The key is the path, the
+         * value the UnitDependencyInfo type */
+        Hashmap *mounts_for[_UNIT_MOUNT_DEPENDENCY_TYPE_MAX];
 
         char *description;
         char **documentation;
@@ -361,74 +375,6 @@ typedef struct Unit {
         UnitFileState unit_file_state;
         PresetAction unit_file_preset;
 
-        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
-        nsec_t cpu_usage_base;
-        nsec_t cpu_usage_last; /* the most recently read value */
-
-        /* Most recently read value of memory accounting metrics */
-        uint64_t memory_accounting_last[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1];
-
-        /* The current counter of OOM kills initiated by systemd-oomd */
-        uint64_t managed_oom_kill_last;
-
-        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
-        uint64_t oom_kill_last;
-
-        /* Where the io.stat data was at the time the unit was started */
-        uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
-        uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
-
-        /* Counterparts in the cgroup filesystem */
-        char *cgroup_path;
-        uint64_t cgroup_id;
-        CGroupMask cgroup_realized_mask;           /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
-        CGroupMask cgroup_enabled_mask;            /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
-        CGroupMask cgroup_invalidated_mask;        /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
-        CGroupMask cgroup_members_mask;            /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
-
-        /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
-        int cgroup_control_inotify_wd;
-        int cgroup_memory_inotify_wd;
-
-        /* Device Controller BPF program */
-        BPFProgram *bpf_device_control_installed;
-
-        /* IP BPF Firewalling/accounting */
-        int ip_accounting_ingress_map_fd;
-        int ip_accounting_egress_map_fd;
-        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
-
-        int ipv4_allow_map_fd;
-        int ipv6_allow_map_fd;
-        int ipv4_deny_map_fd;
-        int ipv6_deny_map_fd;
-        BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
-        BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
-
-        Set *ip_bpf_custom_ingress;
-        Set *ip_bpf_custom_ingress_installed;
-        Set *ip_bpf_custom_egress;
-        Set *ip_bpf_custom_egress_installed;
-
-        /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
-         * attached to unit cgroup by provided program fd and attach type. */
-        Hashmap *bpf_foreign_by_key;
-
-        FDSet *initial_socket_bind_link_fds;
-#if BPF_FRAMEWORK
-        /* BPF links to BPF programs attached to cgroup/bind{4|6} hooks and
-         * responsible for allowing or denying a unit to bind(2) to a socket
-         * address. */
-        struct bpf_link *ipv4_socket_bind_link;
-        struct bpf_link *ipv6_socket_bind_link;
-#endif
-
-        FDSet *initial_restric_ifaces_link_fds;
-#if BPF_FRAMEWORK
-        struct bpf_link *restrict_ifaces_ingress_bpf_link;
-        struct bpf_link *restrict_ifaces_egress_bpf_link;
-#endif
-
         /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
          * ones which might have appeared. */
         sd_event_source *rewatch_pids_event_source;
@@ -499,12 +445,6 @@ typedef struct Unit {
         bool in_audit:1;
         bool on_console:1;
 
-        bool cgroup_realized:1;
-        bool cgroup_members_mask_valid:1;
-
-        /* Reset cgroup accounting next time we fork something off */
-        bool reset_accounting:1;
-
         bool start_limit_hit:1;
 
         /* Did we already invoke unit_coldplug() for this unit? */
@@ -520,9 +460,6 @@ typedef struct Unit {
         bool exported_log_ratelimit_interval:1;
         bool exported_log_ratelimit_burst:1;
 
-        /* Whether we warned about clamping the CPU quota period */
-        bool warned_clamping_cpu_quota_period:1;
-
         /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If
          * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */
         signed int last_section_private:2;
@@ -568,6 +505,7 @@ static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) {
 
 #include "kill.h"
 
+/* The static const, immutable data about a specific unit type */
 typedef struct UnitVTable {
         /* How much memory does an object of this unit type need */
         size_t object_size;
@@ -584,11 +522,14 @@ typedef struct UnitVTable {
          * KillContext is found, if the unit type has that */
         size_t kill_context_offset;
 
-        /* If greater than 0, the offset into the object where the
-         * pointer to ExecSharedRuntime is found, if the unit type has
-         * that */
+        /* If greater than 0, the offset into the object where the pointer to ExecRuntime is found, if
+         * the unit type has that */
         size_t exec_runtime_offset;
 
+        /* If greater than 0, the offset into the object where the pointer to CGroupRuntime is found, if the
+         * unit type has that */
+        size_t cgroup_runtime_offset;
+
         /* The name of the configuration file section with the private settings of this unit */
         const char *private_section;
 
@@ -633,9 +574,9 @@ typedef struct UnitVTable {
         /* Clear out the various runtime/state/cache/logs/configuration data */
         int (*clean)(Unit *u, ExecCleanMask m);
 
-        /* Freeze the unit */
-        int (*freeze)(Unit *u);
-        int (*thaw)(Unit *u);
+        /* Freeze or thaw the unit. Returns > 0 to indicate that the request will be handled asynchronously; unit_frozen
+         * or unit_thawed should be called once the operation is done. Returns 0 if done successfully, or < 0 on error. */
+        int (*freezer_action)(Unit *u, FreezerAction a);
         bool (*can_freeze)(Unit *u);
 
         /* Return which kind of data can be cleaned */
@@ -691,6 +632,9 @@ typedef struct UnitVTable {
         /* Called whenever a process of this unit sends us a message */
         void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
 
+        /* Called whenever we learn a handoff timestamp */
+        void (*notify_handoff_timestamp)(Unit *u, const struct ucred *ucred, const dual_timestamp *ts);
+
         /* Called whenever a name this Unit registered for comes or goes away. */
         void (*bus_name_owner_change)(Unit *u, const char *new_owner);
 
@@ -722,10 +666,10 @@ typedef struct UnitVTable {
         /* Returns the start timeout of a unit */
         usec_t (*get_timeout_start_usec)(Unit *u);
 
-        /* Returns the main PID if there is any defined, or 0. */
-        PidRef* (*main_pid)(Unit *u);
+        /* Returns the main PID if there is any defined, or NULL. */
+        PidRef* (*main_pid)(Unit *u, bool *ret_is_alien);
 
-        /* Returns the control PID if there is any defined, or 0. */
+        /* Returns the control PID if there is any defined, or NULL. */
         PidRef* (*control_pid)(Unit *u);
 
         /* Returns true if the unit currently needs access to the console */
@@ -794,6 +738,9 @@ typedef struct UnitVTable {
         /* If true, we'll notify plymouth about this unit */
         bool notify_plymouth;
 
+        /* If true, we'll notify a surrounding VMM/container manager about this unit becoming available */
+        bool notify_supervisor;
+
         /* The audit events to generate on start + stop (or 0 if none shall be generated) */
         int audit_start_message_type;
         int audit_stop_message_type;
@@ -903,7 +850,6 @@ bool unit_has_name(const Unit *u, const char *name);
 
 UnitActiveState unit_active_state(Unit *u);
 FreezerState unit_freezer_state(Unit *u);
-int unit_freezer_state_kernel(Unit *u, FreezerState *ret);
 
 const char* unit_sub_state_to_string(Unit *u);
 
@@ -916,17 +862,18 @@ int unit_start(Unit *u, ActivationDetails *details);
 int unit_stop(Unit *u);
 int unit_reload(Unit *u);
 
-int unit_kill(Unit *u, KillWho w, int signo, int code, int value, sd_bus_error *error);
+int unit_kill(Unit *u, KillWho w, int signo, int code, int value, sd_bus_error *ret_error);
 
 void unit_notify_cgroup_oom(Unit *u, bool managed_oom);
 
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success);
 
-int unit_watch_pidref(Unit *u, PidRef *pid, bool exclusive);
+int unit_watch_pidref(Unit *u, const PidRef *pid, bool exclusive);
 int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
-void unit_unwatch_pidref(Unit *u, PidRef *pid);
+void unit_unwatch_pidref(Unit *u, const PidRef *pid);
 void unit_unwatch_pid(Unit *u, pid_t pid);
 void unit_unwatch_all_pids(Unit *u);
+void unit_unwatch_pidref_done(Unit *u, PidRef *pidref);
 
 int unit_enqueue_rewatch_pids(Unit *u);
 void unit_dequeue_rewatch_pids(Unit *u);
@@ -984,12 +931,14 @@ void unit_ref_unset(UnitRef *ref);
 int unit_patch_contexts(Unit *u);
 
 ExecContext *unit_get_exec_context(const Unit *u) _pure_;
-KillContext *unit_get_kill_context(Unit *u) _pure_;
-CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
+KillContext *unit_get_kill_context(const Unit *u) _pure_;
+CGroupContext *unit_get_cgroup_context(const Unit *u) _pure_;
 
-ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
+ExecRuntime *unit_get_exec_runtime(const Unit *u) _pure_;
+CGroupRuntime *unit_get_cgroup_runtime(const Unit *u) _pure_;
 
 int unit_setup_exec_runtime(Unit *u);
+CGroupRuntime *unit_setup_cgroup_runtime(Unit *u);
 
 const char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf);
 char* unit_concat_strv(char **l, UnitWriteFlags flags);
@@ -997,11 +946,11 @@ char* unit_concat_strv(char **l, UnitWriteFlags flags);
 int unit_write_setting(Unit *u, UnitWriteFlags flags, const char *name, const char *data);
 int unit_write_settingf(Unit *u, UnitWriteFlags mode, const char *name, const char *format, ...) _printf_(4,5);
 
-int unit_kill_context(Unit *u, KillContext *c, KillOperation k, PidRef *main_pid, PidRef *control_pid, bool main_pid_alien);
+int unit_kill_context(Unit *u, KillOperation k);
 
 int unit_make_transient(Unit *u);
 
-int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask);
+int unit_add_mounts_for(Unit *u, const char *path, UnitDependencyMask mask, UnitMountDependencyType type);
 
 bool unit_type_supported(UnitType t);
 
@@ -1012,7 +961,10 @@ bool unit_is_upheld_by_active(Unit *u, Unit **ret_culprit);
 bool unit_is_bound_by_inactive(Unit *u, Unit **ret_culprit);
 
 PidRef* unit_control_pid(Unit *u);
-PidRef* unit_main_pid(Unit *u);
+PidRef* unit_main_pid_full(Unit *u, bool *ret_is_alien);
+static inline PidRef* unit_main_pid(Unit *u) {
+        return unit_main_pid_full(u, NULL); /* NULL for ret_is_alien: this shorthand only hands back the PidRef */
+}
 
 void unit_warn_if_dir_nonempty(Unit *u, const char* where);
 int unit_fail_if_noncanonical(Unit *u, const char* where);
@@ -1046,7 +998,7 @@ int unit_warn_leftover_processes(Unit *u, cg_kill_log_func_t log_func);
 
 bool unit_needs_console(Unit *u);
 
-int unit_pid_attachable(Unit *unit, PidRef *pid, sd_bus_error *error);
+int unit_pid_attachable(Unit *unit, const PidRef *pid, sd_bus_error *error);
 
 static inline bool unit_has_job_type(Unit *u, JobType type) {
         return u && u->job && u->job->type == type;
@@ -1086,21 +1038,21 @@ bool unit_can_stop_refuse_manual(Unit *u);
 bool unit_can_isolate_refuse_manual(Unit *u);
 
 bool unit_can_freeze(Unit *u);
-int unit_freeze(Unit *u);
+int unit_freezer_action(Unit *u, FreezerAction action);
+void unit_next_freezer_state(Unit *u, FreezerAction a, FreezerState *ret, FreezerState *ret_tgt);
 void unit_frozen(Unit *u);
-
-int unit_thaw(Unit *u);
 void unit_thawed(Unit *u);
 
-int unit_freeze_vtable_common(Unit *u);
-int unit_thaw_vtable_common(Unit *u);
-
 Condition *unit_find_failed_condition(Unit *u);
 
 int unit_arm_timer(Unit *u, sd_event_source **source, bool relative, usec_t usec, sd_event_time_handler_t handler);
 
 int unit_compare_priority(Unit *a, Unit *b);
 
+UnitMountDependencyType unit_mount_dependency_type_from_string(const char *s) _const_;
+const char* unit_mount_dependency_type_to_string(UnitMountDependencyType t) _const_;
+UnitDependency unit_mount_dependency_type_to_dependency_type(UnitMountDependencyType t) _pure_;
+
 /* Macros which append UNIT= or USER_UNIT= to the message */
 
 #define log_unit_full_errno_zerook(unit, level, error, ...)             \
-- 
cgit v1.2.3