1 files changed, 145 insertions, 140 deletions
diff --git a/src/shared/mount-setup.c b/src/shared/mount-setup.c
index 1226ca1..ba291bd 100644
--- a/src/shared/mount-setup.c
+++ b/src/shared/mount-setup.c
@@ -69,7 +69,7 @@ static bool check_recursiveprot_supported(void) {
 
         r = mount_option_supported("cgroup2", "memory_recursiveprot", NULL);
         if (r < 0)
-                log_debug_errno(r, "Failed to determiner whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
+                log_debug_errno(r, "Failed to determine whether the 'memory_recursiveprot' mount option is supported, assuming not: %m");
         else if (r == 0)
                 log_debug("This kernel version does not support 'memory_recursiveprot', not using mount option.");
 
@@ -107,16 +107,6 @@ static const MountPoint mount_table[] = {
           cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
         { "cgroup2",     "/sys/fs/cgroup",            "cgroup2",    NULL,                                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
           cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
-        { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP,     MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
-          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
-        { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    "nsdelegate",                               MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
-        { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    NULL,                                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE },
-        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr",                  MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_IN_CONTAINER     },
-        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",                        MS_NOSUID|MS_NOEXEC|MS_NODEV,
-          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER },
 #if ENABLE_PSTORE
         { "pstore",      "/sys/fs/pstore",            "pstore",     NULL,                                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
           NULL,          MNT_NONE                   },
@@ -135,8 +125,8 @@ bool mount_point_is_api(const char *path) {
         /* Checks if this mount point is considered "API", and hence
          * should be ignored */
 
-        for (size_t i = 0; i < ELEMENTSOF(mount_table); i ++)
-                if (path_equal(path, mount_table[i].where))
+        FOREACH_ELEMENT(i, mount_table)
+                if (path_equal(path, i->where))
                         return true;
 
         return path_startswith(path, "/sys/fs/cgroup/");
@@ -167,8 +157,11 @@ static int mount_one(const MountPoint *p, bool relabel) {
         int r, priority;
 
         assert(p);
+        assert(p->what);
+        assert(p->where);
+        assert(p->type);
 
-        priority = (p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
+        priority = FLAGS_SET(p->mode, MNT_FATAL) ? LOG_ERR : LOG_DEBUG;
 
         if (p->condition_fn && !p->condition_fn())
                 return 0;
@@ -177,16 +170,16 @@ static int mount_one(const MountPoint *p, bool relabel) {
         if (relabel)
                 (void) label_fix(p->where, LABEL_IGNORE_ENOENT|LABEL_IGNORE_EROFS);
 
-        r = path_is_mount_point(p->where, NULL, AT_SYMLINK_FOLLOW);
+        r = path_is_mount_point_full(p->where, /* root = */ NULL, AT_SYMLINK_FOLLOW);
         if (r < 0 && r != -ENOENT) {
                 log_full_errno(priority, r, "Failed to determine whether %s is a mount point: %m", p->where);
-                return (p->mode & MNT_FATAL) ? r : 0;
+                return FLAGS_SET(p->mode, MNT_FATAL) ? r : 0;
         }
         if (r > 0)
                 return 0;
 
         /* Skip securityfs in a container */
-        if (!(p->mode & MNT_IN_CONTAINER) && detect_container() > 0)
+        if (!FLAGS_SET(p->mode, MNT_IN_CONTAINER) && detect_container() > 0)
                 return 0;
 
         /* The access mode here doesn't really matter too much, since
@@ -202,44 +195,37 @@ static int mount_one(const MountPoint *p, bool relabel) {
                   p->type,
                   strna(p->options));
 
-        if (FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK))
-                r = mount_follow_verbose(priority, p->what, p->where, p->type, p->flags, p->options);
-        else
-                r = mount_nofollow_verbose(priority, p->what, p->where, p->type, p->flags, p->options);
+        r = mount_verbose_full(priority, p->what, p->where, p->type, p->flags, p->options, FLAGS_SET(p->mode, MNT_FOLLOW_SYMLINK));
         if (r < 0)
-                return (p->mode & MNT_FATAL) ? r : 0;
+                return FLAGS_SET(p->mode, MNT_FATAL) ? r : 0;
 
         /* Relabel again, since we now mounted something fresh here */
         if (relabel)
                 (void) label_fix(p->where, 0);
 
-        if (p->mode & MNT_CHECK_WRITABLE) {
+        if (FLAGS_SET(p->mode, MNT_CHECK_WRITABLE))
                 if (access(p->where, W_OK) < 0) {
                         r = -errno;
 
                         (void) umount2(p->where, UMOUNT_NOFOLLOW);
                         (void) rmdir(p->where);
 
-                        log_full_errno(priority, r, "Mount point %s not writable after mounting, undoing: %m", p->where);
-                        return (p->mode & MNT_FATAL) ? r : 0;
+                        log_full_errno(priority, r, "Mount point '%s' not writable after mounting, undoing: %m", p->where);
+                        return FLAGS_SET(p->mode, MNT_FATAL) ? r : 0;
                 }
-        }
 
         return 1;
 }
 
 static int mount_points_setup(size_t n, bool loaded_policy) {
-        int ret = 0, r;
+        int r = 0;
 
         assert(n <= ELEMENTSOF(mount_table));
 
-        FOREACH_ARRAY(mp, mount_table, n) {
-                r = mount_one(mp, loaded_policy);
-                if (r != 0 && ret >= 0)
-                        ret = r;
-        }
+        FOREACH_ARRAY(mp, mount_table, n)
+                RET_GATHER(r, mount_one(mp, loaded_policy)); /* NOTE(review): the removed loop also recorded positive results of mount_one(); confirm RET_GATHER() does too, or that no caller cares */
 
-        return ret;
+        return r;
 }
 
 int mount_setup_early(void) {
@@ -297,81 +283,6 @@ static int symlink_controller(const char *target, const char *alias) {
         return 0;
 }
 
-int mount_cgroup_controllers(void) {
-        _cleanup_set_free_ Set *controllers = NULL;
-        int r;
-
-        if (!cg_is_legacy_wanted())
-                return 0;
-
-        /* Mount all available cgroup controllers that are built into the kernel. */
-        r = cg_kernel_controllers(&controllers);
-        if (r < 0)
-                return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
-
-        for (;;) {
-                _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
-                const char *other_controller;
-                MountPoint p = {
-                        .what = "cgroup",
-                        .type = "cgroup",
-                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
-                        .mode = MNT_IN_CONTAINER,
-                };
-
-                controller = set_steal_first(controllers);
-                if (!controller)
-                        break;
-
-                /* Check if we shall mount this together with another controller */
-                other_controller = join_with(controller);
-                if (other_controller) {
-                        _cleanup_free_ char *c = NULL;
-
-                        /* Check if the other controller is actually available in the kernel too */
-                        c = set_remove(controllers, other_controller);
-                        if (c) {
-
-                                /* Join the two controllers into one string, and maintain a stable ordering */
-                                if (strcmp(controller, other_controller) < 0)
-                                        options = strjoin(controller, ",", other_controller);
-                                else
-                                        options = strjoin(other_controller, ",", controller);
-                                if (!options)
-                                        return log_oom();
-                        }
-                }
-
-                /* The simple case, where there's only one controller to mount together */
-                if (!options)
-                        options = TAKE_PTR(controller);
-
-                where = path_join("/sys/fs/cgroup", options);
-                if (!where)
-                        return log_oom();
-
-                p.where = where;
-                p.options = options;
-
-                r = mount_one(&p, true);
-                if (r < 0)
-                        return r;
-
-                /* Create symlinks from the individual controller names, in case we have a joined mount */
-                if (controller)
-                        (void) symlink_controller(options, controller);
-                if (other_controller)
-                        (void) symlink_controller(options, other_controller);
-        }
-
-        /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */
-        (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs",
-                              MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY,
-                              "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP);
-
-        return 0;
-}
-
 #if HAVE_SELINUX || ENABLE_SMACK
 static int relabel_cb(
                 RecurseDirEvent event,
@@ -415,34 +326,6 @@ static int relabel_tree(const char *path) {
         return r;
 }
 
-static int relabel_cgroup_filesystems(void) {
-        int r;
-        struct statfs st;
-
-        r = cg_all_unified();
-        if (r == 0) {
-                /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this
-                   only when the filesystem has been already populated by a previous instance of systemd
-                   running from initrd. Otherwise don't remount anything and leave the filesystem read-write
-                   for the cgroup filesystems to be mounted inside. */
-                if (statfs("/sys/fs/cgroup", &st) < 0)
-                        return log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup: %m");
-
-                if (st.f_flags & ST_RDONLY)
-                        (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
-
-                (void) label_fix("/sys/fs/cgroup", 0);
-                (void) relabel_tree("/sys/fs/cgroup");
-
-                if (st.f_flags & ST_RDONLY)
-                        (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
-
-        } else if (r < 0)
-                return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m");
-
-        return 0;
-}
-
 static int relabel_extra(void) {
         _cleanup_strv_free_ char **files = NULL;
         int r, c = 0;
@@ -533,14 +416,12 @@ int mount_setup(bool loaded_policy, bool leave_propagation) {
                 FOREACH_STRING(i, "/dev", "/dev/shm", "/run")
                         (void) relabel_tree(i);
 
-                (void) relabel_cgroup_filesystems();
-
                 n_extra = relabel_extra();
 
                 after_relabel = now(CLOCK_MONOTONIC);
 
-                log_info("Relabeled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.",
-                         n_extra > 0 ? ", additional files" : "",
+                log_info("Relabeled /dev/, /dev/shm/, /run/%s in %s.",
+                         n_extra > 0 ? ", and additional files" : "",
                          FORMAT_TIMESPAN(after_relabel - before_relabel, 0));
         }
 #endif
@@ -589,3 +470,127 @@ int mount_setup(bool loaded_policy, bool leave_propagation) {
 
         return 0;
 }
+
+static const MountPoint cgroupv1_mount_table[] = { /* Fixed cgroup v1 mounts; mount_cgroup_legacy_controllers() walks these in order */
+        { "tmpfs",       "/sys/fs/cgroup",            "tmpfs",      "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP,     MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, /* the tmpfs that the entries below are mounted into; failure is fatal */
+        { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    "nsdelegate",                               MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, /* first try with "nsdelegate"; not MNT_FATAL, so a failure falls through to the next entry */
+        { "cgroup2",     "/sys/fs/cgroup/unified",    "cgroup2",    NULL,                                       MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, /* fallback without options; mount_one() skips it if the path already is a mount point */
+        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd,xattr",                  MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_legacy_wanted, MNT_IN_CONTAINER     }, /* first try with "xattr"; not MNT_FATAL */
+        { "cgroup",      "/sys/fs/cgroup/systemd",    "cgroup",     "none,name=systemd",                        MS_NOSUID|MS_NOEXEC|MS_NODEV,
+          cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, /* fallback without "xattr"; failure of this one is fatal */
+};
+
+static void relabel_cgroup_legacy_hierarchy(void) {
+#if HAVE_SELINUX || ENABLE_SMACK
+        struct statfs st;
+
+        assert(cg_is_legacy_wanted());
+
+        /* Fix the labels of /sys/fs/cgroup and of the tree below it (a no-op build without SELinux/SMACK).
+           If the filesystem is read-only, i.e. it has already been populated by a previous instance of
+           systemd running from the initrd, temporarily remount it writable and restore read-only afterwards.
+           Otherwise don't remount anything, leaving it read-write for the cgroup filesystems mounted inside. */
+        if (statfs("/sys/fs/cgroup", &st) < 0)
+                return (void) log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup/: %m");
+
+        if (st.f_flags & ST_RDONLY)
+                (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL);
+
+        (void) label_fix("/sys/fs/cgroup", 0);
+        (void) relabel_tree("/sys/fs/cgroup");
+
+        if (st.f_flags & ST_RDONLY)
+                (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL);
+#endif
+}
+
+int mount_cgroup_legacy_controllers(bool loaded_policy) { /* Sets up the cgroup v1 mounts below /sys/fs/cgroup: the fixed table first, then one mount per kernel controller (or controller pair) */
+        _cleanup_set_free_ Set *controllers = NULL;
+        int r;
+
+        if (!cg_is_legacy_wanted())
+                return 0; /* legacy hierarchy not wanted: nothing to do */
+
+        if (!cg_is_legacy_force_enabled())
+                return -ERFKILL; /* legacy wanted but not force-enabled: refuse */
+
+        FOREACH_ELEMENT(mp, cgroupv1_mount_table) {
+                r = mount_one(mp, loaded_policy);
+                if (r < 0)
+                        return r;
+        }
+
+        if (loaded_policy)
+                relabel_cgroup_legacy_hierarchy();
+
+        /* Enumerate all available cgroup controllers that are built into the kernel; the loop below mounts them. */
+        r = cg_kernel_controllers(&controllers);
+        if (r < 0)
+                return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
+
+        for (;;) {
+                _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
+                const char *other_controller;
+                MountPoint p = {
+                        .what = "cgroup",
+                        .type = "cgroup",
+                        .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
+                        .mode = MNT_IN_CONTAINER,
+                };
+
+                controller = set_steal_first(controllers);
+                if (!controller)
+                        break;
+
+                /* Check if we shall mount this together with another controller (NULL if there is none) */
+                other_controller = join_with(controller);
+                if (other_controller) {
+                        _cleanup_free_ char *c = NULL;
+
+                        /* Check if the other controller is actually available in the kernel too; if so it is taken out of the set right here */
+                        c = set_remove(controllers, other_controller);
+                        if (c) {
+
+                                /* Join the two controllers into one string, sorted by name so that the ordering is stable */
+                                if (strcmp(controller, other_controller) < 0)
+                                        options = strjoin(controller, ",", other_controller);
+                                else
+                                        options = strjoin(other_controller, ",", controller);
+                                if (!options)
+                                        return log_oom();
+                        }
+                }
+
+                /* The simple case: nothing was joined, so the lone controller name itself becomes the mount option string */
+                if (!options)
+                        options = TAKE_PTR(controller);
+
+                where = path_join("/sys/fs/cgroup", options);
+                if (!where)
+                        return log_oom();
+
+                p.where = where;
+                p.options = options;
+
+                r = mount_one(&p, true); /* NB: relabel is always requested here, independently of loaded_policy */
+                if (r < 0)
+                        return r;
+
+                /* Symlink the individual controller names to a joined mount. NB: other_controller is linked even if it was not in the set. */
+                if (controller)
+                        (void) symlink_controller(options, controller);
+                if (other_controller)
+                        (void) symlink_controller(options, other_controller);
+        }
+
+        /* Now that everything is mounted, remount the tmpfs hosting the cgroup file systems read-only (result is ignored). */
+        (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs",
+                              MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY,
+                              "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP);
+
+        return 1; /* legacy hierarchy has been set up */
+}