From efeb864cb547a2cbf96dc0053a8bdb4d9190b364 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 12 Jun 2024 05:50:45 +0200 Subject: Merging upstream version 256. Signed-off-by: Daniel Baumann --- src/nspawn/nspawn-mount.c | 102 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 30 deletions(-) (limited to 'src/nspawn/nspawn-mount.c') diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 470f477..c2bd4f6 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -245,7 +245,7 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) assert(l); assert(n); - r = extract_many_words(&s, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL); + r = extract_many_words(&s, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination); if (r < 0) return r; if (r == 0) @@ -444,22 +444,38 @@ int tmpfs_patch_options( } int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { - const char *full, *top; - int r; + _cleanup_free_ char *top = NULL, *full = NULL;; unsigned long extra_flags = 0; + int r; - top = prefix_roota(dest, "/sys"); - r = path_is_fs_type(top, SYSFS_MAGIC); + top = path_join(dest, "/sys"); + if (!top) + return log_oom(); + + r = path_is_mount_point(top); if (r < 0) - return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); - /* /sys might already be mounted as sysfs by the outer child in the - * !netns case. In this case, it's all good. Don't touch it because we - * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555. - */ - if (r > 0) - return 0; + return log_error_errno(r, "Failed to determine if '%s' is a mountpoint: %m", top); + if (r == 0) { + /* If this is not a mount point yet, then mount a tmpfs there */ + r = mount_nofollow_verbose(LOG_ERR, "tmpfs", top, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, "mode=0555" TMPFS_LIMITS_SYS); + if (r < 0) + return r; + } else { + r = path_is_fs_type(top, SYSFS_MAGIC); + if (r < 0) + return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top); + + /* /sys/ might already be mounted as sysfs by the outer child in the !netns case. In this case, it's + * all good. Don't touch it because we don't have the right to do so, see + * https://github.com/systemd/systemd/issues/1555. + */ + if (r > 0) + return 0; + } - full = prefix_roota(top, "/full"); + full = path_join(top, "/full"); + if (!full) + return log_oom(); (void) mkdir(full, 0755); @@ -501,10 +517,11 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings) { if (rmdir(full) < 0) return log_error_errno(errno, "Failed to remove %s: %m", full); - /* Create mountpoint for cgroups. Otherwise we are not allowed since we - * remount /sys read-only. - */ - const char *x = prefix_roota(top, "/fs/cgroup"); + /* Create mountpoint for cgroups. Otherwise we are not allowed since we remount /sys/ read-only. */ + _cleanup_free_ char *x = path_join(top, "/fs/cgroup"); + if (!x) + return log_oom(); + (void) mkdir_p(x, 0755); return mount_nofollow_verbose(LOG_ERR, NULL, top, NULL, @@ -541,7 +558,7 @@ int mount_all(const char *dest, } MountPoint; static const MountPoint mount_table[] = { - /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */ + /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing when we are privileged) */ { "proc", "/proc", "proc", NULL, PROC_DEFAULT_MOUNT_FLAGS, MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_MKDIR|MOUNT_FOLLOW_SYMLINKS }, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */ @@ -575,15 +592,15 @@ int mount_all(const char *dest, { "mqueue", "/dev/mqueue", "mqueue", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, MOUNT_IN_USERNS|MOUNT_MKDIR }, - /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */ + /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing when we are privileged) */ { "tmpfs", "/tmp", "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME, MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP|MOUNT_MKDIR }, { "tmpfs", "/sys", "tmpfs", "mode=0555" TMPFS_LIMITS_SYS, MS_NOSUID|MS_NOEXEC|MS_NODEV, - MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR }, + MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS|MOUNT_MKDIR|MOUNT_PRIVILEGED }, { "sysfs", "/sys", "sysfs", NULL, SYS_DEFAULT_MOUNT_FLAGS, - MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR }, /* skipped if above was mounted */ + MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO|MOUNT_MKDIR|MOUNT_PRIVILEGED }, /* skipped if above was mounted */ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - MOUNT_FATAL|MOUNT_MKDIR }, /* skipped if above was mounted */ + MOUNT_FATAL|MOUNT_MKDIR|MOUNT_PRIVILEGED }, /* skipped if above was mounted */ { "tmpfs", "/dev", "tmpfs", "mode=0755" TMPFS_LIMITS_PRIVATE_DEV, MS_NOSUID|MS_STRICTATIME, MOUNT_FATAL|MOUNT_MKDIR }, { "tmpfs", "/dev/shm", "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS, MS_NOSUID|MS_NODEV|MS_STRICTATIME, @@ -604,11 +621,11 @@ int mount_all(const char *dest, MOUNT_FATAL|MOUNT_IN_USERNS }, #if HAVE_SELINUX { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, - MOUNT_MKDIR }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */ + MOUNT_MKDIR|MOUNT_PRIVILEGED }, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */ { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, - 0 }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */ + MOUNT_PRIVILEGED }, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */ { NULL, "/sys/fs/selinux", NULL, NULL, MS_PRIVATE, - 0 }, /* Turn off propagation (we only want that for the mount propagation tunnel dir) */ + MOUNT_PRIVILEGED }, /* Turn off propagation (we only want that for the mount propagation tunnel dir) */ #endif }; @@ -617,6 +634,7 @@ int mount_all(const char *dest, bool ro = FLAGS_SET(mount_settings, MOUNT_APPLY_APIVFS_RO); bool in_userns = FLAGS_SET(mount_settings, MOUNT_IN_USERNS); bool tmpfs_tmp = FLAGS_SET(mount_settings, MOUNT_APPLY_TMPFS_TMP); + bool privileged = FLAGS_SET(mount_settings, MOUNT_PRIVILEGED); int r; for (size_t k = 0; k < ELEMENTSOF(mount_table); k++) { @@ -624,6 +642,10 @@ int mount_all(const char *dest, bool fatal = FLAGS_SET(mount_table[k].mount_settings, MOUNT_FATAL); const char *o; + /* If we are not privileged but the entry is marked as privileged and to be mounted outside the user namespace, then skip it */ + if (!privileged && FLAGS_SET(mount_table[k].mount_settings, MOUNT_PRIVILEGED) && !FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS)) + continue; + if (in_userns != FLAGS_SET(mount_table[k].mount_settings, MOUNT_IN_USERNS)) continue; @@ -642,7 +664,7 @@ int mount_all(const char *dest, /* Skip this entry if it is not a remount. */ if (mount_table[k].what) { - r = path_is_mount_point(where, NULL, 0); + r = path_is_mount_point(where); if (r < 0 && r != -ENOENT) return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where); if (r > 0) @@ -742,6 +764,8 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl new_idmapping = REMOUNT_IDMAPPING_NONE; else if (streq(word, "rootidmap")) new_idmapping = REMOUNT_IDMAPPING_HOST_OWNER; + else if (streq(word, "owneridmap")) + new_idmapping = REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER; else return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid bind mount option: %s", word); @@ -759,6 +783,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u _cleanup_free_ char *mount_opts = NULL, *where = NULL; unsigned long mount_flags = MS_BIND | MS_REC; struct stat source_st, dest_st; + uid_t dest_uid = UID_INVALID; int r; RemountIdmapping idmapping = REMOUNT_IDMAPPING_NONE; @@ -787,6 +812,8 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u if (stat(where, &dest_st) < 0) return log_error_errno(errno, "Failed to stat %s: %m", where); + dest_uid = dest_st.st_uid; + if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot bind mount directory %s on file %s.", @@ -815,6 +842,8 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u if (chown(where, uid_shift, uid_shift) < 0) return log_error_errno(errno, "Failed to chown %s: %m", where); + + dest_uid = uid_shift; } r = mount_nofollow_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts); @@ -828,7 +857,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u } if (idmapping != REMOUNT_IDMAPPING_NONE) { - r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, idmapping); + r = remount_idmap(STRV_MAKE(where), uid_shift, uid_range, source_st.st_uid, dest_uid, idmapping); if (r < 0) return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where); } @@ -1388,17 +1417,30 @@ int wipe_fully_visible_fs(int mntns_fd) { _cleanup_close_ int orig_mntns_fd = -EBADF; int r, rr; - r = namespace_open(0, NULL, &orig_mntns_fd, NULL, NULL, NULL); + r = namespace_open(0, + /* ret_pidns_fd = */ NULL, + &orig_mntns_fd, + /* ret_netns_fd = */ NULL, + /* ret_userns_fd = */ NULL, + /* ret_root_fd = */ NULL); if (r < 0) return log_error_errno(r, "Failed to pin originating mount namespace: %m"); - r = namespace_enter(-EBADF, mntns_fd, -EBADF, -EBADF, -EBADF); + r = namespace_enter(/* pidns_fd = */ -EBADF, + mntns_fd, + /* netns_fd = */ -EBADF, + /* userns_fd = */ -EBADF, + /* root_fd = */ -EBADF); if (r < 0) return log_error_errno(r, "Failed to enter mount namespace: %m"); rr = do_wipe_fully_visible_fs(); - r = namespace_enter(-EBADF, orig_mntns_fd, -EBADF, -EBADF, -EBADF); + r = namespace_enter(/* pidns_fd = */ -EBADF, + orig_mntns_fd, + /* netns_fd = */ -EBADF, + /* userns_fd = */ -EBADF, + /* root_fd = */ -EBADF); if (r < 0) return log_error_errno(r, "Failed to enter original mount namespace: %m"); -- cgit v1.2.3