From: Luca Boccassi
Date: Thu, 13 Aug 2020 14:01:34 +0100
Subject: machine/basic: factor out helper function to add airlocked mount to namespace

Move the body of bus_machine_method_bind_mount() into a new shared helper,
bind_mount_in_namespace(), which bind mounts a source path into the mount
namespace of a target process via a propagation directory. The D-Bus method
now only builds the propagation path, calls the helper, and maps a negative
return value to a bus error.

Deviation from the upstream commit: bind_mount_in_namespace() now assigns r
when pipe2() fails and when the child exits with a non-zero status without
reporting an errno through the pipe. The pre-refactoring code assigned r in
both places; without that the helper returned a non-negative value and the
caller replied to the method call as if the mount had succeeded.

(cherry picked from commit 6af52c3a458691b016bedeba34c1e72294a67c81)
---
 src/machine/machine-dbus.c | 214 ++------------------------------------------
 src/shared/mount-util.c    | 217 +++++++++++++++++++++++++++++++++++++++++++++
 src/shared/mount-util.h    |   2 +
 3 files changed, 227 insertions(+), 206 deletions(-)

diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c
index 1105008..3c8f4fd 100644
--- a/src/machine/machine-dbus.c
+++ b/src/machine/machine-dbus.c
@@ -810,17 +810,9 @@ int bus_machine_method_open_shell(sd_bus_message *message, void *userdata, sd_bu
 }
 
 int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) {
-        _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
-        char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
-        bool mount_slave_created = false, mount_slave_mounted = false,
-                mount_tmp_created = false, mount_tmp_mounted = false,
-                mount_outside_created = false, mount_outside_mounted = false;
-        _cleanup_free_ char *chased_src = NULL;
         int read_only, make_file_or_directory;
-        const char *dest, *src;
+        const char *dest, *src, *propagate_directory;
         Machine *m = userdata;
-        struct stat st;
-        pid_t child;
         uid_t uid;
         int r;
 
@@ -862,205 +854,15 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu
         if (uid != 0)
                 return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied.");
 
-        /* One day, when bind mounting /proc/self/fd/n works across
-         * namespace boundaries we should rework this logic to make
-         * use of it... */
-
-        p = strjoina("/run/systemd/nspawn/propagate/", m->name, "/");
-        if (laccess(p, F_OK) < 0)
-                return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Container does not allow propagation of mount points.");
-
-        r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL);
+        propagate_directory = strjoina("/run/systemd/nspawn/propagate/", m->name);
+        r = bind_mount_in_namespace(m->leader,
+                                    propagate_directory,
+                                    "/run/host/incoming/",
+                                    src, dest, read_only, make_file_or_directory);
         if (r < 0)
-                return sd_bus_error_set_errnof(error, r, "Failed to resolve source path: %m");
-
-        if (lstat(chased_src, &st) < 0)
-                return sd_bus_error_set_errnof(error, errno, "Failed to stat() source path: %m");
-        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
-                return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Source directory can't be a symbolic link");
-
-        /* Our goal is to install a new bind mount into the container,
-           possibly read-only. This is irritatingly complex
-           unfortunately, currently.
-
-           First, we start by creating a private playground in /tmp,
-           that we can mount MS_SLAVE. (Which is necessary, since
-           MS_MOVE cannot be applied to mounts with MS_SHARED parent
-           mounts.) */
-
-        if (!mkdtemp(mount_slave))
-                return sd_bus_error_set_errnof(error, errno, "Failed to create playground %s: %m", mount_slave);
-
-        mount_slave_created = true;
-
-        r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to make bind mount %s: %m", mount_slave);
-                goto finish;
-        }
-
-        mount_slave_mounted = true;
-
-        r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to remount slave %s: %m", mount_slave);
-                goto finish;
-        }
-
-        /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
-        mount_tmp = strjoina(mount_slave, "/mount");
-        r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to create temporary mount point %s: %m", mount_tmp);
-                goto finish;
-        }
-
-        mount_tmp_created = true;
-
-        r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to mount %s: %m", chased_src);
-                goto finish;
-        }
-
-        mount_tmp_mounted = true;
-
-        /* Third, we remount the new bind mount read-only if requested. */
-        if (read_only) {
-                r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
-                if (r < 0) {
-                        sd_bus_error_set_errnof(error, r, "Failed to remount read-only %s: %m", mount_tmp);
-                        goto finish;
-                }
-        }
-
-        /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
-         * right-away. */
-
-        mount_outside = strjoina("/run/systemd/nspawn/propagate/", m->name, "/XXXXXX");
-        if (S_ISDIR(st.st_mode))
-                r = mkdtemp(mount_outside) ? 0 : -errno;
-        else {
-                r = mkostemp_safe(mount_outside);
-                safe_close(r);
-        }
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Cannot create propagation file or directory %s: %m", mount_outside);
-                goto finish;
-        }
-
-        mount_outside_created = true;
-
-        r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to move %s to %s: %m", mount_tmp, mount_outside);
-                goto finish;
-        }
-
-        mount_outside_mounted = true;
-        mount_tmp_mounted = false;
-
-        if (S_ISDIR(st.st_mode))
-                (void) rmdir(mount_tmp);
-        else
-                (void) unlink(mount_tmp);
-        mount_tmp_created = false;
-
-        (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
-        mount_slave_mounted = false;
-
-        (void) rmdir(mount_slave);
-        mount_slave_created = false;
-
-        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
-                r = sd_bus_error_set_errnof(error, errno, "Failed to create pipe: %m");
-                goto finish;
-        }
-
-        r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
-        if (r < 0) {
-                sd_bus_error_set_errnof(error, r, "Failed to fork(): %m");
-                goto finish;
-        }
-        if (r == 0) {
-                const char *mount_inside, *q;
-                int mntfd;
-
-                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
-
-                q = procfs_file_alloca(m->leader, "ns/mnt");
-                mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-                if (mntfd < 0) {
-                        r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
-                        goto child_fail;
-                }
-
-                if (setns(mntfd, CLONE_NEWNS) < 0) {
-                        r = log_error_errno(errno, "Failed to join namespace of leader: %m");
-                        goto child_fail;
-                }
-
-                if (make_file_or_directory) {
-                        (void) mkdir_parents(dest, 0755);
-                        (void) make_mount_point_inode_from_stat(&st, dest, 0700);
-                }
-
-                mount_inside = strjoina("/run/host/incoming/", basename(mount_outside));
-                r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL);
-                if (r < 0)
-                        goto child_fail;
-
-                _exit(EXIT_SUCCESS);
-
-        child_fail:
-                (void) write(errno_pipe_fd[1], &r, sizeof(r));
-                errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
-
-                _exit(EXIT_FAILURE);
-        }
-
-        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+                return sd_bus_error_set_errnof(error, r, "Failed to mount %s on %s in machine's namespace: %m", src, dest);
 
-        r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
-        if (r < 0) {
-                r = sd_bus_error_set_errnof(error, r, "Failed to wait for child: %m");
-                goto finish;
-        }
-        if (r != EXIT_SUCCESS) {
-                if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
-                        r = sd_bus_error_set_errnof(error, r, "Failed to mount: %m");
-                else
-                        r = sd_bus_error_setf(error, SD_BUS_ERROR_FAILED, "Child failed.");
-                goto finish;
-        }
-
-        r = sd_bus_reply_method_return(message, NULL);
-
-finish:
-        if (mount_outside_mounted)
-                (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
-        if (mount_outside_created) {
-                if (S_ISDIR(st.st_mode))
-                        (void) rmdir(mount_outside);
-                else
-                        (void) unlink(mount_outside);
-        }
-
-        if (mount_tmp_mounted)
-                (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
-        if (mount_tmp_created) {
-                if (S_ISDIR(st.st_mode))
-                        (void) rmdir(mount_tmp);
-                else
-                        (void) unlink(mount_tmp);
-        }
-
-        if (mount_slave_mounted)
-                (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
-        if (mount_slave_created)
-                (void) rmdir(mount_slave);
-
-        return r;
+        return sd_bus_reply_method_return(message, NULL);
 }
 
 int bus_machine_method_copy(sd_bus_message *message, void *userdata, sd_bus_error *error) {
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
index b19b384..4cfbb55 100644
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@@ -14,15 +14,18 @@
 #include "fs-util.h"
 #include "hashmap.h"
 #include "libmount-util.h"
+#include "mkdir.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
 #include "parse-util.h"
 #include "path-util.h"
+#include "process-util.h"
 #include "set.h"
 #include "stat-util.h"
 #include "stdio-util.h"
 #include "string-util.h"
 #include "strv.h"
+#include "tmpfile-util.h"
 
 int mount_fd(const char *source,
              int target_fd,
@@ -742,3 +745,217 @@ int mount_option_mangle(
 
         return 0;
 }
+
+int bind_mount_in_namespace(
+                pid_t target,
+                const char *propagate_path,
+                const char *incoming_path,
+                const char *src,
+                const char *dest,
+                bool read_only,
+                bool make_file_or_directory) {
+
+        _cleanup_close_pair_ int errno_pipe_fd[2] = { -1, -1 };
+        char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
+        bool mount_slave_created = false, mount_slave_mounted = false,
+                mount_tmp_created = false, mount_tmp_mounted = false,
+                mount_outside_created = false, mount_outside_mounted = false;
+        _cleanup_free_ char *chased_src = NULL;
+        struct stat st;
+        pid_t child;
+        int r;
+
+        assert(target > 0);
+        assert(propagate_path);
+        assert(incoming_path);
+        assert(src);
+        assert(dest);
+
+        /* One day, when bind mounting /proc/self/fd/n works across
+         * namespace boundaries we should rework this logic to make
+         * use of it... */
+
+        p = strjoina(propagate_path, "/");
+        r = laccess(p, F_OK);
+        if (r < 0)
+                return log_debug_errno(r == -ENOENT ? SYNTHETIC_ERRNO(EOPNOTSUPP) : r, "Target does not allow propagation of mount points");
+
+        r = chase_symlinks(src, NULL, CHASE_TRAIL_SLASH, &chased_src, NULL);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to resolve source path of %s: %m", src);
+
+        if (lstat(chased_src, &st) < 0)
+                return log_debug_errno(errno, "Failed to stat() resolved source path %s: %m", chased_src);
+        if (S_ISLNK(st.st_mode)) /* This shouldn't really happen, given that we just chased the symlinks above, but let's better be safe… */
+                return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Source directory %s can't be a symbolic link", chased_src);
+
+        /* Our goal is to install a new bind mount into the container,
+           possibly read-only. This is irritatingly complex
+           unfortunately, currently.
+
+           First, we start by creating a private playground in /tmp,
+           that we can mount MS_SLAVE. (Which is necessary, since
+           MS_MOVE cannot be applied to mounts with MS_SHARED parent
+           mounts.) */
+
+        if (!mkdtemp(mount_slave))
+                return log_debug_errno(errno, "Failed to create playground %s: %m", mount_slave);
+
+        mount_slave_created = true;
+
+        r = mount_nofollow_verbose(LOG_DEBUG, mount_slave, mount_slave, NULL, MS_BIND, NULL);
+        if (r < 0)
+                goto finish;
+
+        mount_slave_mounted = true;
+
+        r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_slave, NULL, MS_SLAVE, NULL);
+        if (r < 0)
+                goto finish;
+
+        /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */
+        mount_tmp = strjoina(mount_slave, "/mount");
+        r = make_mount_point_inode_from_stat(&st, mount_tmp, 0700);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to create temporary mount point %s: %m", mount_tmp);
+                goto finish;
+        }
+
+        mount_tmp_created = true;
+
+        r = mount_nofollow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL);
+        if (r < 0)
+                goto finish;
+
+        mount_tmp_mounted = true;
+
+        /* Third, we remount the new bind mount read-only if requested. */
+        if (read_only) {
+                r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
+                if (r < 0)
+                        goto finish;
+        }
+
+        /* Fourth, we move the new bind mount into the propagation directory. This way it will appear there read-only
+         * right-away. */
+
+        mount_outside = strjoina(propagate_path, "/XXXXXX");
+        if (S_ISDIR(st.st_mode))
+                r = mkdtemp(mount_outside) ? 0 : -errno;
+        else {
+                r = mkostemp_safe(mount_outside);
+                safe_close(r);
+        }
+        if (r < 0) {
+                log_debug_errno(r, "Cannot create propagation file or directory %s: %m", mount_outside);
+                goto finish;
+        }
+
+        mount_outside_created = true;
+
+        r = mount_nofollow_verbose(LOG_DEBUG, mount_tmp, mount_outside, NULL, MS_MOVE, NULL);
+        if (r < 0)
+                goto finish;
+
+        mount_outside_mounted = true;
+        mount_tmp_mounted = false;
+
+        if (S_ISDIR(st.st_mode))
+                (void) rmdir(mount_tmp);
+        else
+                (void) unlink(mount_tmp);
+        mount_tmp_created = false;
+
+        (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
+        mount_slave_mounted = false;
+
+        (void) rmdir(mount_slave);
+        mount_slave_created = false;
+
+        if (pipe2(errno_pipe_fd, O_CLOEXEC|O_NONBLOCK) < 0) {
+                r = log_debug_errno(errno, "Failed to create pipe: %m"); /* r is still >= 0 from the mount above, so it must be set here */
+                goto finish;
+        }
+
+        r = safe_fork("(sd-bindmnt)", FORK_RESET_SIGNALS, &child);
+        if (r < 0)
+                goto finish;
+        if (r == 0) {
+                const char *mount_inside, *q;
+                int mntfd;
+
+                errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]);
+
+                q = procfs_file_alloca(target, "ns/mnt");
+                mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                if (mntfd < 0) {
+                        r = log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+                        goto child_fail;
+                }
+
+                if (setns(mntfd, CLONE_NEWNS) < 0) {
+                        r = log_error_errno(errno, "Failed to join namespace of leader: %m");
+                        goto child_fail;
+                }
+
+                if (make_file_or_directory) {
+                        (void) mkdir_parents(dest, 0755);
+                        (void) make_mount_point_inode_from_stat(&st, dest, 0700);
+                }
+
+                /* Fifth, move the mount to the right place inside */
+                mount_inside = strjoina(incoming_path, basename(mount_outside));
+                r = mount_nofollow_verbose(LOG_ERR, mount_inside, dest, NULL, MS_MOVE, NULL);
+                if (r < 0)
+                        goto child_fail;
+
+                _exit(EXIT_SUCCESS);
+
+        child_fail:
+                (void) write(errno_pipe_fd[1], &r, sizeof(r));
+                errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+                _exit(EXIT_FAILURE);
+        }
+
+        errno_pipe_fd[1] = safe_close(errno_pipe_fd[1]);
+
+        r = wait_for_terminate_and_check("(sd-bindmnt)", child, 0);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to wait for child: %m");
+                goto finish;
+        }
+        if (r != EXIT_SUCCESS) {
+                if (read(errno_pipe_fd[0], &r, sizeof(r)) == sizeof(r))
+                        log_debug_errno(r, "Failed to mount: %m");
+                else
+                        r = log_debug_errno(SYNTHETIC_ERRNO(EPROTO), "Child failed."); /* don't return the positive exit status as success */
+                goto finish;
+        }
+
+finish:
+        if (mount_outside_mounted)
+                (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW);
+        if (mount_outside_created) {
+                if (S_ISDIR(st.st_mode))
+                        (void) rmdir(mount_outside);
+                else
+                        (void) unlink(mount_outside);
+        }
+
+        if (mount_tmp_mounted)
+                (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW);
+        if (mount_tmp_created) {
+                if (S_ISDIR(st.st_mode))
+                        (void) rmdir(mount_tmp);
+                else
+                        (void) unlink(mount_tmp);
+        }
+
+        if (mount_slave_mounted)
+                (void) umount_verbose(LOG_DEBUG, mount_slave, UMOUNT_NOFOLLOW);
+        if (mount_slave_created)
+                (void) rmdir(mount_slave);
+
+        return r;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
index 6202008..c3500a0 100644
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@@ -97,3 +97,5 @@ static inline char* umount_and_rmdir_and_free(char *p) {
         return NULL;
 }
 DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_rmdir_and_free);
+
+int bind_mount_in_namespace(pid_t target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory);