diff options
Diffstat (limited to 'src/basic/namespace-util.c')
-rw-r--r-- | src/basic/namespace-util.c | 290 |
1 files changed, 252 insertions, 38 deletions
diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 2101f61..5b4e43f 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -11,6 +11,7 @@ #include "missing_magic.h" #include "missing_sched.h" #include "namespace-util.h" +#include "parse-util.h" #include "process-util.h" #include "stat-util.h" #include "stdio-util.h" @@ -33,71 +34,86 @@ const struct namespace_info namespace_info[] = { #define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path) -int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { - _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF; - int rfd = -EBADF; +static NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) { + for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++) + if (((namespace_info[t].clone_flag ^ clone_flag) & (CLONE_NEWCGROUP|CLONE_NEWIPC|CLONE_NEWNET|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUSER|CLONE_NEWUTS|CLONE_NEWTIME)) == 0) + return t; + + return _NAMESPACE_TYPE_INVALID; +} + +int namespace_open( + pid_t pid, + int *ret_pidns_fd, + int *ret_mntns_fd, + int *ret_netns_fd, + int *ret_userns_fd, + int *ret_root_fd) { + + _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF, + userns_fd = -EBADF, root_fd = -EBADF; assert(pid >= 0); - if (mntns_fd) { - const char *mntns; + if (ret_pidns_fd) { + const char *pidns; - mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); - mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (mntnsfd < 0) + pidns = pid_namespace_path(pid, NAMESPACE_PID); + pidns_fd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (pidns_fd < 0) return -errno; } - if (pidns_fd) { - const char *pidns; + if (ret_mntns_fd) { + const char *mntns; - pidns = pid_namespace_path(pid, NAMESPACE_PID); - pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (pidnsfd < 0) + mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); + mntns_fd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (mntns_fd < 0) return -errno; } - if (netns_fd) { + if (ret_netns_fd) { const char *netns; netns = pid_namespace_path(pid, NAMESPACE_NET); - netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (netnsfd < 0) + netns_fd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (netns_fd < 0) return -errno; } - if (userns_fd) { + if (ret_userns_fd) { const char *userns; userns = pid_namespace_path(pid, NAMESPACE_USER); - usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (usernsfd < 0 && errno != ENOENT) + userns_fd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (userns_fd < 0 && errno != ENOENT) return -errno; } - if (root_fd) { + if (ret_root_fd) { const char *root; root = procfs_file_alloca(pid, "root"); - rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); - if (rfd < 0) + root_fd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (root_fd < 0) return -errno; } - if (pidns_fd) - *pidns_fd = TAKE_FD(pidnsfd); + if (ret_pidns_fd) + *ret_pidns_fd = TAKE_FD(pidns_fd); - if (mntns_fd) - *mntns_fd = TAKE_FD(mntnsfd); + if (ret_mntns_fd) + *ret_mntns_fd = TAKE_FD(mntns_fd); - if (netns_fd) - *netns_fd = TAKE_FD(netnsfd); + if (ret_netns_fd) + *ret_netns_fd = TAKE_FD(netns_fd); - if (userns_fd) - *userns_fd = TAKE_FD(usernsfd); + if (ret_userns_fd) + *ret_userns_fd = TAKE_FD(userns_fd); - if (root_fd) - *root_fd = TAKE_FD(rfd); + if (ret_root_fd) + *ret_root_fd = TAKE_FD(root_fd); return 0; } @@ -206,6 +222,88 @@ int detach_mount_namespace(void) { return 0; } +int detach_mount_namespace_harder(uid_t target_uid, gid_t target_gid) { + int r; + + /* Tried detach_mount_namespace() first. If that doesn't work due to permissions, opens up an + * unprivileged user namespace with a mapping of the originating UID/GID to the specified target + * UID/GID. Then, tries detach_mount_namespace() again. + * + * Or in other words: tries much harder to get a mount namespace, making use of unprivileged user + * namespaces if need be. + * + * Note that after this function completed: + * + * → if we had privs, afterwards uids/gids on files and processes are as before + * + * → if we had no privs, our own id and all our files will show up owned by target_uid/target_gid, + * and everything else owned by nobody. + * + * Yes, that's quite a difference. */ + + if (!uid_is_valid(target_uid)) + return -EINVAL; + if (!gid_is_valid(target_gid)) + return -EINVAL; + + r = detach_mount_namespace(); + if (r != -EPERM) + return r; + + if (unshare(CLONE_NEWUSER) < 0) + return log_debug_errno(errno, "Failed to acquire user namespace: %m"); + + r = write_string_filef("/proc/self/uid_map", 0, + UID_FMT " " UID_FMT " 1\n", target_uid, getuid()); + if (r < 0) + return log_debug_errno(r, "Failed to write uid map: %m"); + + r = write_string_file("/proc/self/setgroups", "deny", 0); + if (r < 0) + return log_debug_errno(r, "Failed to write setgroups file: %m"); + + r = write_string_filef("/proc/self/gid_map", 0, + GID_FMT " " GID_FMT " 1\n", target_gid, getgid()); + if (r < 0) + return log_debug_errno(r, "Failed to write gid map: %m"); + + return detach_mount_namespace(); +} + +int detach_mount_namespace_userns(int userns_fd) { + int r; + + assert(userns_fd >= 0); + + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return log_debug_errno(errno, "Failed to join user namespace: %m"); + + r = reset_uid_gid(); + if (r < 0) + return log_debug_errno(r, "Failed to become root in user namespace: %m"); + + return detach_mount_namespace(); +} + +int userns_acquire_empty(void) { + _cleanup_(sigkill_waitp) pid_t pid = 0; + _cleanup_close_ int userns_fd = -EBADF; + int r; + + r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid); + if (r < 0) + return r; + if (r == 0) + /* Child. We do nothing here, just freeze until somebody kills us. */ + freeze(); + + r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); + if (r < 0) + return log_error_errno(r, "Failed to open userns fd: %m"); + + return TAKE_FD(userns_fd); +} + int userns_acquire(const char *uid_map, const char *gid_map) { char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1]; _cleanup_(sigkill_waitp) pid_t pid = 0; @@ -221,7 +319,7 @@ int userns_acquire(const char *uid_map, const char *gid_map) { r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid); if (r < 0) - return r; + return log_debug_errno(r, "Failed to fork process (sd-mkuserns): %m"); if (r == 0) /* Child. We do nothing here, just freeze until somebody kills us. */ freeze(); @@ -229,19 +327,50 @@ int userns_acquire(const char *uid_map, const char *gid_map) { xsprintf(path, "/proc/" PID_FMT "/uid_map", pid); r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) - return log_error_errno(r, "Failed to write UID map: %m"); + return log_debug_errno(r, "Failed to write UID map: %m"); xsprintf(path, "/proc/" PID_FMT "/gid_map", pid); r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER); if (r < 0) - return log_error_errno(r, "Failed to write GID map: %m"); - - r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); + return log_debug_errno(r, "Failed to write GID map: %m"); + + r = namespace_open(pid, + /* ret_pidns_fd = */ NULL, + /* ret_mntns_fd = */ NULL, + /* ret_netns_fd = */ NULL, + &userns_fd, + /* ret_root_fd = */ NULL); if (r < 0) - return log_error_errno(r, "Failed to open userns fd: %m"); + return log_debug_errno(r, "Failed to open userns fd: %m"); return TAKE_FD(userns_fd); +} +int netns_acquire(void) { + _cleanup_(sigkill_waitp) pid_t pid = 0; + _cleanup_close_ int netns_fd = -EBADF; + int r; + + /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills + * the process again. This way we have a netns fd that is not bound to any process. */ + + r = safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS, &pid); + if (r < 0) + return log_debug_errno(r, "Failed to fork process (sd-mknetns): %m"); + if (r == 0) + /* Child. We do nothing here, just freeze until somebody kills us. */ + freeze(); + + r = namespace_open(pid, + /* ret_pidns_fd = */ NULL, + /* ret_mntns_fd = */ NULL, + &netns_fd, + /* ret_userns_fd = */ NULL, + /* ret_root_fd = */ NULL); + if (r < 0) + return log_debug_errno(r, "Failed to open netns fd: %m"); + + return TAKE_FD(netns_fd); } int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { @@ -267,3 +396,88 @@ int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { return stat_inode_same(&ns_st1, &ns_st2); } + +int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range) { + _cleanup_free_ char *buffer = NULL; + const char *range, *shift; + int r; + uid_t uid_shift, uid_range = 65536; + + assert(s); + + range = strchr(s, ':'); + if (range) { + buffer = strndup(s, range - s); + if (!buffer) + return log_oom(); + shift = buffer; + + range++; + r = safe_atou32(range, &uid_range); + if (r < 0) + return log_error_errno(r, "Failed to parse UID range \"%s\": %m", range); + } else + shift = s; + + r = parse_uid(shift, &uid_shift); + if (r < 0) + return log_error_errno(r, "Failed to parse UID \"%s\": %m", s); + + if (!userns_shift_range_valid(uid_shift, uid_range)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "UID range cannot be empty or go beyond " UID_FMT ".", UID_INVALID); + + if (ret_uid_shift) + *ret_uid_shift = uid_shift; + + if (ret_uid_range) + *ret_uid_range = uid_range; + + return 0; +} + +int namespace_open_by_type(NamespaceType type) { + const char *p; + int fd; + + assert(type >= 0); + assert(type < _NAMESPACE_TYPE_MAX); + + p = pid_namespace_path(0, type); + + fd = RET_NERRNO(open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC)); + if (fd == -ENOENT && proc_mounted() == 0) + return -ENOSYS; + + return fd; +} + +int is_our_namespace(int fd, NamespaceType request_type) { + int clone_flag; + + assert(fd >= 0); + + clone_flag = ioctl(fd, NS_GET_NSTYPE); + if (clone_flag < 0) + return -errno; + + NamespaceType found_type = clone_flag_to_namespace_type(clone_flag); + if (found_type < 0) + return -EBADF; /* Uh? Unknown namespace type? */ + + if (request_type >= 0 && request_type != found_type) /* It's a namespace, but not of the right type? */ + return -EUCLEAN; + + struct stat st_fd, st_ours; + if (fstat(fd, &st_fd) < 0) + return -errno; + + const char *p = pid_namespace_path(0, found_type); + if (stat(p, &st_ours) < 0) { + if (errno == ENOENT) + return proc_mounted() == 0 ? -ENOSYS : -ENOENT; + + return -errno; + } + + return stat_inode_same(&st_ours, &st_fd); +} |