diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 19:33:32 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-14 19:33:32 +0000 |
commit | 8bb05ac73a5b448b339ce0bc8d396c82c459b47f (patch) | |
tree | 1fdda006866bca20d41cb206767ea5241e36852f /sys-utils/unshare.c | |
parent | Adding debian version 2.39.3-11. (diff) | |
download | util-linux-8bb05ac73a5b448b339ce0bc8d396c82c459b47f.tar.xz util-linux-8bb05ac73a5b448b339ce0bc8d396c82c459b47f.zip |
Merging upstream version 2.40.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | sys-utils/unshare.c | 414 |
1 files changed, 239 insertions, 175 deletions
diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c index 29fad71..ccb0834 100644 --- a/sys-utils/unshare.c +++ b/sys-utils/unshare.c @@ -212,12 +212,12 @@ static ino_t get_mnt_ino(pid_t pid) return st.st_ino; } -static void settime(time_t offset, clockid_t clk_id) +static void settime(int64_t offset, clockid_t clk_id) { char buf[sizeof(stringify_value(ULONG_MAX)) * 3]; int fd, len; - len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, (int64_t) offset); + len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, offset); fd = open("/proc/self/timens_offsets", O_WRONLY); if (fd < 0) @@ -364,6 +364,7 @@ static gid_t get_group(const char *s, const char *err) * @outer: First ID mapped on the outside of the namespace * @inner: First ID mapped on the inside of the namespace * @count: Length of the inside and outside ranges + * @next: Next range of IDs in the chain * * A range of uids/gids to map using new[gu]idmap. */ @@ -371,9 +372,16 @@ struct map_range { unsigned int outer; unsigned int inner; unsigned int count; + struct map_range *next; }; -#define UID_BUFSIZ sizeof(stringify_value(ULONG_MAX)) +static void insert_map_range(struct map_range **chain, struct map_range map) +{ + struct map_range *tail = *chain; + *chain = xmalloc(sizeof(**chain)); + memcpy(*chain, &map, sizeof(**chain)); + (*chain)->next = tail; +} /** * get_map_range() - Parse a mapping range from a string @@ -382,20 +390,18 @@ struct map_range { * Parse a string of the form inner:outer:count or outer,inner,count into * a new mapping range. * - * Return: A new &struct map_range + * Return: A struct map_range */ -static struct map_range *get_map_range(const char *s) +static struct map_range get_map_range(const char *s) { int end; - struct map_range *ret; - - ret = xmalloc(sizeof(*ret)); + struct map_range ret = { .next = NULL }; - if (sscanf(s, "%u:%u:%u%n", &ret->inner, &ret->outer, &ret->count, + if (sscanf(s, "%u:%u:%u%n", &ret.inner, &ret.outer, &ret.count, &end) >= 3 && !s[end]) return ret; /* inner:outer:count */ - if (sscanf(s, "%u,%u,%u%n", &ret->outer, &ret->inner, &ret->count, + if (sscanf(s, "%u,%u,%u%n", &ret.outer, &ret.inner, &ret.count, &end) >= 3 && !s[end]) return ret; /* outer,inner,count */ @@ -410,16 +416,13 @@ static struct map_range *get_map_range(const char *s) * * This finds the first subid range matching @uid in @filename. */ -static struct map_range *read_subid_range(char *filename, uid_t uid) +static struct map_range read_subid_range(char *filename, uid_t uid) { char *line = NULL, *pwbuf; FILE *idmap; size_t n = 0; struct passwd *pw; - struct map_range *map; - - map = xmalloc(sizeof(*map)); - map->inner = -1; + struct map_range map = { .inner = -1, .next = NULL }; pw = xgetpwuid(uid, &pwbuf); if (!pw) @@ -452,13 +455,13 @@ static struct map_range *read_subid_range(char *filename, uid_t uid) if (!rest) continue; *rest = '\0'; - map->outer = strtoul_or_err(s, _("failed to parse subid map")); + map.outer = strtoul_or_err(s, _("failed to parse subid map")); s = rest + 1; rest = strchr(s, '\n'); if (rest) *rest = '\0'; - map->count = strtoul_or_err(s, _("failed to parse subid map")); + map.count = strtoul_or_err(s, _("failed to parse subid map")); fclose(idmap); free(pw); @@ -472,129 +475,203 @@ static struct map_range *read_subid_range(char *filename, uid_t uid) } /** - * map_ids() - Create a new uid/gid map - * @idmapper: Either newuidmap or newgidmap - * @ppid: Pid to set the map for - * @outer: ID outside the namespace for a single map. - * @inner: ID inside the namespace for a single map. May be -1 to only use @map. - * @map: A range of IDs to map + * read_kernel_map() - Read all available IDs from the kernel + * @chain: destination list to receive pass-through ID mappings + * @filename: either /proc/self/uid_map or /proc/self/gid_map * - * This creates a new uid/gid map for @ppid using @idmapper. The ID @outer in - * the parent (our) namespace is mapped to the ID @inner in the child (@ppid's) - * namespace. In addition, the range of IDs beginning at @map->outer is mapped - * to the range of IDs beginning at @map->inner. The tricky bit is that we - * cannot let these mappings overlap. We accomplish this by removing a "hole" - * from @map, if @outer or @inner overlap it. This may result in one less than - * @map->count IDs being mapped from @map. The unmapped IDs are always the - * topmost IDs of the mapping (either in the parent or the child namespace). + * This is used by --map-users=all and --map-groups=all to construct + * pass-through mappings for all IDs available in the parent namespace. + */ +static void read_kernel_map(struct map_range **chain, char *filename) +{ + char *line = NULL; + size_t size = 0; + FILE *idmap; + + idmap = fopen(filename, "r"); + if (!idmap) + err(EXIT_FAILURE, _("could not open '%s'"), filename); + + while (getline(&line, &size, idmap) != -1) { + unsigned int start, count; + if (sscanf(line, " %u %*u %u", &start, &count) < 2) + continue; + insert_map_range(chain, (struct map_range) { + .inner = start, + .outer = start, + .count = count + }); + } + + fclose(idmap); + free(line); +} + +/** + * add_single_map_range() - Add a single-ID map into a list without overlap + * @chain: A linked list of ID range mappings + * @outer: ID outside the namespace for a single map. + * @inner: ID inside the namespace for a single map, or -1 for no map. * - * Most of the time, this function will be called with @map->outer as some - * large ID, @map->inner as 0, and @map->count as a large number (at least - * 1000, but less than @map->outer). Typically, there will be no conflict with - * @outer. However, @inner may split the mapping for e.g. --map-current-user. + * Prepend a mapping to @chain for the single ID @outer to the single ID + * @inner. The tricky bit is that we cannot let existing mappings overlap it. + * We accomplish this by removing a "hole" from each existing range @map, if + * @outer or @inner overlap it. This may result in one less than @map->count + * IDs being mapped from @map. The unmapped IDs are always the topmost IDs + * of the mapping (either in the parent or the child namespace). * - * This function always exec()s or errors out and does not return. + * Most of the time, this function will be called with a single mapping range + * @map, @map->outer as some large ID, @map->inner as 0, and @map->count as a + * large number (at least 1000, but less than @map->outer). Typically, there + * will be no conflict with @outer. However, @inner may split the mapping for + * e.g. --map-current-user. */ -static void __attribute__((__noreturn__)) -map_ids(const char *idmapper, int ppid, unsigned int outer, unsigned int inner, - struct map_range *map) + +static void add_single_map_range(struct map_range **chain, unsigned int outer, + unsigned int inner) { - /* idmapper + pid + 4 * map + NULL */ - char *argv[15]; - /* argv - idmapper - "1" - NULL */ - char args[12][UID_BUFSIZ]; - int i = 0, j = 0; - struct map_range lo, mid, hi; - unsigned int inner_offset, outer_offset; - - /* Some helper macros to reduce bookkeeping */ -#define push_str(s) do { \ - argv[i++] = s; \ -} while (0) -#define push_ul(x) do { \ - snprintf(args[j], sizeof(args[j]), "%u", x); \ - push_str(args[j++]); \ -} while (0) - - push_str(xstrdup(idmapper)); - push_ul(ppid); - if ((int)inner == -1) { + struct map_range *map = *chain; + + if (inner + 1 == 0) + outer = (unsigned int) -1; + *chain = NULL; + + while (map) { + struct map_range lo = { 0 }, mid = { 0 }, hi = { 0 }, + *next = map->next; + unsigned int inner_offset, outer_offset; + /* - * If we don't have a "single" mapping, then we can just use map - * directly, starting inner IDs from zero for an auto mapping + * Start inner IDs from zero for an auto mapping; otherwise, if + * the single mapping exists and overlaps the range, remove an ID */ - push_ul(map->inner + 1 ? map->inner : 0); - push_ul(map->outer); - push_ul(map->count); - push_str(NULL); + if (map->inner + 1 == 0) + map->inner = 0; + else if (inner + 1 != 0 && + ((outer >= map->outer && outer <= map->outer + map->count) || + (inner >= map->inner && inner <= map->inner + map->count))) + map->count--; + + /* Determine where the splits between lo, mid, and hi will be */ + outer_offset = min(outer > map->outer ? outer - map->outer : 0, + map->count); + inner_offset = min(inner > map->inner ? inner - map->inner : 0, + map->count); - execvp(idmapper, argv); - errexec(idmapper); + /* + * In the worst case, we need three mappings: + * From the bottom of map to either inner or outer + */ + lo.outer = map->outer; + lo.inner = map->inner; + lo.count = min(inner_offset, outer_offset); + + /* From the lower of inner or outer to the higher */ + mid.outer = lo.outer + lo.count; + mid.outer += mid.outer == outer; + mid.inner = lo.inner + lo.count; + mid.inner += mid.inner == inner; + mid.count = abs_diff(outer_offset, inner_offset); + + /* And from the higher of inner or outer to the end of the map */ + hi.outer = mid.outer + mid.count; + hi.outer += hi.outer == outer; + hi.inner = mid.inner + mid.count; + hi.inner += hi.inner == inner; + hi.count = map->count - lo.count - mid.count; + + /* Insert non-empty mappings into the output chain */ + if (hi.count) + insert_map_range(chain, hi); + if (mid.count) + insert_map_range(chain, mid); + if (lo.count) + insert_map_range(chain, lo); + + free(map); + map = next; } - /* - * Start inner IDs from zero for an auto mapping; otherwise, if the two - * fixed mappings overlap, remove an ID from map - */ - if (map->inner + 1 == 0) - map->inner = 0; - else if ((outer >= map->outer && outer <= map->outer + map->count) || - (inner >= map->inner && inner <= map->inner + map->count)) - map->count--; - - /* Determine where the splits between lo, mid, and hi will be */ - outer_offset = min(outer > map->outer ? outer - map->outer : 0, - map->count); - inner_offset = min(inner > map->inner ? inner - map->inner : 0, - map->count); - - /* - * In the worst case, we need three mappings: - * From the bottom of map to either inner or outer - */ - lo.outer = map->outer; - lo.inner = map->inner; - lo.count = min(inner_offset, outer_offset); - - /* From the lower of inner or outer to the higher */ - mid.outer = lo.outer + lo.count; - mid.outer += mid.outer == outer; - mid.inner = lo.inner + lo.count; - mid.inner += mid.inner == inner; - mid.count = abs_diff(outer_offset, inner_offset); - - /* And from the higher of inner or outer to the end of the map */ - hi.outer = mid.outer + mid.count; - hi.outer += hi.outer == outer; - hi.inner = mid.inner + mid.count; - hi.inner += hi.inner == inner; - hi.count = map->count - lo.count - mid.count; - - push_ul(inner); - push_ul(outer); - push_str("1"); - /* new[gu]idmap doesn't like zero-length mappings, so skip them */ - if (lo.count) { - push_ul(lo.inner); - push_ul(lo.outer); - push_ul(lo.count); + if (inner + 1 != 0) { + /* Insert single ID mapping as the first entry in the chain */ + insert_map_range(chain, (struct map_range) { + .inner = inner, + .outer = outer, + .count = 1 + }); } - if (mid.count) { - push_ul(mid.inner); - push_ul(mid.outer); - push_ul(mid.count); - } - if (hi.count) { - push_ul(hi.inner); - push_ul(hi.outer); - push_ul(hi.count); +} + +/** + * map_ids_external() - Create a new uid/gid map using setuid helper + * @idmapper: Either newuidmap or newgidmap + * @ppid: Pid to set the map for + * @chain: A linked list of ID range mappings + * + * This creates a new uid/gid map for @ppid using @idmapper to set the + * mapping for each of the ranges in @chain. + * + * This function always exec()s or errors out and does not return. + */ +static void __attribute__((__noreturn__)) +map_ids_external(const char *idmapper, int ppid, struct map_range *chain) +{ + unsigned int i = 0, length = 3; + char **argv; + + for (struct map_range *map = chain; map; map = map->next) + length += 3; + argv = xcalloc(length, sizeof(*argv)); + argv[i++] = xstrdup(idmapper); + xasprintf(&argv[i++], "%u", ppid); + + for (struct map_range *map = chain; map; map = map->next) { + xasprintf(&argv[i++], "%u", map->inner); + xasprintf(&argv[i++], "%u", map->outer); + xasprintf(&argv[i++], "%u", map->count); } - push_str(NULL); + + argv[i] = NULL; execvp(idmapper, argv); errexec(idmapper); } /** + * map_ids_internal() - Create a new uid/gid map using root privilege + * @type: Either uid_map or gid_map + * @ppid: Pid to set the map for + * @chain: A linked list of ID range mappings + * + * This creates a new uid/gid map for @ppid using a privileged write to + * /proc/@ppid/@type to set a mapping for each of the ranges in @chain. + */ +static void map_ids_internal(const char *type, int ppid, struct map_range *chain) +{ + int count, fd; + unsigned int length = 0; + char buffer[4096], *path; + + xasprintf(&path, "/proc/%u/%s", ppid, type); + for (struct map_range *map = chain; map; map = map->next) { + count = snprintf(buffer + length, sizeof(buffer) - length, + "%u %u %u\n", + map->inner, map->outer, map->count); + if (count < 0 || count + length > sizeof(buffer)) + errx(EXIT_FAILURE, + _("%s too large for kernel 4k limit"), path); + length += count; + } + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY); + if (fd < 0) + err(EXIT_FAILURE, _("failed to open %s"), path); + if (write_all(fd, buffer, length) < 0) + err(EXIT_FAILURE, _("failed to write %s"), path); + close(fd); + free(path); +} + +/** * map_ids_from_child() - Set up a new uid/gid map * @fd: The eventfd to wait on * @mapuser: The user to map the current user to (or -1) @@ -619,6 +696,19 @@ static pid_t map_ids_from_child(int *fd, uid_t mapuser, if (child) return child; + if (usermap) + add_single_map_range(&usermap, geteuid(), mapuser); + if (groupmap) + add_single_map_range(&groupmap, getegid(), mapgroup); + + if (geteuid() == 0) { + if (usermap) + map_ids_internal("uid_map", ppid, usermap); + if (groupmap) + map_ids_internal("gid_map", ppid, groupmap); + exit(EXIT_SUCCESS); + } + /* Avoid forking more than we need to */ if (usermap && groupmap) { pid = fork(); @@ -629,9 +719,9 @@ static pid_t map_ids_from_child(int *fd, uid_t mapuser, } if (!pid && usermap) - map_ids("newuidmap", ppid, geteuid(), mapuser, usermap); + map_ids_external("newuidmap", ppid, usermap); if (groupmap) - map_ids("newgidmap", ppid, getegid(), mapgroup, groupmap); + map_ids_external("newgidmap", ppid, groupmap); exit(EXIT_SUCCESS); } @@ -683,8 +773,8 @@ static void __attribute__((__noreturn__)) usage(void) fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out); fputs(USAGE_SEPARATOR, out); - printf(USAGE_HELP_OPTIONS(27)); - printf(USAGE_MAN_TAIL("unshare(1)")); + fprintf(out, USAGE_HELP_OPTIONS(27)); + fprintf(out, USAGE_MAN_TAIL("unshare(1)")); exit(EXIT_SUCCESS); } @@ -764,8 +854,8 @@ int main(int argc, char *argv[]) uid_t uid = 0, real_euid = geteuid(); gid_t gid = 0, real_egid = getegid(); int keepcaps = 0; - time_t monotonic = 0; - time_t boottime = 0; + int64_t monotonic = 0; + int64_t boottime = 0; int force_monotonic = 0; int force_boottime = 0; @@ -844,21 +934,27 @@ int main(int argc, char *argv[]) case OPT_MAPUSERS: unshare_flags |= CLONE_NEWUSER; if (!strcmp(optarg, "auto")) - usermap = read_subid_range(_PATH_SUBUID, real_euid); + insert_map_range(&usermap, + read_subid_range(_PATH_SUBUID, real_euid)); + else if (!strcmp(optarg, "all")) + read_kernel_map(&usermap, _PATH_PROC_UIDMAP); else - usermap = get_map_range(optarg); + insert_map_range(&usermap, get_map_range(optarg)); break; case OPT_MAPGROUPS: unshare_flags |= CLONE_NEWUSER; if (!strcmp(optarg, "auto")) - groupmap = read_subid_range(_PATH_SUBGID, real_euid); + insert_map_range(&groupmap, + read_subid_range(_PATH_SUBGID, real_euid)); + else if (!strcmp(optarg, "all")) + read_kernel_map(&groupmap, _PATH_PROC_GIDMAP); else - groupmap = get_map_range(optarg); + insert_map_range(&groupmap, get_map_range(optarg)); break; case OPT_MAPAUTO: unshare_flags |= CLONE_NEWUSER; - usermap = read_subid_range(_PATH_SUBUID, real_euid); - groupmap = read_subid_range(_PATH_SUBGID, real_euid); + insert_map_range(&usermap, read_subid_range(_PATH_SUBUID, real_euid)); + insert_map_range(&groupmap, read_subid_range(_PATH_SUBGID, real_euid)); break; case OPT_SETGROUPS: setgrpcmd = setgroups_str2id(optarg); @@ -895,11 +991,11 @@ int main(int argc, char *argv[]) newdir = optarg; break; case OPT_MONOTONIC: - monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset")); + monotonic = strtos64_or_err(optarg, _("failed to parse monotonic offset")); force_monotonic = 1; break; case OPT_BOOTTIME: - boottime = strtoul_or_err(optarg, _("failed to parse boottime offset")); + boottime = strtos64_or_err(optarg, _("failed to parse boottime offset")); force_boottime = 1; break; @@ -994,8 +1090,10 @@ int main(int argc, char *argv[]) int termsig = WTERMSIG(status); - if (signal(termsig, SIG_DFL) == SIG_ERR || - sigemptyset(&sigset) != 0 || + if (termsig != SIGKILL && signal(termsig, SIG_DFL) == SIG_ERR) + err(EXIT_FAILURE, + _("signal handler reset failed")); + if (sigemptyset(&sigset) != 0 || sigaddset(&sigset, termsig) != 0 || sigprocmask(SIG_UNBLOCK, &sigset, NULL) != 0) err(EXIT_FAILURE, @@ -1089,42 +1187,8 @@ int main(int argc, char *argv[]) if (force_uid && setuid(uid) < 0) /* change UID */ err(EXIT_FAILURE, _("setuid failed")); - /* We use capabilities system calls to propagate the permitted - * capabilities into the ambient set because we have already - * forked so are in async-signal-safe context. */ - if (keepcaps && (unshare_flags & CLONE_NEWUSER)) { - struct __user_cap_header_struct header = { - .version = _LINUX_CAPABILITY_VERSION_3, - .pid = 0, - }; - - struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = {{ 0 }}; - uint64_t effective, cap; - - if (capget(&header, payload) < 0) - err(EXIT_FAILURE, _("capget failed")); - - /* In order the make capabilities ambient, we first need to ensure - * that they are all inheritable. */ - payload[0].inheritable = payload[0].permitted; - payload[1].inheritable = payload[1].permitted; - - if (capset(&header, payload) < 0) - err(EXIT_FAILURE, _("capset failed")); - - effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective; - - for (cap = 0; cap < (sizeof(effective) * 8); cap++) { - /* This is the same check as cap_valid(), but using - * the runtime value for the last valid cap. */ - if (cap > (uint64_t) cap_last_cap()) - continue; - - if ((effective & (1 << cap)) - && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0) - err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed")); - } - } + if (keepcaps && (unshare_flags & CLONE_NEWUSER)) + cap_permitted_to_ambient(); if (optind < argc) { execvp(argv[optind], argv + optind); |