summaryrefslogtreecommitdiffstats
path: root/sys-utils/unshare.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 19:33:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 19:33:30 +0000
commitc61e14d3a8412cd50d98aab604e607692c844c8a (patch)
tree4925aca0e6b64c8664ea2f3fdfa99a52dc93d5da /sys-utils/unshare.c
parentAdding upstream version 2.39.3. (diff)
downloadutil-linux-c61e14d3a8412cd50d98aab604e607692c844c8a.tar.xz
util-linux-c61e14d3a8412cd50d98aab604e607692c844c8a.zip
Adding upstream version 2.40.upstream/2.40
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sys-utils/unshare.c')
-rw-r--r--sys-utils/unshare.c414
1 files changed, 239 insertions, 175 deletions
diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c
index 29fad71..ccb0834 100644
--- a/sys-utils/unshare.c
+++ b/sys-utils/unshare.c
@@ -212,12 +212,12 @@ static ino_t get_mnt_ino(pid_t pid)
return st.st_ino;
}
-static void settime(time_t offset, clockid_t clk_id)
+static void settime(int64_t offset, clockid_t clk_id)
{
char buf[sizeof(stringify_value(ULONG_MAX)) * 3];
int fd, len;
- len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, (int64_t) offset);
+ len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, offset);
fd = open("/proc/self/timens_offsets", O_WRONLY);
if (fd < 0)
@@ -364,6 +364,7 @@ static gid_t get_group(const char *s, const char *err)
* @outer: First ID mapped on the outside of the namespace
* @inner: First ID mapped on the inside of the namespace
* @count: Length of the inside and outside ranges
+ * @next: Next range of IDs in the chain
*
* A range of uids/gids to map using new[gu]idmap.
*/
@@ -371,9 +372,16 @@ struct map_range {
unsigned int outer;
unsigned int inner;
unsigned int count;
+ struct map_range *next;
};
-#define UID_BUFSIZ sizeof(stringify_value(ULONG_MAX))
+static void insert_map_range(struct map_range **chain, struct map_range map)
+{
+ struct map_range *tail = *chain;
+ *chain = xmalloc(sizeof(**chain));
+ memcpy(*chain, &map, sizeof(**chain));
+ (*chain)->next = tail;
+}
/**
* get_map_range() - Parse a mapping range from a string
@@ -382,20 +390,18 @@ struct map_range {
* Parse a string of the form inner:outer:count or outer,inner,count into
* a new mapping range.
*
- * Return: A new &struct map_range
+ * Return: A struct map_range
*/
-static struct map_range *get_map_range(const char *s)
+static struct map_range get_map_range(const char *s)
{
int end;
- struct map_range *ret;
-
- ret = xmalloc(sizeof(*ret));
+ struct map_range ret = { .next = NULL };
- if (sscanf(s, "%u:%u:%u%n", &ret->inner, &ret->outer, &ret->count,
+ if (sscanf(s, "%u:%u:%u%n", &ret.inner, &ret.outer, &ret.count,
&end) >= 3 && !s[end])
return ret; /* inner:outer:count */
- if (sscanf(s, "%u,%u,%u%n", &ret->outer, &ret->inner, &ret->count,
+ if (sscanf(s, "%u,%u,%u%n", &ret.outer, &ret.inner, &ret.count,
&end) >= 3 && !s[end])
return ret; /* outer,inner,count */
@@ -410,16 +416,13 @@ static struct map_range *get_map_range(const char *s)
*
* This finds the first subid range matching @uid in @filename.
*/
-static struct map_range *read_subid_range(char *filename, uid_t uid)
+static struct map_range read_subid_range(char *filename, uid_t uid)
{
char *line = NULL, *pwbuf;
FILE *idmap;
size_t n = 0;
struct passwd *pw;
- struct map_range *map;
-
- map = xmalloc(sizeof(*map));
- map->inner = -1;
+ struct map_range map = { .inner = -1, .next = NULL };
pw = xgetpwuid(uid, &pwbuf);
if (!pw)
@@ -452,13 +455,13 @@ static struct map_range *read_subid_range(char *filename, uid_t uid)
if (!rest)
continue;
*rest = '\0';
- map->outer = strtoul_or_err(s, _("failed to parse subid map"));
+ map.outer = strtoul_or_err(s, _("failed to parse subid map"));
s = rest + 1;
rest = strchr(s, '\n');
if (rest)
*rest = '\0';
- map->count = strtoul_or_err(s, _("failed to parse subid map"));
+ map.count = strtoul_or_err(s, _("failed to parse subid map"));
fclose(idmap);
free(pw);
@@ -472,129 +475,203 @@ static struct map_range *read_subid_range(char *filename, uid_t uid)
}
/**
- * map_ids() - Create a new uid/gid map
- * @idmapper: Either newuidmap or newgidmap
- * @ppid: Pid to set the map for
- * @outer: ID outside the namespace for a single map.
- * @inner: ID inside the namespace for a single map. May be -1 to only use @map.
- * @map: A range of IDs to map
+ * read_kernel_map() - Read all available IDs from the kernel
+ * @chain: destination list to receive pass-through ID mappings
+ * @filename: either /proc/self/uid_map or /proc/self/gid_map
*
- * This creates a new uid/gid map for @ppid using @idmapper. The ID @outer in
- * the parent (our) namespace is mapped to the ID @inner in the child (@ppid's)
- * namespace. In addition, the range of IDs beginning at @map->outer is mapped
- * to the range of IDs beginning at @map->inner. The tricky bit is that we
- * cannot let these mappings overlap. We accomplish this by removing a "hole"
- * from @map, if @outer or @inner overlap it. This may result in one less than
- * @map->count IDs being mapped from @map. The unmapped IDs are always the
- * topmost IDs of the mapping (either in the parent or the child namespace).
+ * This is used by --map-users=all and --map-groups=all to construct
+ * pass-through mappings for all IDs available in the parent namespace.
+ */
+static void read_kernel_map(struct map_range **chain, char *filename)
+{
+ char *line = NULL;
+ size_t size = 0;
+ FILE *idmap;
+
+ idmap = fopen(filename, "r");
+ if (!idmap)
+ err(EXIT_FAILURE, _("could not open '%s'"), filename);
+
+ while (getline(&line, &size, idmap) != -1) {
+ unsigned int start, count;
+ if (sscanf(line, " %u %*u %u", &start, &count) < 2)
+ continue;
+ insert_map_range(chain, (struct map_range) {
+ .inner = start,
+ .outer = start,
+ .count = count
+ });
+ }
+
+ fclose(idmap);
+ free(line);
+}
+
+/**
+ * add_single_map_range() - Add a single-ID map into a list without overlap
+ * @chain: A linked list of ID range mappings
+ * @outer: ID outside the namespace for a single map.
+ * @inner: ID inside the namespace for a single map, or -1 for no map.
*
- * Most of the time, this function will be called with @map->outer as some
- * large ID, @map->inner as 0, and @map->count as a large number (at least
- * 1000, but less than @map->outer). Typically, there will be no conflict with
- * @outer. However, @inner may split the mapping for e.g. --map-current-user.
+ * Prepend a mapping to @chain for the single ID @outer to the single ID
+ * @inner. The tricky bit is that we cannot let existing mappings overlap it.
+ * We accomplish this by removing a "hole" from each existing range @map, if
+ * @outer or @inner overlap it. This may result in one less than @map->count
+ * IDs being mapped from @map. The unmapped IDs are always the topmost IDs
+ * of the mapping (either in the parent or the child namespace).
*
- * This function always exec()s or errors out and does not return.
+ * Most of the time, this function will be called with a single mapping range
+ * @map, @map->outer as some large ID, @map->inner as 0, and @map->count as a
+ * large number (at least 1000, but less than @map->outer). Typically, there
+ * will be no conflict with @outer. However, @inner may split the mapping for
+ * e.g. --map-current-user.
*/
-static void __attribute__((__noreturn__))
-map_ids(const char *idmapper, int ppid, unsigned int outer, unsigned int inner,
- struct map_range *map)
+
+static void add_single_map_range(struct map_range **chain, unsigned int outer,
+ unsigned int inner)
{
- /* idmapper + pid + 4 * map + NULL */
- char *argv[15];
- /* argv - idmapper - "1" - NULL */
- char args[12][UID_BUFSIZ];
- int i = 0, j = 0;
- struct map_range lo, mid, hi;
- unsigned int inner_offset, outer_offset;
-
- /* Some helper macros to reduce bookkeeping */
-#define push_str(s) do { \
- argv[i++] = s; \
-} while (0)
-#define push_ul(x) do { \
- snprintf(args[j], sizeof(args[j]), "%u", x); \
- push_str(args[j++]); \
-} while (0)
-
- push_str(xstrdup(idmapper));
- push_ul(ppid);
- if ((int)inner == -1) {
+ struct map_range *map = *chain;
+
+ if (inner + 1 == 0)
+ outer = (unsigned int) -1;
+ *chain = NULL;
+
+ while (map) {
+ struct map_range lo = { 0 }, mid = { 0 }, hi = { 0 },
+ *next = map->next;
+ unsigned int inner_offset, outer_offset;
+
/*
- * If we don't have a "single" mapping, then we can just use map
- * directly, starting inner IDs from zero for an auto mapping
+ * Start inner IDs from zero for an auto mapping; otherwise, if
+ * the single mapping exists and overlaps the range, remove an ID
*/
- push_ul(map->inner + 1 ? map->inner : 0);
- push_ul(map->outer);
- push_ul(map->count);
- push_str(NULL);
+ if (map->inner + 1 == 0)
+ map->inner = 0;
+ else if (inner + 1 != 0 &&
+ ((outer >= map->outer && outer <= map->outer + map->count) ||
+ (inner >= map->inner && inner <= map->inner + map->count)))
+ map->count--;
+
+ /* Determine where the splits between lo, mid, and hi will be */
+ outer_offset = min(outer > map->outer ? outer - map->outer : 0,
+ map->count);
+ inner_offset = min(inner > map->inner ? inner - map->inner : 0,
+ map->count);
- execvp(idmapper, argv);
- errexec(idmapper);
+ /*
+ * In the worst case, we need three mappings:
+ * From the bottom of map to either inner or outer
+ */
+ lo.outer = map->outer;
+ lo.inner = map->inner;
+ lo.count = min(inner_offset, outer_offset);
+
+ /* From the lower of inner or outer to the higher */
+ mid.outer = lo.outer + lo.count;
+ mid.outer += mid.outer == outer;
+ mid.inner = lo.inner + lo.count;
+ mid.inner += mid.inner == inner;
+ mid.count = abs_diff(outer_offset, inner_offset);
+
+ /* And from the higher of inner or outer to the end of the map */
+ hi.outer = mid.outer + mid.count;
+ hi.outer += hi.outer == outer;
+ hi.inner = mid.inner + mid.count;
+ hi.inner += hi.inner == inner;
+ hi.count = map->count - lo.count - mid.count;
+
+ /* Insert non-empty mappings into the output chain */
+ if (hi.count)
+ insert_map_range(chain, hi);
+ if (mid.count)
+ insert_map_range(chain, mid);
+ if (lo.count)
+ insert_map_range(chain, lo);
+
+ free(map);
+ map = next;
}
- /*
- * Start inner IDs from zero for an auto mapping; otherwise, if the two
- * fixed mappings overlap, remove an ID from map
- */
- if (map->inner + 1 == 0)
- map->inner = 0;
- else if ((outer >= map->outer && outer <= map->outer + map->count) ||
- (inner >= map->inner && inner <= map->inner + map->count))
- map->count--;
-
- /* Determine where the splits between lo, mid, and hi will be */
- outer_offset = min(outer > map->outer ? outer - map->outer : 0,
- map->count);
- inner_offset = min(inner > map->inner ? inner - map->inner : 0,
- map->count);
-
- /*
- * In the worst case, we need three mappings:
- * From the bottom of map to either inner or outer
- */
- lo.outer = map->outer;
- lo.inner = map->inner;
- lo.count = min(inner_offset, outer_offset);
-
- /* From the lower of inner or outer to the higher */
- mid.outer = lo.outer + lo.count;
- mid.outer += mid.outer == outer;
- mid.inner = lo.inner + lo.count;
- mid.inner += mid.inner == inner;
- mid.count = abs_diff(outer_offset, inner_offset);
-
- /* And from the higher of inner or outer to the end of the map */
- hi.outer = mid.outer + mid.count;
- hi.outer += hi.outer == outer;
- hi.inner = mid.inner + mid.count;
- hi.inner += hi.inner == inner;
- hi.count = map->count - lo.count - mid.count;
-
- push_ul(inner);
- push_ul(outer);
- push_str("1");
- /* new[gu]idmap doesn't like zero-length mappings, so skip them */
- if (lo.count) {
- push_ul(lo.inner);
- push_ul(lo.outer);
- push_ul(lo.count);
+ if (inner + 1 != 0) {
+ /* Insert single ID mapping as the first entry in the chain */
+ insert_map_range(chain, (struct map_range) {
+ .inner = inner,
+ .outer = outer,
+ .count = 1
+ });
}
- if (mid.count) {
- push_ul(mid.inner);
- push_ul(mid.outer);
- push_ul(mid.count);
- }
- if (hi.count) {
- push_ul(hi.inner);
- push_ul(hi.outer);
- push_ul(hi.count);
+}
+
+/**
+ * map_ids_external() - Create a new uid/gid map using setuid helper
+ * @idmapper: Either newuidmap or newgidmap
+ * @ppid: Pid to set the map for
+ * @chain: A linked list of ID range mappings
+ *
+ * This creates a new uid/gid map for @ppid using @idmapper to set the
+ * mapping for each of the ranges in @chain.
+ *
+ * This function always exec()s or errors out and does not return.
+ */
+static void __attribute__((__noreturn__))
+map_ids_external(const char *idmapper, int ppid, struct map_range *chain)
+{
+ unsigned int i = 0, length = 3;
+ char **argv;
+
+ for (struct map_range *map = chain; map; map = map->next)
+ length += 3;
+ argv = xcalloc(length, sizeof(*argv));
+ argv[i++] = xstrdup(idmapper);
+ xasprintf(&argv[i++], "%u", ppid);
+
+ for (struct map_range *map = chain; map; map = map->next) {
+ xasprintf(&argv[i++], "%u", map->inner);
+ xasprintf(&argv[i++], "%u", map->outer);
+ xasprintf(&argv[i++], "%u", map->count);
}
- push_str(NULL);
+
+ argv[i] = NULL;
execvp(idmapper, argv);
errexec(idmapper);
}
/**
+ * map_ids_internal() - Create a new uid/gid map using root privilege
+ * @type: Either uid_map or gid_map
+ * @ppid: Pid to set the map for
+ * @chain: A linked list of ID range mappings
+ *
+ * This creates a new uid/gid map for @ppid using a privileged write to
+ * /proc/@ppid/@type to set a mapping for each of the ranges in @chain.
+ */
+static void map_ids_internal(const char *type, int ppid, struct map_range *chain)
+{
+ int count, fd;
+ unsigned int length = 0;
+ char buffer[4096], *path;
+
+ xasprintf(&path, "/proc/%u/%s", ppid, type);
+ for (struct map_range *map = chain; map; map = map->next) {
+ count = snprintf(buffer + length, sizeof(buffer) - length,
+ "%u %u %u\n",
+ map->inner, map->outer, map->count);
+ if (count < 0 || count + length > sizeof(buffer))
+ errx(EXIT_FAILURE,
+ _("%s too large for kernel 4k limit"), path);
+ length += count;
+ }
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0)
+ err(EXIT_FAILURE, _("failed to open %s"), path);
+ if (write_all(fd, buffer, length) < 0)
+ err(EXIT_FAILURE, _("failed to write %s"), path);
+ close(fd);
+ free(path);
+}
+
+/**
* map_ids_from_child() - Set up a new uid/gid map
* @fd: The eventfd to wait on
* @mapuser: The user to map the current user to (or -1)
@@ -619,6 +696,19 @@ static pid_t map_ids_from_child(int *fd, uid_t mapuser,
if (child)
return child;
+ if (usermap)
+ add_single_map_range(&usermap, geteuid(), mapuser);
+ if (groupmap)
+ add_single_map_range(&groupmap, getegid(), mapgroup);
+
+ if (geteuid() == 0) {
+ if (usermap)
+ map_ids_internal("uid_map", ppid, usermap);
+ if (groupmap)
+ map_ids_internal("gid_map", ppid, groupmap);
+ exit(EXIT_SUCCESS);
+ }
+
/* Avoid forking more than we need to */
if (usermap && groupmap) {
pid = fork();
@@ -629,9 +719,9 @@ static pid_t map_ids_from_child(int *fd, uid_t mapuser,
}
if (!pid && usermap)
- map_ids("newuidmap", ppid, geteuid(), mapuser, usermap);
+ map_ids_external("newuidmap", ppid, usermap);
if (groupmap)
- map_ids("newgidmap", ppid, getegid(), mapgroup, groupmap);
+ map_ids_external("newgidmap", ppid, groupmap);
exit(EXIT_SUCCESS);
}
@@ -683,8 +773,8 @@ static void __attribute__((__noreturn__)) usage(void)
fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out);
fputs(USAGE_SEPARATOR, out);
- printf(USAGE_HELP_OPTIONS(27));
- printf(USAGE_MAN_TAIL("unshare(1)"));
+ fprintf(out, USAGE_HELP_OPTIONS(27));
+ fprintf(out, USAGE_MAN_TAIL("unshare(1)"));
exit(EXIT_SUCCESS);
}
@@ -764,8 +854,8 @@ int main(int argc, char *argv[])
uid_t uid = 0, real_euid = geteuid();
gid_t gid = 0, real_egid = getegid();
int keepcaps = 0;
- time_t monotonic = 0;
- time_t boottime = 0;
+ int64_t monotonic = 0;
+ int64_t boottime = 0;
int force_monotonic = 0;
int force_boottime = 0;
@@ -844,21 +934,27 @@ int main(int argc, char *argv[])
case OPT_MAPUSERS:
unshare_flags |= CLONE_NEWUSER;
if (!strcmp(optarg, "auto"))
- usermap = read_subid_range(_PATH_SUBUID, real_euid);
+ insert_map_range(&usermap,
+ read_subid_range(_PATH_SUBUID, real_euid));
+ else if (!strcmp(optarg, "all"))
+ read_kernel_map(&usermap, _PATH_PROC_UIDMAP);
else
- usermap = get_map_range(optarg);
+ insert_map_range(&usermap, get_map_range(optarg));
break;
case OPT_MAPGROUPS:
unshare_flags |= CLONE_NEWUSER;
if (!strcmp(optarg, "auto"))
- groupmap = read_subid_range(_PATH_SUBGID, real_euid);
+ insert_map_range(&groupmap,
+ read_subid_range(_PATH_SUBGID, real_euid));
+ else if (!strcmp(optarg, "all"))
+ read_kernel_map(&groupmap, _PATH_PROC_GIDMAP);
else
- groupmap = get_map_range(optarg);
+ insert_map_range(&groupmap, get_map_range(optarg));
break;
case OPT_MAPAUTO:
unshare_flags |= CLONE_NEWUSER;
- usermap = read_subid_range(_PATH_SUBUID, real_euid);
- groupmap = read_subid_range(_PATH_SUBGID, real_euid);
+ insert_map_range(&usermap, read_subid_range(_PATH_SUBUID, real_euid));
+ insert_map_range(&groupmap, read_subid_range(_PATH_SUBGID, real_euid));
break;
case OPT_SETGROUPS:
setgrpcmd = setgroups_str2id(optarg);
@@ -895,11 +991,11 @@ int main(int argc, char *argv[])
newdir = optarg;
break;
case OPT_MONOTONIC:
- monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset"));
+ monotonic = strtos64_or_err(optarg, _("failed to parse monotonic offset"));
force_monotonic = 1;
break;
case OPT_BOOTTIME:
- boottime = strtoul_or_err(optarg, _("failed to parse boottime offset"));
+ boottime = strtos64_or_err(optarg, _("failed to parse boottime offset"));
force_boottime = 1;
break;
@@ -994,8 +1090,10 @@ int main(int argc, char *argv[])
int termsig = WTERMSIG(status);
- if (signal(termsig, SIG_DFL) == SIG_ERR ||
- sigemptyset(&sigset) != 0 ||
+ if (termsig != SIGKILL && signal(termsig, SIG_DFL) == SIG_ERR)
+ err(EXIT_FAILURE,
+ _("signal handler reset failed"));
+ if (sigemptyset(&sigset) != 0 ||
sigaddset(&sigset, termsig) != 0 ||
sigprocmask(SIG_UNBLOCK, &sigset, NULL) != 0)
err(EXIT_FAILURE,
@@ -1089,42 +1187,8 @@ int main(int argc, char *argv[])
if (force_uid && setuid(uid) < 0) /* change UID */
err(EXIT_FAILURE, _("setuid failed"));
- /* We use capabilities system calls to propagate the permitted
- * capabilities into the ambient set because we have already
- * forked so are in async-signal-safe context. */
- if (keepcaps && (unshare_flags & CLONE_NEWUSER)) {
- struct __user_cap_header_struct header = {
- .version = _LINUX_CAPABILITY_VERSION_3,
- .pid = 0,
- };
-
- struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = {{ 0 }};
- uint64_t effective, cap;
-
- if (capget(&header, payload) < 0)
- err(EXIT_FAILURE, _("capget failed"));
-
- /* In order the make capabilities ambient, we first need to ensure
- * that they are all inheritable. */
- payload[0].inheritable = payload[0].permitted;
- payload[1].inheritable = payload[1].permitted;
-
- if (capset(&header, payload) < 0)
- err(EXIT_FAILURE, _("capset failed"));
-
- effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective;
-
- for (cap = 0; cap < (sizeof(effective) * 8); cap++) {
- /* This is the same check as cap_valid(), but using
- * the runtime value for the last valid cap. */
- if (cap > (uint64_t) cap_last_cap())
- continue;
-
- if ((effective & (1 << cap))
- && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0)
- err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed"));
- }
- }
+ if (keepcaps && (unshare_flags & CLONE_NEWUSER))
+ cap_permitted_to_ambient();
if (optind < argc) {
execvp(argv[optind], argv + optind);