summaryrefslogtreecommitdiffstats
path: root/src/core/exec-invoke.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/exec-invoke.c')
-rw-r--r--src/core/exec-invoke.c649
1 files changed, 380 insertions, 269 deletions
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 8e6de15..ee8db04 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -22,7 +22,7 @@
#include "argv-util.h"
#include "barrier.h"
#include "bpf-dlopen.h"
-#include "bpf-lsm.h"
+#include "bpf-restrict-fs.h"
#include "btrfs-util.h"
#include "capability-util.h"
#include "cgroup-setup.h"
@@ -41,6 +41,7 @@
#include "hexdecoct.h"
#include "io-util.h"
#include "iovec-util.h"
+#include "journal-send.h"
#include "missing_ioprio.h"
#include "missing_prctl.h"
#include "missing_securebits.h"
@@ -59,52 +60,13 @@
#include "strv.h"
#include "terminal-util.h"
#include "utmp-wtmp.h"
+#include "vpick.h"
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
#define SNDBUF_SIZE (8*1024*1024)
-static int shift_fds(int fds[], size_t n_fds) {
- if (n_fds <= 0)
- return 0;
-
- /* Modifies the fds array! (sorts it) */
-
- assert(fds);
-
- for (int start = 0;;) {
- int restart_from = -1;
-
- for (int i = start; i < (int) n_fds; i++) {
- int nfd;
-
- /* Already at right index? */
- if (fds[i] == i+3)
- continue;
-
- nfd = fcntl(fds[i], F_DUPFD, i + 3);
- if (nfd < 0)
- return -errno;
-
- safe_close(fds[i]);
- fds[i] = nfd;
-
- /* Hmm, the fd we wanted isn't free? Then
- * let's remember that and try again from here */
- if (nfd != i+3 && restart_from < 0)
- restart_from = i;
- }
-
- if (restart_from < 0)
- break;
-
- start = restart_from;
- }
-
- return 0;
-}
-
static int flag_fds(
const int fds[],
size_t n_socket_fds,
@@ -198,9 +160,11 @@ static int connect_journal_socket(
const char *j;
int r;
- j = log_namespace ?
- strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
- "/run/systemd/journal/stdout";
+ assert(fd >= 0);
+
+ j = journal_stream_path(log_namespace);
+ if (!j)
+ return -EINVAL;
if (gid_is_valid(gid)) {
oldgid = getgid();
@@ -449,7 +413,7 @@ static int setup_input(
case EXEC_INPUT_DATA: {
int fd;
- fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
+ fd = acquire_data_fd_full(context->stdin_data, context->stdin_data_size, /* flags = */ 0);
if (fd < 0)
return fd;
@@ -670,12 +634,8 @@ static int chown_terminal(int fd, uid_t uid) {
assert(fd >= 0);
/* Before we chown/chmod the TTY, let's ensure this is actually a tty */
- if (isatty(fd) < 1) {
- if (IN_SET(errno, EINVAL, ENOTTY))
- return 0; /* not a tty */
-
- return -errno;
- }
+ if (!isatty_safe(fd))
+ return 0;
/* This might fail. What matters are the results. */
r = fchmod_and_chown(fd, TTY_MODE, uid, GID_INVALID);
@@ -1126,7 +1086,8 @@ static int setup_pam(
gid_t gid,
const char *tty,
char ***env, /* updated on success */
- const int fds[], size_t n_fds) {
+ const int fds[], size_t n_fds,
+ int exec_fd) {
#if HAVE_PAM
@@ -1141,7 +1102,7 @@ static int setup_pam(
sigset_t old_ss;
int pam_code = PAM_SUCCESS, r;
bool close_session = false;
- pid_t pam_pid = 0, parent_pid;
+ pid_t parent_pid;
int flags = 0;
assert(name);
@@ -1196,7 +1157,7 @@ static int setup_pam(
pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
if (pam_code != PAM_SUCCESS)
- log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));
+ log_debug("pam_setcred(PAM_ESTABLISH_CRED) failed, ignoring: %s", pam_strerror(handle, pam_code));
pam_code = pam_open_session(handle, flags);
if (pam_code != PAM_SUCCESS)
@@ -1212,15 +1173,15 @@ static int setup_pam(
/* Block SIGTERM, so that we know that it won't get lost in the child */
- assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
+ assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM) >= 0);
parent_pid = getpid_cached();
- r = safe_fork("(sd-pam)", 0, &pam_pid);
+ r = safe_fork("(sd-pam)", 0, NULL);
if (r < 0)
goto fail;
if (r == 0) {
- int sig, ret = EXIT_PAM;
+ int ret = EXIT_PAM;
/* The child's job is to reset the PAM session on termination */
barrier_set_role(&barrier, BARRIER_CHILD);
@@ -1229,17 +1190,18 @@ static int setup_pam(
* those fds are open here that have been opened by PAM. */
(void) close_many(fds, n_fds);
+ /* Also close the 'exec_fd' in the child, since the service manager waits for the EOF induced
+ * by the execve() to wait for completion, and if we'd keep the fd open here in the child
+ * we'd never signal completion. */
+ exec_fd = safe_close(exec_fd);
+
/* Drop privileges - we don't need any to pam_close_session and this will make
* PR_SET_PDEATHSIG work in most cases. If this fails, ignore the error - but expect sd-pam
* threads to fail to exit normally */
- r = maybe_setgroups(0, NULL);
+ r = fully_set_uid_gid(uid, gid, /* supplementary_gids= */ NULL, /* n_supplementary_gids= */ 0);
if (r < 0)
- log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
- if (setresgid(gid, gid, gid) < 0)
- log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
- if (setresuid(uid, uid, uid) < 0)
- log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");
+ log_warning_errno(r, "Failed to drop privileges in sd-pam: %m");
(void) ignore_signals(SIGPIPE);
@@ -1258,21 +1220,13 @@ static int setup_pam(
/* Check if our parent process might already have died? */
if (getppid() == parent_pid) {
sigset_t ss;
+ int sig;
assert_se(sigemptyset(&ss) >= 0);
assert_se(sigaddset(&ss, SIGTERM) >= 0);
- for (;;) {
- if (sigwait(&ss, &sig) < 0) {
- if (errno == EINTR)
- continue;
-
- goto child_finish;
- }
-
- assert(sig == SIGTERM);
- break;
- }
+ assert_se(sigwait(&ss, &sig) == 0);
+ assert(sig == SIGTERM);
}
/* If our parent died we'll end the session */
@@ -1361,7 +1315,7 @@ static void rename_process_from_path(const char *path) {
process_name[1+l] = ')';
process_name[1+l+1] = 0;
- rename_process(process_name);
+ (void) rename_process(process_name);
}
static bool context_has_address_families(const ExecContext *c) {
@@ -1725,7 +1679,7 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
if (!exec_context_restrict_filesystems_set(c))
return 0;
- if (p->bpf_outer_map_fd < 0) {
+ if (p->bpf_restrict_fs_map_fd < 0) {
/* LSM BPF is unsupported or lsm_bpf_setup failed */
log_exec_debug(c, p, "LSM BPF not supported, skipping RestrictFileSystems=");
return 0;
@@ -1736,7 +1690,7 @@ static int apply_restrict_filesystems(const ExecContext *c, const ExecParameters
if (r < 0)
return r;
- return lsm_bpf_restrict_filesystems(c->restrict_filesystems, p->cgroup_id, p->bpf_outer_map_fd, c->restrict_filesystems_allow_list);
+ return bpf_restrict_fs_update(c->restrict_filesystems, p->cgroup_id, p->bpf_restrict_fs_map_fd, c->restrict_filesystems_allow_list);
}
#endif
@@ -1817,10 +1771,10 @@ static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
* the service payload in. */
static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
- [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
- [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
- [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
- [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
+ [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
+ [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
+ [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
+ [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
[EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
};
@@ -1907,7 +1861,7 @@ static int build_environment(
"Failed to determine user credentials for root: %m");
}
- bool set_user_login_env = c->set_login_environment >= 0 ? c->set_login_environment : (c->user || c->dynamic_user);
+ bool set_user_login_env = exec_context_get_set_login_environment(c);
if (username) {
x = strjoin("USER=", username);
@@ -1961,7 +1915,7 @@ static int build_environment(
* to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
* container manager passes to PID 1 ends up all the way in the console login shown. */
- if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
+ if (path_equal(tty_path, "/dev/console") && getppid() == 1)
term = getenv("TERM");
else if (tty_path && in_charset(skip_dev_prefix(tty_path), ALPHANUMERICAL)) {
_cleanup_free_ char *key = NULL;
@@ -2315,10 +2269,10 @@ static int setup_exec_directory(
int *exit_status) {
static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
- [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
- [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
- [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
- [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
+ [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
+ [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
+ [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
+ [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
[EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
};
int r;
@@ -2338,10 +2292,10 @@ static int setup_exec_directory(
gid = 0;
}
- for (size_t i = 0; i < context->directories[type].n_items; i++) {
+ FOREACH_ARRAY(i, context->directories[type].items, context->directories[type].n_items) {
_cleanup_free_ char *p = NULL, *pp = NULL;
- p = path_join(params->prefix[type], context->directories[type].items[i].path);
+ p = path_join(params->prefix[type], i->path);
if (!p) {
r = -ENOMEM;
goto fail;
@@ -2357,7 +2311,7 @@ static int setup_exec_directory(
* doesn't exist, then we likely are upgrading from an older systemd version that
* didn't know the more recent addition to the xdg-basedir spec: the $XDG_STATE_HOME
* directory. In older systemd versions EXEC_DIRECTORY_STATE was aliased to
- * EXEC_DIRECTORY_CONFIGURATION, with the advent of $XDG_STATE_HOME is is now
+ * EXEC_DIRECTORY_CONFIGURATION, with the advent of $XDG_STATE_HOME it is now
* separated. If a service has both dirs configured but only the configuration dir
* exists and the state dir does not, we assume we are looking at an update
* situation. Hence, create a compatibility symlink, so that all expectations are
@@ -2378,9 +2332,9 @@ static int setup_exec_directory(
* under the configuration hierarchy. */
if (type == EXEC_DIRECTORY_STATE)
- q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], context->directories[type].items[i].path);
+ q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], i->path);
else if (type == EXEC_DIRECTORY_LOGS)
- q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], "log", context->directories[type].items[i].path);
+ q = path_join(params->prefix[EXEC_DIRECTORY_CONFIGURATION], "log", i->path);
else
assert_not_reached();
if (!q) {
@@ -2443,7 +2397,7 @@ static int setup_exec_directory(
if (r < 0)
goto fail;
- if (!path_extend(&pp, context->directories[type].items[i].path)) {
+ if (!path_extend(&pp, i->path)) {
r = -ENOMEM;
goto fail;
}
@@ -2477,7 +2431,7 @@ static int setup_exec_directory(
goto fail;
}
- if (!context->directories[type].items[i].only_create) {
+ if (!i->only_create) {
/* And link it up from the original place.
* Notes
* 1) If a mount namespace is going to be used, then this symlink remains on
@@ -2514,7 +2468,7 @@ static int setup_exec_directory(
if (r < 0)
goto fail;
- q = path_join(params->prefix[type], "private", context->directories[type].items[i].path);
+ q = path_join(params->prefix[type], "private", i->path);
if (!q) {
r = -ENOMEM;
goto fail;
@@ -2568,7 +2522,7 @@ static int setup_exec_directory(
params,
"%s \'%s\' already exists but the mode is different. "
"(File system: %o %sMode: %o)",
- exec_directory_type_to_string(type), context->directories[type].items[i].path,
+ exec_directory_type_to_string(type), i->path,
st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
continue;
@@ -2599,10 +2553,8 @@ static int setup_exec_directory(
/* If we are not going to run in a namespace, set up the symlinks - otherwise
* they are set up later, to allow configuring empty var/run/etc. */
if (!needs_mount_namespace)
- for (size_t i = 0; i < context->directories[type].n_items; i++) {
- r = create_many_symlinks(params->prefix[type],
- context->directories[type].items[i].path,
- context->directories[type].items[i].symlinks);
+ FOREACH_ARRAY(i, context->directories[type].items, context->directories[type].n_items) {
+ r = create_many_symlinks(params->prefix[type], i->path, i->symlinks);
if (r < 0)
goto fail;
}
@@ -2669,8 +2621,8 @@ static int compile_bind_mounts(
if (!params->prefix[t])
continue;
- for (size_t i = 0; i < context->directories[t].n_items; i++)
- n += !context->directories[t].items[i].only_create;
+ FOREACH_ARRAY(i, context->directories[t].items, context->directories[t].n_items)
+ n += !i->only_create;
}
if (n <= 0) {
@@ -2684,8 +2636,7 @@ static int compile_bind_mounts(
if (!bind_mounts)
return -ENOMEM;
- for (size_t i = 0; i < context->n_bind_mounts; i++) {
- BindMount *item = context->bind_mounts + i;
+ FOREACH_ARRAY(item, context->bind_mounts, context->n_bind_mounts) {
_cleanup_free_ char *s = NULL, *d = NULL;
s = strdup(item->source);
@@ -2729,18 +2680,18 @@ static int compile_bind_mounts(
return r;
}
- for (size_t i = 0; i < context->directories[t].n_items; i++) {
+ FOREACH_ARRAY(i, context->directories[t].items, context->directories[t].n_items) {
_cleanup_free_ char *s = NULL, *d = NULL;
/* When one of the parent directories is in the list, we cannot create the symlink
* for the child directory. See also the comments in setup_exec_directory(). */
- if (context->directories[t].items[i].only_create)
+ if (i->only_create)
continue;
if (exec_directory_is_private(context, t))
- s = path_join(params->prefix[t], "private", context->directories[t].items[i].path);
+ s = path_join(params->prefix[t], "private", i->path);
else
- s = path_join(params->prefix[t], context->directories[t].items[i].path);
+ s = path_join(params->prefix[t], i->path);
if (!s)
return -ENOMEM;
@@ -2749,7 +2700,7 @@ static int compile_bind_mounts(
/* When RootDirectory= or RootImage= are set, then the symbolic link to the private
* directory is not created on the root directory. So, let's bind-mount the directory
* on the 'non-private' place. */
- d = path_join(params->prefix[t], context->directories[t].items[i].path);
+ d = path_join(params->prefix[t], i->path);
else
d = strdup(s);
if (!d)
@@ -2758,10 +2709,8 @@ static int compile_bind_mounts(
bind_mounts[h++] = (BindMount) {
.source = TAKE_PTR(s),
.destination = TAKE_PTR(d),
- .read_only = false,
.nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
.recursive = true,
- .ignore_enoent = false,
};
}
}
@@ -2791,14 +2740,14 @@ static int compile_symlinks(
assert(params);
assert(ret_symlinks);
- for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
- for (size_t i = 0; i < context->directories[dt].n_items; i++) {
+ for (ExecDirectoryType dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++)
+ FOREACH_ARRAY(i, context->directories[dt].items, context->directories[dt].n_items) {
_cleanup_free_ char *private_path = NULL, *path = NULL;
- STRV_FOREACH(symlink, context->directories[dt].items[i].symlinks) {
+ STRV_FOREACH(symlink, i->symlinks) {
_cleanup_free_ char *src_abs = NULL, *dst_abs = NULL;
- src_abs = path_join(params->prefix[dt], context->directories[dt].items[i].path);
+ src_abs = path_join(params->prefix[dt], i->path);
dst_abs = path_join(params->prefix[dt], *symlink);
if (!src_abs || !dst_abs)
return -ENOMEM;
@@ -2810,14 +2759,14 @@ static int compile_symlinks(
if (!exec_directory_is_private(context, dt) ||
exec_context_with_rootfs(context) ||
- context->directories[dt].items[i].only_create)
+ i->only_create)
continue;
- private_path = path_join(params->prefix[dt], "private", context->directories[dt].items[i].path);
+ private_path = path_join(params->prefix[dt], "private", i->path);
if (!private_path)
return -ENOMEM;
- path = path_join(params->prefix[dt], context->directories[dt].items[i].path);
+ path = path_join(params->prefix[dt], i->path);
if (!path)
return -ENOMEM;
@@ -2825,18 +2774,16 @@ static int compile_symlinks(
if (r < 0)
return r;
}
- }
/* We make the host's os-release available via a symlink, so that we can copy it atomically
* and readers will never get a half-written version. Note that, while the paths specified here are
* absolute, when they are processed in namespace.c they will be made relative automatically, i.e.:
* 'os-release -> .os-release-stage/os-release' is what will be created. */
if (setup_os_release_symlink) {
- r = strv_extend(&symlinks, "/run/host/.os-release-stage/os-release");
- if (r < 0)
- return r;
-
- r = strv_extend(&symlinks, "/run/host/os-release");
+ r = strv_extend_many(
+ &symlinks,
+ "/run/host/.os-release-stage/os-release",
+ "/run/host/os-release");
if (r < 0)
return r;
}
@@ -2877,8 +2824,8 @@ static bool insist_on_sandboxing(
/* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
* essential. */
- for (size_t i = 0; i < n_bind_mounts; i++)
- if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
+ FOREACH_ARRAY(i, bind_mounts, n_bind_mounts)
+ if (!path_equal(i->source, i->destination))
return true;
if (context->log_namespace)
@@ -2887,13 +2834,33 @@ static bool insist_on_sandboxing(
return false;
}
-static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
+static int setup_ephemeral(
+ const ExecContext *context,
+ ExecRuntime *runtime,
+ char **root_image, /* both input and output! modified if ephemeral logic enabled */
+ char **root_directory) { /* ditto */
+
_cleanup_close_ int fd = -EBADF;
+ _cleanup_free_ char *new_root = NULL;
int r;
+ assert(context);
+ assert(root_image);
+ assert(root_directory);
+
+ if (!*root_image && !*root_directory)
+ return 0;
+
if (!runtime || !runtime->ephemeral_copy)
return 0;
+ assert(runtime->ephemeral_storage_socket[0] >= 0);
+ assert(runtime->ephemeral_storage_socket[1] >= 0);
+
+ new_root = strdup(runtime->ephemeral_copy);
+ if (!new_root)
+ return log_oom_debug();
+
r = posix_lock(runtime->ephemeral_storage_socket[0], LOCK_EX);
if (r < 0)
return log_debug_errno(r, "Failed to lock ephemeral storage socket: %m");
@@ -2904,28 +2871,23 @@ static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
if (fd >= 0)
/* We got an fd! That means ephemeral has already been set up, so nothing to do here. */
return 0;
-
if (fd != -EAGAIN)
return log_debug_errno(fd, "Failed to receive file descriptor queued on ephemeral storage socket: %m");
- log_debug("Making ephemeral snapshot of %s to %s",
- context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+ if (*root_image) {
+ log_debug("Making ephemeral copy of %s to %s", *root_image, new_root);
- if (context->root_image)
- fd = copy_file(context->root_image, runtime->ephemeral_copy, O_EXCL, 0600,
- COPY_LOCK_BSD|COPY_REFLINK|COPY_CRTIME);
- else
- fd = btrfs_subvol_snapshot_at(AT_FDCWD, context->root_directory,
- AT_FDCWD, runtime->ephemeral_copy,
- BTRFS_SNAPSHOT_FALLBACK_COPY |
- BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
- BTRFS_SNAPSHOT_RECURSIVE |
- BTRFS_SNAPSHOT_LOCK_BSD);
- if (fd < 0)
- return log_debug_errno(fd, "Failed to snapshot %s to %s: %m",
- context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+ fd = copy_file(*root_image,
+ new_root,
+ O_EXCL,
+ 0600,
+ COPY_LOCK_BSD|
+ COPY_REFLINK|
+ COPY_CRTIME);
+ if (fd < 0)
+ return log_debug_errno(fd, "Failed to copy image %s to %s: %m",
+ *root_image, new_root);
- if (context->root_image) {
/* A root image might be subject to lots of random writes so let's try to disable COW on it
* which tends to not perform well in combination with lots of random writes.
*
@@ -2934,13 +2896,35 @@ static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
*/
r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
if (r < 0)
- log_debug_errno(fd, "Failed to disable copy-on-write for %s, ignoring: %m", runtime->ephemeral_copy);
+ log_debug_errno(r, "Failed to disable copy-on-write for %s, ignoring: %m", new_root);
+ } else {
+ assert(*root_directory);
+
+ log_debug("Making ephemeral snapshot of %s to %s", *root_directory, new_root);
+
+ fd = btrfs_subvol_snapshot_at(
+ AT_FDCWD, *root_directory,
+ AT_FDCWD, new_root,
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_LOCK_BSD);
+ if (fd < 0)
+ return log_debug_errno(fd, "Failed to snapshot directory %s to %s: %m",
+ *root_directory, new_root);
}
r = send_one_fd(runtime->ephemeral_storage_socket[1], fd, MSG_DONTWAIT);
if (r < 0)
return log_debug_errno(r, "Failed to queue file descriptor on ephemeral storage socket: %m");
+ if (*root_image)
+ free_and_replace(*root_image, new_root);
+ else {
+ assert(*root_directory);
+ free_and_replace(*root_directory, new_root);
+ }
+
return 1;
}
@@ -3000,22 +2984,80 @@ static int verity_settings_prepare(
return 0;
}
+static int pick_versions(
+ const ExecContext *context,
+ const ExecParameters *params,
+ char **ret_root_image,
+ char **ret_root_directory) {
+
+ int r;
+
+ assert(context);
+ assert(params);
+ assert(ret_root_image);
+ assert(ret_root_directory);
+
+ if (context->root_image) {
+ _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
+
+ r = path_pick(/* toplevel_path= */ NULL,
+ /* toplevel_fd= */ AT_FDCWD,
+ context->root_image,
+ &pick_filter_image_raw,
+ PICK_ARCHITECTURE|PICK_TRIES|PICK_RESOLVE,
+ &result);
+ if (r < 0)
+ return r;
+
+ if (!result.path)
+ return log_exec_debug_errno(context, params, SYNTHETIC_ERRNO(ENOENT), "No matching entry in .v/ directory %s found.", context->root_image);
+
+ *ret_root_image = TAKE_PTR(result.path);
+ *ret_root_directory = NULL;
+ return r;
+ }
+
+ if (context->root_directory) {
+ _cleanup_(pick_result_done) PickResult result = PICK_RESULT_NULL;
+
+ r = path_pick(/* toplevel_path= */ NULL,
+ /* toplevel_fd= */ AT_FDCWD,
+ context->root_directory,
+ &pick_filter_image_dir,
+ PICK_ARCHITECTURE|PICK_TRIES|PICK_RESOLVE,
+ &result);
+ if (r < 0)
+ return r;
+
+ if (!result.path)
+ return log_exec_debug_errno(context, params, SYNTHETIC_ERRNO(ENOENT), "No matching entry in .v/ directory %s found.", context->root_directory);
+
+ *ret_root_image = NULL;
+ *ret_root_directory = TAKE_PTR(result.path);
+ return r;
+ }
+
+ *ret_root_image = *ret_root_directory = NULL;
+ return 0;
+}
+
static int apply_mount_namespace(
ExecCommandFlags command_flags,
const ExecContext *context,
const ExecParameters *params,
ExecRuntime *runtime,
const char *memory_pressure_path,
+ bool needs_sandboxing,
char **error_path) {
_cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
**read_write_paths_cleanup = NULL;
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
- *extension_dir = NULL, *host_os_release_stage = NULL;
- const char *root_dir = NULL, *root_image = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
+ *extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
+ const char *tmp_dir = NULL, *var_tmp_dir = NULL;
char **read_write_paths;
- bool needs_sandboxing, setup_os_release_symlink;
+ bool setup_os_release_symlink;
BindMount *bind_mounts = NULL;
size_t n_bind_mounts = 0;
int r;
@@ -3025,14 +3067,21 @@ static int apply_mount_namespace(
CLEANUP_ARRAY(bind_mounts, n_bind_mounts, bind_mount_free_many);
if (params->flags & EXEC_APPLY_CHROOT) {
- r = setup_ephemeral(context, runtime);
+ r = pick_versions(
+ context,
+ params,
+ &root_image,
+ &root_dir);
if (r < 0)
return r;
- if (context->root_image)
- root_image = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_image;
- else
- root_dir = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory;
+ r = setup_ephemeral(
+ context,
+ runtime,
+ &root_image,
+ &root_dir);
+ if (r < 0)
+ return r;
}
r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
@@ -3054,7 +3103,6 @@ static int apply_mount_namespace(
} else
read_write_paths = context->read_write_paths;
- needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command_flags & EXEC_COMMAND_FULLY_PRIVILEGED);
if (needs_sandboxing) {
/* The runtime struct only contains the parent of the private /tmp, which is non-accessible
* to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
@@ -3084,11 +3132,9 @@ static int apply_mount_namespace(
params,
"shared mount propagation hidden by other fs namespacing unit settings: ignoring");
- if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
- r = exec_context_get_credential_directory(context, params, params->unit_id, &creds_path);
- if (r < 0)
- return r;
- }
+ r = exec_context_get_credential_directory(context, params, params->unit_id, &creds_path);
+ if (r < 0)
+ return r;
if (params->runtime_scope == RUNTIME_SCOPE_SYSTEM) {
propagate_dir = path_join("/run/systemd/propagate/", params->unit_id);
@@ -3246,31 +3292,39 @@ static int apply_working_directory(
const char *home,
int *exit_status) {
- const char *d, *wd;
+ const char *wd;
+ int r;
assert(context);
assert(exit_status);
if (context->working_directory_home) {
-
if (!home) {
*exit_status = EXIT_CHDIR;
return -ENXIO;
}
wd = home;
-
} else
wd = empty_to_root(context->working_directory);
if (params->flags & EXEC_APPLY_CHROOT)
- d = wd;
- else
- d = prefix_roota((runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory, wd);
+ r = RET_NERRNO(chdir(wd));
+ else {
+ _cleanup_close_ int dfd = -EBADF;
+
+ r = chase(wd,
+ (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory,
+ CHASE_PREFIX_ROOT|CHASE_AT_RESOLVE_IN_ROOT,
+ /* ret_path= */ NULL,
+ &dfd);
+ if (r >= 0)
+ r = RET_NERRNO(fchdir(dfd));
+ }
- if (chdir(d) < 0 && !context->working_directory_missing_ok) {
+ if (r < 0 && !context->working_directory_missing_ok) {
*exit_status = EXIT_CHDIR;
- return -errno;
+ return r;
}
return 0;
@@ -3459,7 +3513,7 @@ static int close_remaining_fds(
const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
- int dont_close[n_fds + 15];
+ int dont_close[n_fds + 16];
assert(params);
@@ -3495,6 +3549,9 @@ static int close_remaining_fds(
if (params->user_lookup_fd >= 0)
dont_close[n_dont_close++] = params->user_lookup_fd;
+ if (params->handoff_timestamp_fd >= 0)
+ dont_close[n_dont_close++] = params->handoff_timestamp_fd;
+
assert(n_dont_close <= ELEMENTSOF(dont_close));
return close_all_fds(dont_close, n_dont_close);
@@ -3528,26 +3585,29 @@ static int send_user_lookup(
return 0;
}
-static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
+static int acquire_home(const ExecContext *c, const char **home, char **ret_buf) {
int r;
assert(c);
assert(home);
- assert(buf);
+ assert(ret_buf);
/* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
- if (*home)
+ if (*home) /* Already acquired from get_fixed_user()? */
return 0;
if (!c->working_directory_home)
return 0;
- r = get_home_dir(buf);
+ if (c->dynamic_user)
+ return -EADDRNOTAVAIL;
+
+ r = get_home_dir(ret_buf);
if (r < 0)
return r;
- *home = *buf;
+ *home = *ret_buf;
return 1;
}
@@ -3641,11 +3701,12 @@ static int add_shifted_fd(int *fds, size_t fds_size, size_t *n_fds, int *fd) {
}
static int connect_unix_harder(const ExecContext *c, const ExecParameters *p, const OpenFile *of, int ofd) {
+ static const int socket_types[] = { SOCK_DGRAM, SOCK_STREAM, SOCK_SEQPACKET };
+
union sockaddr_union addr = {
.un.sun_family = AF_UNIX,
};
socklen_t sa_len;
- static const int socket_types[] = { SOCK_DGRAM, SOCK_STREAM, SOCK_SEQPACKET };
int r;
assert(c);
@@ -3655,43 +3716,35 @@ static int connect_unix_harder(const ExecContext *c, const ExecParameters *p, co
r = sockaddr_un_set_path(&addr.un, FORMAT_PROC_FD_PATH(ofd));
if (r < 0)
- return log_exec_error_errno(c, p, r, "Failed to set sockaddr for %s: %m", of->path);
-
+ return log_exec_error_errno(c, p, r, "Failed to set sockaddr for '%s': %m", of->path);
sa_len = r;
- for (size_t i = 0; i < ELEMENTSOF(socket_types); i++) {
+ FOREACH_ELEMENT(i, socket_types) {
_cleanup_close_ int fd = -EBADF;
- fd = socket(AF_UNIX, socket_types[i] | SOCK_CLOEXEC, 0);
+ fd = socket(AF_UNIX, *i|SOCK_CLOEXEC, 0);
if (fd < 0)
- return log_exec_error_errno(c,
- p,
- errno,
- "Failed to create socket for %s: %m",
+ return log_exec_error_errno(c, p,
+ errno, "Failed to create socket for '%s': %m",
of->path);
r = RET_NERRNO(connect(fd, &addr.sa, sa_len));
- if (r == -EPROTOTYPE)
- continue;
- if (r < 0)
- return log_exec_error_errno(c,
- p,
- r,
- "Failed to connect socket for %s: %m",
+ if (r >= 0)
+ return TAKE_FD(fd);
+ if (r != -EPROTOTYPE)
+ return log_exec_error_errno(c, p,
+ r, "Failed to connect to socket for '%s': %m",
of->path);
-
- return TAKE_FD(fd);
}
- return log_exec_error_errno(c,
- p,
- SYNTHETIC_ERRNO(EPROTOTYPE), "Failed to connect socket for \"%s\".",
+ return log_exec_error_errno(c, p,
+ SYNTHETIC_ERRNO(EPROTOTYPE), "No suitable socket type to connect to socket '%s'.",
of->path);
}
static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const OpenFile *of) {
- struct stat st;
_cleanup_close_ int fd = -EBADF, ofd = -EBADF;
+ struct stat st;
assert(c);
assert(p);
@@ -3699,10 +3752,10 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
ofd = open(of->path, O_PATH | O_CLOEXEC);
if (ofd < 0)
- return log_exec_error_errno(c, p, errno, "Could not open \"%s\": %m", of->path);
+ return log_exec_error_errno(c, p, errno, "Failed to open '%s' as O_PATH: %m", of->path);
if (fstat(ofd, &st) < 0)
- return log_exec_error_errno(c, p, errno, "Failed to stat %s: %m", of->path);
+ return log_exec_error_errno(c, p, errno, "Failed to stat '%s': %m", of->path);
if (S_ISSOCK(st.st_mode)) {
fd = connect_unix_harder(c, p, of, ofd);
@@ -3710,10 +3763,11 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
return fd;
if (FLAGS_SET(of->flags, OPENFILE_READ_ONLY) && shutdown(fd, SHUT_WR) < 0)
- return log_exec_error_errno(c, p, errno, "Failed to shutdown send for socket %s: %m",
+ return log_exec_error_errno(c, p,
+ errno, "Failed to shutdown send for socket '%s': %m",
of->path);
- log_exec_debug(c, p, "socket %s opened (fd=%d)", of->path, fd);
+ log_exec_debug(c, p, "Opened socket '%s' as fd %d.", of->path, fd);
} else {
int flags = FLAGS_SET(of->flags, OPENFILE_READ_ONLY) ? O_RDONLY : O_RDWR;
if (FLAGS_SET(of->flags, OPENFILE_APPEND))
@@ -3723,9 +3777,9 @@ static int get_open_file_fd(const ExecContext *c, const ExecParameters *p, const
fd = fd_reopen(ofd, flags | O_CLOEXEC);
if (fd < 0)
- return log_exec_error_errno(c, p, fd, "Failed to open file %s: %m", of->path);
+ return log_exec_error_errno(c, p, fd, "Failed to reopen file '%s': %m", of->path);
- log_exec_debug(c, p, "file %s opened (fd=%d)", of->path, fd);
+ log_exec_debug(c, p, "Opened file '%s' as fd %d.", of->path, fd);
}
return TAKE_FD(fd);
@@ -3744,7 +3798,9 @@ static int collect_open_file_fds(const ExecContext *c, ExecParameters *p, size_t
fd = get_open_file_fd(c, p, of);
if (fd < 0) {
if (FLAGS_SET(of->flags, OPENFILE_GRACEFUL)) {
- log_exec_debug_errno(c, p, fd, "Failed to get OpenFile= file descriptor for %s, ignoring: %m", of->path);
+ log_exec_warning_errno(c, p, fd,
+ "Failed to get OpenFile= file descriptor for '%s', ignoring: %m",
+ of->path);
continue;
}
@@ -3758,9 +3814,7 @@ static int collect_open_file_fds(const ExecContext *c, ExecParameters *p, size_t
if (r < 0)
return r;
- p->fds[*n_fds] = TAKE_FD(fd);
-
- (*n_fds)++;
+ p->fds[(*n_fds)++] = TAKE_FD(fd);
}
return 0;
@@ -3810,7 +3864,7 @@ static bool exec_context_need_unprivileged_private_users(
context->private_ipc ||
context->ipc_namespace_path ||
context->private_mounts > 0 ||
- context->mount_apivfs ||
+ context->mount_apivfs > 0 ||
context->n_bind_mounts > 0 ||
context->n_temporary_filesystems > 0 ||
context->root_directory ||
@@ -3920,6 +3974,52 @@ static void exec_params_close(ExecParameters *p) {
p->stderr_fd = safe_close(p->stderr_fd);
}
+static int exec_fd_mark_hot(
+ const ExecContext *c,
+ ExecParameters *p,
+ bool hot,
+ int *reterr_exit_status) {
+
+ assert(c);
+ assert(p);
+
+ if (p->exec_fd < 0)
+ return 0;
+
+ uint8_t x = hot;
+
+ if (write(p->exec_fd, &x, sizeof(x)) < 0) {
+ if (reterr_exit_status)
+ *reterr_exit_status = EXIT_EXEC;
+ return log_exec_error_errno(c, p, errno, "Failed to mark exec_fd as %s: %m", hot ? "hot" : "cold");
+ }
+
+ return 1;
+}
+
+static int send_handoff_timestamp(
+ const ExecContext *c,
+ ExecParameters *p,
+ int *reterr_exit_status) {
+
+ assert(c);
+ assert(p);
+
+ if (p->handoff_timestamp_fd < 0)
+ return 0;
+
+ dual_timestamp dt;
+ dual_timestamp_now(&dt);
+
+ if (send(p->handoff_timestamp_fd, (const usec_t[2]) { dt.realtime, dt.monotonic }, sizeof(usec_t) * 2, 0) < 0) {
+ if (reterr_exit_status)
+ *reterr_exit_status = EXIT_EXEC;
+ return log_exec_error_errno(c, p, errno, "Failed to send handoff timestamp: %m");
+ }
+
+ return 1;
+}
+
int exec_invoke(
const ExecCommand *command,
const ExecContext *context,
@@ -3974,6 +4074,8 @@ int exec_invoke(
assert(params);
assert(exit_status);
+ /* This should be mostly redundant, as the log level is also passed as an argument of the executor,
+ * and is already applied earlier. Just for safety. */
if (context->log_level_max >= 0)
log_set_max_level(context->log_level_max);
@@ -4049,7 +4151,7 @@ int exec_invoke(
return log_exec_error_errno(context, params, r, "Failed to get OpenFile= file descriptors: %m");
}
- int keep_fds[n_fds + 3];
+ int keep_fds[n_fds + 4];
memcpy_safe(keep_fds, params->fds, n_fds * sizeof(int));
n_keep_fds = n_fds;
@@ -4059,8 +4161,14 @@ int exec_invoke(
return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
}
+ r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->handoff_timestamp_fd);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
+ }
+
#if HAVE_LIBBPF
- r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->bpf_outer_map_fd);
+ r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &params->bpf_restrict_fs_map_fd);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
@@ -4099,7 +4207,7 @@ int exec_invoke(
*exit_status = EXIT_CONFIRM;
return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ECANCELED),
- "Execution cancelled by the user");
+ "Execution cancelled by the user.");
}
}
@@ -4141,12 +4249,12 @@ int exec_invoke(
if (!uid_is_valid(uid)) {
*exit_status = EXIT_USER;
- return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "UID validation failed for \""UID_FMT"\"", uid);
+ return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "UID validation failed for \""UID_FMT"\".", uid);
}
if (!gid_is_valid(gid)) {
*exit_status = EXIT_USER;
- return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "GID validation failed for \""GID_FMT"\"", gid);
+ return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ESRCH), "GID validation failed for \""GID_FMT"\".", gid);
}
if (runtime->dynamic_creds->user)
@@ -4186,7 +4294,7 @@ int exec_invoke(
params->user_lookup_fd = safe_close(params->user_lookup_fd);
- r = acquire_home(context, uid, &home, &home_buffer);
+ r = acquire_home(context, &home, &home_buffer);
if (r < 0) {
*exit_status = EXIT_CHDIR;
return log_exec_error_errno(context, params, r, "Failed to determine $HOME for user: %m");
@@ -4210,9 +4318,10 @@ int exec_invoke(
r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
if (r == -EUCLEAN) {
*exit_status = EXIT_CGROUP;
- return log_exec_error_errno(context, params, r, "Failed to attach process to cgroup %s "
+ return log_exec_error_errno(context, params, r,
+ "Failed to attach process to cgroup '%s', "
"because the cgroup or one of its parents or "
- "siblings is in the threaded mode: %m", p);
+ "siblings is in the threaded mode.", p);
}
if (r < 0) {
*exit_status = EXIT_CGROUP;
@@ -4242,13 +4351,20 @@ int exec_invoke(
return log_exec_error_errno(context, params, r, "Failed to set up standard input: %m");
}
- r = setup_output(context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ _cleanup_free_ char *fname = NULL;
+ r = path_extract_filename(command->path, &fname);
+ if (r < 0) {
+ *exit_status = EXIT_STDOUT;
+ return log_exec_error_errno(context, params, r, "Failed to extract filename from path %s: %m", command->path);
+ }
+
+ r = setup_output(context, params, STDOUT_FILENO, socket_fd, named_iofds, fname, uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDOUT;
return log_exec_error_errno(context, params, r, "Failed to set up standard output: %m");
}
- r = setup_output(context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
+ r = setup_output(context, params, STDERR_FILENO, socket_fd, named_iofds, fname, uid, gid, &journal_stream_dev, &journal_stream_ino);
if (r < 0) {
*exit_status = EXIT_STDERR;
return log_exec_error_errno(context, params, r, "Failed to set up standard error output: %m");
@@ -4445,12 +4561,10 @@ int exec_invoke(
return log_exec_error_errno(context, params, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
}
- if (FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
- r = exec_setup_credentials(context, params, params->unit_id, uid, gid);
- if (r < 0) {
- *exit_status = EXIT_CREDENTIALS;
- return log_exec_error_errno(context, params, r, "Failed to set up credentials: %m");
- }
+ r = exec_setup_credentials(context, params, params->unit_id, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CREDENTIALS;
+ return log_exec_error_errno(context, params, r, "Failed to set up credentials: %m");
}
r = build_environment(
@@ -4567,7 +4681,7 @@ int exec_invoke(
* wins here. (See above.) */
/* All fds passed in the fds array will be closed in the pam child process. */
- r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, params->fds, n_fds);
+ r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, params->fds, n_fds, params->exec_fd);
if (r < 0) {
*exit_status = EXIT_PAM;
return log_exec_error_errno(context, params, r, "Failed to set up PAM session: %m");
@@ -4639,7 +4753,7 @@ int exec_invoke(
if (ns_type_supported(NAMESPACE_IPC)) {
r = setup_shareable_ns(runtime->shared->ipcns_storage_socket, CLONE_NEWIPC);
- if (r == -EPERM)
+ if (ERRNO_IS_NEG_PRIVILEGE(r))
log_exec_warning_errno(context, params, r,
"PrivateIPC=yes is configured, but IPC namespace setup failed, ignoring: %m");
else if (r < 0) {
@@ -4657,7 +4771,13 @@ int exec_invoke(
if (needs_mount_namespace) {
_cleanup_free_ char *error_path = NULL;
- r = apply_mount_namespace(command->flags, context, params, runtime, memory_pressure_path, &error_path);
+ r = apply_mount_namespace(command->flags,
+ context,
+ params,
+ runtime,
+ memory_pressure_path,
+ needs_sandboxing,
+ &error_path);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_exec_error_errno(context, params, r, "Failed to set up mount namespacing%s%s: %m",
@@ -4672,7 +4792,7 @@ int exec_invoke(
}
if (context->memory_ksm >= 0)
- if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm) < 0) {
+ if (prctl(PR_SET_MEMORY_MERGE, context->memory_ksm, 0, 0, 0) < 0) {
if (ERRNO_IS_NOT_SUPPORTED(errno))
log_exec_debug_errno(context,
params,
@@ -4731,26 +4851,16 @@ int exec_invoke(
_cleanup_close_ int executable_fd = -EBADF;
r = find_executable_full(command->path, /* root= */ NULL, context->exec_search_path, false, &executable, &executable_fd);
if (r < 0) {
- if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
- log_exec_struct_errno(context, params, LOG_INFO, r,
- "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
- LOG_EXEC_INVOCATION_ID(params),
- LOG_EXEC_MESSAGE(params,
- "Executable %s missing, skipping: %m",
- command->path),
- "EXECUTABLE=%s", command->path);
- *exit_status = EXIT_SUCCESS;
- return 0;
- }
-
*exit_status = EXIT_EXEC;
- return log_exec_struct_errno(context, params, LOG_INFO, r,
- "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
- LOG_EXEC_INVOCATION_ID(params),
- LOG_EXEC_MESSAGE(params,
- "Failed to locate executable %s: %m",
- command->path),
- "EXECUTABLE=%s", command->path);
+ log_exec_struct_errno(context, params, LOG_NOTICE, r,
+ "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
+ LOG_EXEC_MESSAGE(params,
+ "Unable to locate executable '%s': %m",
+ command->path),
+ "EXECUTABLE=%s", command->path);
+ /* If the error will be ignored by manager, tune down the log level here. Missing executable
+ * is very much expected in this case. */
+ return r != -ENOMEM && FLAGS_SET(command->flags, EXEC_COMMAND_IGNORE_FAILURE) ? 1 : r;
}
r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, &executable_fd);
@@ -4791,15 +4901,16 @@ int exec_invoke(
/* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that
* we are more aggressive this time, since we don't need socket_fd and the netns and ipcns fds any
- * more. We do keep exec_fd however, if we have it, since we need to keep it open until the final
- * execve(). But first, close the remaining sockets in the context objects. */
+ * more. We do keep exec_fd and handoff_timestamp_fd however, if we have it, since we need to keep
+ * them open until the final execve(). But first, close the remaining sockets in the context
+ * objects. */
exec_runtime_close(runtime);
exec_params_close(params);
r = close_all_fds(keep_fds, n_keep_fds);
if (r >= 0)
- r = shift_fds(params->fds, n_fds);
+ r = pack_fds(params->fds, n_fds);
if (r >= 0)
r = flag_fds(params->fds, n_socket_fds, n_fds, context->non_blocking);
if (r < 0) {
@@ -4945,8 +5056,10 @@ int exec_invoke(
}
}
- /* Apply working directory here, because the working directory might be on NFS and only the user running
- * this service might have the correct privilege to change to the working directory */
+ /* Apply working directory here, because the working directory might be on NFS and only the user
+ * running this service might have the correct privilege to change to the working directory. Also, it
+ * is absolutely 💣 crucial 💣 we applied all mount namespacing rearrangements before this, so that
+ * the cwd cannot be used to pin directories outside of the sandbox. */
r = apply_working_directory(context, params, runtime, home, exit_status);
if (r < 0)
return log_exec_error_errno(context, params, r, "Changing to the requested working directory failed: %m");
@@ -5206,31 +5319,29 @@ int exec_invoke(
log_command_line(context, params, "Executing", executable, final_argv);
- if (params->exec_fd >= 0) {
- uint8_t hot = 1;
+ /* We have finished with all our initializations. Let's now let the manager know that. From this
+ * point on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
- /* We have finished with all our initializations. Let's now let the manager know that. From this point
- * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+ r = exec_fd_mark_hot(context, params, /* hot= */ true, exit_status);
+ if (r < 0)
+ return r;
- if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
- *exit_status = EXIT_EXEC;
- return log_exec_error_errno(context, params, errno, "Failed to enable exec_fd: %m");
- }
+ /* As last thing before the execve(), let's send the handoff timestamp */
+ r = send_handoff_timestamp(context, params, exit_status);
+ if (r < 0) {
+ /* If this handoff timestamp failed, let's undo the marking as hot */
+ (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
+ return r;
}
- r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
-
- if (params->exec_fd >= 0) {
- uint8_t hot = 0;
+ /* NB: we leave executable_fd, exec_fd, handoff_timestamp_fd open here. This is safe, because they
+ * have O_CLOEXEC set, and the execve() below will thus automatically close them. In fact, for
+ * exec_fd this is pretty much the whole raison d'etre. */
- /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
- * that POLLHUP on it no longer means execve() succeeded. */
+ r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
- if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
- *exit_status = EXIT_EXEC;
- return log_exec_error_errno(context, params, errno, "Failed to disable exec_fd: %m");
- }
- }
+ /* The execve() failed, let's undo the marking as hot */
+ (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
*exit_status = EXIT_EXEC;
return log_exec_error_errno(context, params, r, "Failed to execute %s: %m", executable);