diff options
Diffstat (limited to '')
-rw-r--r-- | src/basic/process-util.c | 270 |
1 files changed, 219 insertions, 51 deletions
diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 4492e7d..c9d968d 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -25,6 +25,7 @@ #include "alloc-util.h" #include "architecture.h" #include "argv-util.h" +#include "cgroup-util.h" #include "dirent-util.h" #include "env-file.h" #include "env-util.h" @@ -510,7 +511,7 @@ static int get_process_link_contents(pid_t pid, const char *proc_file, char **re p = procfs_file_alloca(pid, proc_file); r = readlink_malloc(p, ret); - return r == -ENOENT ? -ESRCH : r; + return (r == -ENOENT && proc_mounted() > 0) ? -ESRCH : r; } int get_process_exe(pid_t pid, char **ret) { @@ -730,6 +731,82 @@ int get_process_ppid(pid_t pid, pid_t *ret) { return 0; } +int pid_get_start_time(pid_t pid, uint64_t *ret) { + _cleanup_free_ char *line = NULL; + const char *p; + int r; + + assert(pid >= 0); + + p = procfs_file_alloca(pid, "stat"); + r = read_one_line_file(p, &line); + if (r == -ENOENT) + return -ESRCH; + if (r < 0) + return r; + + /* Let's skip the pid and comm fields. The latter is enclosed in () but does not escape any () in its + * value, so let's skip over it manually */ + + p = strrchr(line, ')'); + if (!p) + return -EIO; + + p++; + + unsigned long llu; + + if (sscanf(p, " " + "%*c " /* state */ + "%*u " /* ppid */ + "%*u " /* pgrp */ + "%*u " /* session */ + "%*u " /* tty_nr */ + "%*u " /* tpgid */ + "%*u " /* flags */ + "%*u " /* minflt */ + "%*u " /* cminflt */ + "%*u " /* majflt */ + "%*u " /* cmajflt */ + "%*u " /* utime */ + "%*u " /* stime */ + "%*u " /* cutime */ + "%*u " /* cstime */ + "%*i " /* priority */ + "%*i " /* nice */ + "%*u " /* num_threads */ + "%*u " /* itrealvalue */ + "%lu ", /* starttime */ + &llu) != 1) + return -EIO; + + if (ret) + *ret = llu; + + return 0; +} + +int pidref_get_start_time(const PidRef *pid, uint64_t *ret) { + uint64_t t; + int r; + + if (!pidref_is_set(pid)) + return -ESRCH; + + r = pid_get_start_time(pid->pid, ret ? &t : NULL); + if (r < 0) + return r; + + r = pidref_verify(pid); + if (r < 0) + return r; + + if (ret) + *ret = t; + + return 0; +} + int get_process_umask(pid_t pid, mode_t *ret) { _cleanup_free_ char *m = NULL; const char *p; @@ -946,31 +1023,16 @@ int kill_and_sigcont(pid_t pid, int sig) { int getenv_for_pid(pid_t pid, const char *field, char **ret) { _cleanup_fclose_ FILE *f = NULL; - char *value = NULL; const char *path; - size_t l, sum = 0; + size_t sum = 0; int r; assert(pid >= 0); assert(field); assert(ret); - if (pid == 0 || pid == getpid_cached()) { - const char *e; - - e = getenv(field); - if (!e) { - *ret = NULL; - return 0; - } - - value = strdup(e); - if (!value) - return -ENOMEM; - - *ret = value; - return 1; - } + if (pid == 0 || pid == getpid_cached()) + return strdup_to_full(ret, getenv(field)); if (!pid_is_valid(pid)) return -EINVAL; @@ -983,9 +1045,9 @@ int getenv_for_pid(pid_t pid, const char *field, char **ret) { if (r < 0) return r; - l = strlen(field); for (;;) { _cleanup_free_ char *line = NULL; + const char *match; if (sum > ENVIRONMENT_BLOCK_MAX) /* Give up searching eventually */ return -ENOBUFS; @@ -998,14 +1060,9 @@ int getenv_for_pid(pid_t pid, const char *field, char **ret) { sum += r; - if (strneq(line, field, l) && line[l] == '=') { - value = strdup(line + l + 1); - if (!value) - return -ENOMEM; - - *ret = value; - return 1; - } + match = startswith(line, field); + if (match && *match == '=') + return strdup_to_full(ret, match + 1); } *ret = NULL; @@ -1112,8 +1169,10 @@ int pidref_is_alive(const PidRef *pidref) { return -ESRCH; result = pid_is_alive(pidref->pid); - if (result < 0) + if (result < 0) { + assert(result != -ESRCH); return result; + } r = pidref_verify(pidref); if (r == -ESRCH) @@ -1224,7 +1283,7 @@ int opinionated_personality(unsigned long *ret) { if (current < 0) return current; - if (((unsigned long) current & 0xffff) == PER_LINUX32) + if (((unsigned long) current & OPINIONATED_PERSONALITY_MASK) == PER_LINUX32) *ret = PER_LINUX32; else *ret = PER_LINUX; @@ -1389,7 +1448,7 @@ static int fork_flags_to_signal(ForkFlags flags) { int safe_fork_full( const char *name, const int stdio_fds[3], - const int except_fds[], + int except_fds[], size_t n_except_fds, ForkFlags flags, pid_t *ret_pid) { @@ -1462,10 +1521,11 @@ int safe_fork_full( } } - if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS)) != 0) + if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS)) != 0) pid = raw_clone(SIGCHLD| (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) | - (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0)); + (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) | + (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0)); else pid = fork(); if (pid < 0) @@ -1589,6 +1649,9 @@ int safe_fork_full( log_full_errno(prio, r, "Failed to rearrange stdio fds: %m"); _exit(EXIT_FAILURE); } + + /* Turn off O_NONBLOCK on the fdio fds, in case it was left on */ + stdio_disable_nonblock(); } else { r = make_null_stdio(); if (r < 0) { @@ -1614,6 +1677,19 @@ int safe_fork_full( } } + if (flags & FORK_PACK_FDS) { + /* FORK_CLOSE_ALL_FDS ensures that except_fds are the only FDs >= 3 that are + * open, this is including the log. This is required by pack_fds, which will + * get stuck in an infinite loop of any FDs other than except_fds are open. */ + assert(FLAGS_SET(flags, FORK_CLOSE_ALL_FDS)); + + r = pack_fds(except_fds, n_except_fds); + if (r < 0) { + log_full_errno(prio, r, "Failed to pack file descriptors: %m"); + _exit(EXIT_FAILURE); + } + } + if (flags & FORK_CLOEXEC_OFF) { r = fd_cloexec_many(except_fds, n_except_fds, false); if (r < 0) { @@ -1650,10 +1726,34 @@ int safe_fork_full( return 0; } +int pidref_safe_fork_full( + const char *name, + const int stdio_fds[3], + int except_fds[], + size_t n_except_fds, + ForkFlags flags, + PidRef *ret_pid) { + + pid_t pid; + int r, q; + + assert(!FLAGS_SET(flags, FORK_WAIT)); + + r = safe_fork_full(name, stdio_fds, except_fds, n_except_fds, flags, &pid); + if (r < 0) + return r; + + q = pidref_set_pid(ret_pid, pid); + if (q < 0) /* Let's not fail for this, no matter what, the process exists after all, and that's key */ + *ret_pid = PIDREF_MAKE_FROM_PID(pid); + + return r; +} + int namespace_fork( const char *outer_name, const char *inner_name, - const int except_fds[], + int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, @@ -1927,47 +2027,115 @@ int make_reaper_process(bool b) { return 0; } -int posix_spawn_wrapper(const char *path, char *const *argv, char *const *envp, pid_t *ret_pid) { +DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(posix_spawnattr_t*, posix_spawnattr_destroy, NULL); + +int posix_spawn_wrapper( + const char *path, + char * const *argv, + char * const *envp, + const char *cgroup, + PidRef *ret_pidref) { + + short flags = POSIX_SPAWN_SETSIGMASK|POSIX_SPAWN_SETSIGDEF; posix_spawnattr_t attr; sigset_t mask; - pid_t pid; int r; /* Forks and invokes 'path' with 'argv' and 'envp' using CLONE_VM and CLONE_VFORK, which means the * caller will be blocked until the child either exits or exec's. The memory of the child will be * fully shared with the memory of the parent, so that there are no copy-on-write or memory.max - * issues. */ + * issues. + * + * Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3()) + * if available. Note that CLONE_INTO_CGROUP is only supported on cgroup v2. + * returns 1: We're already in the right cgroup + * 0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller + * needs to call 'cg_attach' on their own */ assert(path); assert(argv); - assert(ret_pid); + assert(ret_pidref); assert_se(sigfillset(&mask) >= 0); r = posix_spawnattr_init(&attr); if (r != 0) return -r; /* These functions return a positive errno on failure */ - /* Set all signals to SIG_DFL */ - r = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGMASK|POSIX_SPAWN_SETSIGDEF); + + /* Initialization needs to succeed before we can set up a destructor. */ + _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr; + +#if HAVE_PIDFD_SPAWN + _cleanup_close_ int cgroup_fd = -EBADF; + + if (cgroup) { + _cleanup_free_ char *resolved_cgroup = NULL; + + r = cg_get_path_and_check( + SYSTEMD_CGROUP_CONTROLLER, + cgroup, + /* suffix= */ NULL, + &resolved_cgroup); + if (r < 0) + return r; + + cgroup_fd = open(resolved_cgroup, O_PATH|O_DIRECTORY|O_CLOEXEC); + if (cgroup_fd < 0) + return -errno; + + r = posix_spawnattr_setcgroup_np(&attr, cgroup_fd); + if (r != 0) + return -r; + + flags |= POSIX_SPAWN_SETCGROUP; + } +#endif + + r = posix_spawnattr_setflags(&attr, flags); if (r != 0) - goto fail; + return -r; r = posix_spawnattr_setsigmask(&attr, &mask); if (r != 0) - goto fail; + return -r; - r = posix_spawn(&pid, path, NULL, &attr, argv, envp); +#if HAVE_PIDFD_SPAWN + _cleanup_close_ int pidfd = -EBADF; + + r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp); + if (r == 0) { + r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd)); + if (r < 0) + return r; + + return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP); + } + if (ERRNO_IS_NOT_SUPPORTED(r)) { + /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */ + if (cgroup && cg_is_threaded(cgroup) > 0) + return -EUCLEAN; + + /* clone3() not available? */ + } else if (!ERRNO_IS_PRIVILEGE(r)) + return -r; + + /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but need to change the + * flags to remove the cgroup one, which is what redirects to clone3() */ + flags &= ~POSIX_SPAWN_SETCGROUP; + r = posix_spawnattr_setflags(&attr, flags); if (r != 0) - goto fail; + return -r; +#endif - *ret_pid = pid; + pid_t pid; + r = posix_spawn(&pid, path, NULL, &attr, argv, envp); + if (r != 0) + return -r; - posix_spawnattr_destroy(&attr); - return 0; + r = pidref_set_pid(ret_pidref, pid); + if (r < 0) + return r; -fail: - assert(r > 0); - posix_spawnattr_destroy(&attr); - return -r; + return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */ } int proc_dir_open(DIR **ret) { |