From 2c3c1048746a4622d8c89a29670120dc8fab93c4 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:49:45 +0200 Subject: Adding upstream version 6.1.76. Signed-off-by: Daniel Baumann --- tools/testing/selftests/pidfd/.gitignore | 8 + tools/testing/selftests/pidfd/Makefile | 8 + tools/testing/selftests/pidfd/config | 7 + tools/testing/selftests/pidfd/pidfd.h | 118 +++++ tools/testing/selftests/pidfd/pidfd_fdinfo_test.c | 310 ++++++++++++ tools/testing/selftests/pidfd/pidfd_getfd_test.c | 246 +++++++++ tools/testing/selftests/pidfd/pidfd_open_test.c | 163 ++++++ tools/testing/selftests/pidfd/pidfd_poll_test.c | 116 +++++ tools/testing/selftests/pidfd/pidfd_setns_test.c | 559 +++++++++++++++++++++ tools/testing/selftests/pidfd/pidfd_test.c | 575 ++++++++++++++++++++++ tools/testing/selftests/pidfd/pidfd_wait.c | 233 +++++++++ 11 files changed, 2343 insertions(+) create mode 100644 tools/testing/selftests/pidfd/.gitignore create mode 100644 tools/testing/selftests/pidfd/Makefile create mode 100644 tools/testing/selftests/pidfd/config create mode 100644 tools/testing/selftests/pidfd/pidfd.h create mode 100644 tools/testing/selftests/pidfd/pidfd_fdinfo_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_getfd_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_open_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_poll_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_setns_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_test.c create mode 100644 tools/testing/selftests/pidfd/pidfd_wait.c (limited to 'tools/testing/selftests/pidfd') diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore new file mode 100644 index 000000000..973198a3e --- /dev/null +++ b/tools/testing/selftests/pidfd/.gitignore @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +pidfd_open_test +pidfd_poll_test +pidfd_test +pidfd_wait +pidfd_fdinfo_test +pidfd_getfd_test 
+pidfd_setns_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile new file mode 100644 index 000000000..d731e3e76 --- /dev/null +++ b/tools/testing/selftests/pidfd/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall + +TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test + +include ../lib.mk + diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config new file mode 100644 index 000000000..f6f2965e1 --- /dev/null +++ b/tools/testing/selftests/pidfd/config @@ -0,0 +1,7 @@ +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h new file mode 100644 index 000000000..6922d6417 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PIDFD_H +#define __PIDFD_H + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest.h" + +#ifndef P_PIDFD +#define P_PIDFD 3 +#endif + +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + +#ifndef CLONE_PIDFD +#define CLONE_PIDFD 0x00001000 +#endif + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +#ifndef __NR_pidfd_send_signal +#define __NR_pidfd_send_signal -1 +#endif + +#ifndef __NR_clone3 +#define __NR_clone3 -1 +#endif + +#ifndef __NR_pidfd_getfd +#define __NR_pidfd_getfd -1 +#endif + +#ifndef PIDFD_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#endif + +/* + * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c + * That means, when it wraps around any pid < 300 will be skipped. + * So we need to use a pid > 300 in order to test recycling. 
+ */ +#define PID_RECYCLE 1000 + +/* + * Define a few custom error codes for the child process to clearly indicate + * what is happening. This way we can tell the difference between a system + * error, a test error, etc. + */ +#define PIDFD_PASS 0 +#define PIDFD_FAIL 1 +#define PIDFD_ERROR 2 +#define PIDFD_SKIP 3 +#define PIDFD_XFAIL 4 + +static inline int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); + return -1; + } + + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); + return -1; + } + + ret = WEXITSTATUS(status); + ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); + return ret; +} + +static inline int sys_pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, + unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static inline int sys_pidfd_getfd(int pidfd, int fd, int flags) +{ + return syscall(__NR_pidfd_getfd, pidfd, fd, flags); +} + +static inline int sys_memfd_create(const char *name, unsigned int flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + +#endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c new file mode 100644 index 000000000..3bc46d615 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest.h" + +struct error { + int code; + char msg[512]; +}; + +static int error_set(struct error *err, 
int code, const char *fmt, ...) +{ + va_list args; + int r; + + if (code == PIDFD_PASS || !err || err->code != PIDFD_PASS) + return code; + + err->code = code; + va_start(args, fmt); + r = vsnprintf(err->msg, sizeof(err->msg), fmt, args); + assert((size_t)r < sizeof(err->msg)); + va_end(args); + + return code; +} + +static void error_report(struct error *err, const char *test_name) +{ + switch (err->code) { + case PIDFD_ERROR: + ksft_exit_fail_msg("%s test: Fatal: %s\n", test_name, err->msg); + break; + + case PIDFD_FAIL: + /* will be: not ok %d # error %s test: %s */ + ksft_test_result_error("%s test: %s\n", test_name, err->msg); + break; + + case PIDFD_SKIP: + /* will be: not ok %d # SKIP %s test: %s */ + ksft_test_result_skip("%s test: %s\n", test_name, err->msg); + break; + + case PIDFD_XFAIL: + ksft_test_result_pass("%s test: Expected failure: %s\n", + test_name, err->msg); + break; + + case PIDFD_PASS: + ksft_test_result_pass("%s test: Passed\n", test_name); + break; + + default: + ksft_exit_fail_msg("%s test: Unknown code: %d %s\n", + test_name, err->code, err->msg); + break; + } +} + +static inline int error_check(struct error *err, const char *test_name) +{ + /* In case of error we bail out and terminate the test program */ + if (err->code == PIDFD_ERROR) + error_report(err, test_name); + + return err->code; +} + +#define CHILD_STACK_SIZE 8192 + +struct child { + char *stack; + pid_t pid; + int fd; +}; + +/* + * Clone a child running fn(args) in new pid and mount namespaces (plus a new + * user namespace when not running as root) and return its stack, pid and + * pidfd. Failures are recorded in err as PIDFD_ERROR so that error_check() + * in the caller bails out instead of using an unset pid/fd. + */ +static struct child clone_newns(int (*fn)(void *), void *args, + struct error *err) +{ + static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD; + struct child ret; + + if (!(flags & CLONE_NEWUSER) && geteuid() != 0) + flags |= CLONE_NEWUSER; + + ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (ret.stack == MAP_FAILED) { + /* must be PIDFD_ERROR: error_check() only bails out on that code */ + error_set(err, PIDFD_ERROR, "mmap of stack failed (errno %d)", errno); + return ret; + } + +#ifdef __ia64__ + ret.pid = __clone2(fn, ret.stack, 
CHILD_STACK_SIZE, flags, args, &ret.fd); +#else + ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd); +#endif + + if (ret.pid < 0) { + /* ret.fd is not written when clone fails; report clone's return value */ + error_set(err, PIDFD_ERROR, "clone failed (ret %d, errno %d)", + ret.pid, errno); + return ret; + } + + ksft_print_msg("New child: %d, fd: %d\n", ret.pid, ret.fd); + + return ret; +} + +static inline void child_close(struct child *child) +{ + close(child->fd); +} + +/* + * Wait for the child, record a non-zero exit status or a wait failure in err, + * and unmap the child's stack. Returns the child's exit status, or -1 if + * waiting or unmapping failed. + */ +static inline int child_join(struct child *child, struct error *err) +{ + int r; + + r = wait_for_pid(child->pid); + if (r < 0) + error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)", + r, errno); + else if (r > 0) + error_set(err, r, "child %d reported: %d", child->pid, r); + + if (munmap(child->stack, CHILD_STACK_SIZE)) { + /* use a code error_report() knows, not a bare -1 */ + error_set(err, PIDFD_ERROR, "munmap of child stack failed (errno %d)", errno); + r = -1; + } + + return r; +} + +static inline int child_join_close(struct child *child, struct error *err) +{ + child_close(child); + return child_join(child, err); +} + +static inline void trim_newline(char *str) +{ + char *pos = strrchr(str, '\n'); + + if (pos) + *pos = '\0'; +} + +static int verify_fdinfo(int pidfd, struct error *err, const char *prefix, + size_t prefix_len, const char *expect, ...) 
+{ + char buffer[512] = {0, }; + char path[512] = {0, }; + va_list args; + FILE *f; + char *line = NULL; + size_t n = 0; + int found = 0; + int ret = PIDFD_PASS; + int r; + + va_start(args, expect); + r = vsnprintf(buffer, sizeof(buffer), expect, args); + assert((size_t)r < sizeof(buffer)); + va_end(args); + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd); + f = fopen(path, "re"); + if (!f) + return error_set(err, PIDFD_ERROR, "fdinfo open failed for %d", + pidfd); + + while (getline(&line, &n, f) != -1) { + char *val; + + if (strncmp(line, prefix, prefix_len)) + continue; + + found = 1; + + val = line + prefix_len; + r = strcmp(val, buffer); + if (r != 0) { + trim_newline(line); + trim_newline(buffer); + /* hand the mismatch back to callers that check the return value */ + ret = error_set(err, PIDFD_FAIL, "%s '%s' != '%s'", + prefix, val, buffer); + } + break; + } + + free(line); + fclose(f); + + if (found == 0) + return error_set(err, PIDFD_FAIL, "%s not found for fd %d", + prefix, pidfd); + + return ret; +} + +/* + * Child entry point for clone_newns(). With args == NULL it returns + * PIDFD_PASS immediately. Otherwise args points to a pidfd; the child + * remounts / private, mounts a fresh /proc and checks that the pidfd's + * fdinfo NSpid entry is 0. Returns a PIDFD_* code used as the exit status. + */ +static int child_fdinfo_nspid_test(void *args) +{ + /* must start as PIDFD_PASS: error_set() reads err.code before writing */ + struct error err = {0, }; + int pidfd; + int r; + + /* if we got no fd for the sibling, we are done */ + if (!args) + return PIDFD_PASS; + + /* verify that we can not resolve the pidfd for a process + * in a sibling pid namespace, i.e. 
a pid namespace that is + * neither our own nor one descended from it + */ + r = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (r < 0) { + ksft_print_msg("Failed to remount / private\n"); + return PIDFD_ERROR; + } + + (void)umount2("/proc", MNT_DETACH); + r = mount("proc", "/proc", "proc", 0, NULL); + if (r < 0) { + ksft_print_msg("Failed to remount /proc\n"); + return PIDFD_ERROR; + } + + pidfd = *(int *)args; + r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n"); + + if (r != PIDFD_PASS) + ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg); + + return r; +} + +static void test_pidfd_fdinfo_nspid(void) +{ + struct child a, b; + struct error err = {0, }; + const char *test_name = "pidfd check for NSpid in fdinfo"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); + + /* Pass the pidfd representing the first child to the + * second child, which will be in a sibling pid namespace, + * which means that the fdinfo NSpid entry for the pidfd + * should only contain '0'. + */ + b = clone_newns(child_fdinfo_nspid_test, &a.fd, &err); + error_check(&err, test_name); + + /* The children will have pid 1 in the new pid namespace, + * so the line must be 'NSpid:\t%d\t1' with %d being the pid in our namespace. 
+ */ + child_join_close(&a, &err); + child_join_close(&b, &err); + + error_report(&err, test_name); +} + +static void test_pidfd_dead_fdinfo(void) +{ + struct child a; + struct error err = {0, }; + const char *test_name = "pidfd check fdinfo for dead process"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); + child_join(&a, &err); + + verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n"); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n"); + child_close(&a); + error_report(&err, test_name); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_pidfd_fdinfo_nspid(); + test_pidfd_dead_fdinfo(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c new file mode 100644 index 000000000..0930e2411 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +/* + * UNKNOWN_FD is an fd number that should never exist in the child, as it is + * used to check the negative case. + */ +#define UNKNOWN_FD 111 +#define UID_NOBODY 65535 + +static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, + unsigned long idx2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); +} + +static int __child(int sk, int memfd) +{ + int ret; + char buf; + + /* + * Ensure we don't leave around a bunch of orphaned children if our + * tests fail. 
+ */ + ret = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (ret) { + fprintf(stderr, "%s: Child could not set DEATHSIG\n", + strerror(errno)); + return -1; + } + + ret = send(sk, &memfd, sizeof(memfd), 0); + if (ret != sizeof(memfd)) { + fprintf(stderr, "%s: Child failed to send fd number\n", + strerror(errno)); + return -1; + } + + /* + * The fixture setup is completed at this point. The tests will run. + * + * This blocking recv enables the parent to message the child. + * Either we will read 'P' off of the sk, indicating that we need + * to disable ptrace, or we will read a 0, indicating that the other + * side has closed the sk. This occurs during fixture teardown time, + * indicating that the child should exit. + */ + while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) { + if (buf == 'P') { + ret = prctl(PR_SET_DUMPABLE, 0); + if (ret < 0) { + fprintf(stderr, + "%s: Child failed to disable ptrace\n", + strerror(errno)); + return -1; + } + } else { + fprintf(stderr, "Child received unknown command %c\n", + buf); + return -1; + } + ret = send(sk, &buf, sizeof(buf), 0); + if (ret != 1) { + fprintf(stderr, "%s: Child failed to ack\n", + strerror(errno)); + return -1; + } + } + if (ret < 0) { + fprintf(stderr, "%s: Child failed to read from socket\n", + strerror(errno)); + return -1; + } + + return 0; +} + +static int child(int sk) +{ + int memfd, ret; + + memfd = sys_memfd_create("test", 0); + if (memfd < 0) { + fprintf(stderr, "%s: Child could not create memfd\n", + strerror(errno)); + ret = -1; + } else { + ret = __child(sk, memfd); + close(memfd); + } + + close(sk); + return ret; +} + +FIXTURE(child) +{ + /* + * remote_fd is the number of the FD which we are trying to retrieve + * from the child. + */ + int remote_fd; + /* pid points to the child which we are fetching FDs from */ + pid_t pid; + /* pidfd is the pidfd of the child */ + int pidfd; + /* + * sk is our side of the socketpair used to communicate with the child. + * When it is closed, the child will exit. 
+ */ + int sk; +}; + +FIXTURE_SETUP(child) +{ + int ret, sk_pair[2]; + + ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { + TH_LOG("%s: failed to create socketpair", strerror(errno)); + } + self->sk = sk_pair[0]; + + self->pid = fork(); + ASSERT_GE(self->pid, 0); + + if (self->pid == 0) { + close(sk_pair[0]); + if (child(sk_pair[1])) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + close(sk_pair[1]); + + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + /* + * Wait for the child to complete setup. It'll send the remote memfd's + * number when ready. + */ + ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0); + ASSERT_EQ(sizeof(self->remote_fd), ret); +} + +FIXTURE_TEARDOWN(child) +{ + EXPECT_EQ(0, close(self->pidfd)); + EXPECT_EQ(0, close(self->sk)); + + EXPECT_EQ(0, wait_for_pid(self->pid)); +} + +TEST_F(child, disable_ptrace) +{ + int uid, fd; + char c; + + /* + * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE + * + * The tests should run in their own process, so even this test fails, + * it shouldn't result in subsequent tests failing. 
+ */ + uid = getuid(); + if (uid == 0) + ASSERT_EQ(0, seteuid(UID_NOBODY)); + + ASSERT_EQ(1, send(self->sk, "P", 1, 0)); + ASSERT_EQ(1, recv(self->sk, &c, 1, 0)); + + fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); + EXPECT_EQ(-1, fd); + EXPECT_EQ(EPERM, errno); + + if (uid == 0) + ASSERT_EQ(0, seteuid(0)); +} + +/* + * Fetch the child's memfd via pidfd_getfd(), check with kcmp() that it refers + * to the same file, and check that the new fd has close-on-exec set. + */ +TEST_F(child, fetch_fd) +{ + int fd, ret; + + fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); + ASSERT_GE(fd, 0); + + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); + + ret = fcntl(fd, F_GETFD); + ASSERT_GE(ret, 0); + /* pidfd_getfd() sets FD_CLOEXEC; ">= 0" could never fail */ + EXPECT_NE(ret & FD_CLOEXEC, 0); + + close(fd); +} + +TEST_F(child, test_unknown_fd) +{ + int fd; + + fd = sys_pidfd_getfd(self->pidfd, UNKNOWN_FD, 0); + EXPECT_EQ(-1, fd) { + TH_LOG("getfd succeeded while fetching unknown fd"); + }; + EXPECT_EQ(EBADF, errno) { + TH_LOG("%s: getfd did not get EBADF", strerror(errno)); + } +} + +TEST(flags_set) +{ + ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1)); + EXPECT_EQ(errno, EINVAL); +} + +#if __NR_pidfd_getfd == -1 +int main(void) +{ + fprintf(stderr, "__NR_pidfd_getfd undefined. The pidfd_getfd syscall is unavailable. 
Test aborting\n"); + return KSFT_SKIP; +} +#else +TEST_HARNESS_MAIN +#endif diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c new file mode 100644 index 000000000..8a59438cc --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest.h" + +static int safe_int(const char *numstr, int *converted) +{ + char *err = NULL; + long sli; + + errno = 0; + sli = strtol(numstr, &err, 0); + if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN)) + return -ERANGE; + + if (errno != 0 && sli == 0) + return -EINVAL; + + if (err == numstr || *err != '\0') + return -EINVAL; + + if (sli > INT_MAX || sli < INT_MIN) + return -ERANGE; + + *converted = (int)sli; + return 0; +} + +static int char_left_gc(const char *buffer, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (buffer[i] == ' ' || + buffer[i] == '\t') + continue; + + return i; + } + + return 0; +} + +static int char_right_gc(const char *buffer, size_t len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + if (buffer[i] == ' ' || + buffer[i] == '\t' || + buffer[i] == '\n' || + buffer[i] == '\0') + continue; + + return i + 1; + } + + return 0; +} + +static char *trim_whitespace_in_place(char *buffer) +{ + buffer += char_left_gc(buffer, strlen(buffer)); + buffer[char_right_gc(buffer, strlen(buffer))] = '\0'; + return buffer; +} + +/* + * Return the integer that follows the first line starting with key (keylen + * bytes long) in /proc/self/fdinfo/%d for the given pidfd. Returns -1 if the + * file cannot be opened, no line matches, or the value is not a valid int. + */ +static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen) +{ + int ret; + char path[512]; + FILE *f; + size_t n = 0; + pid_t result = -1; + char *line = NULL; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd); + + f = fopen(path, "re"); + if (!f) + return -1; + + while (getline(&line, &n, f) != -1) { 
+ char *numstr; + + if (strncmp(line, key, keylen)) + continue; + + /* skip the matched key itself, whatever its length */ + numstr = trim_whitespace_in_place(line + keylen); + ret = safe_int(numstr, &result); + if (ret < 0) + goto out; + + break; + } + +out: + free(line); + fclose(f); + return result; +} + +int main(int argc, char **argv) +{ + int pidfd = -1, ret = 1; + pid_t pid; + + ksft_set_plan(3); + + pidfd = sys_pidfd_open(-1, 0); + if (pidfd >= 0) { + ksft_print_msg( + "%s - succeeded to open pidfd for invalid pid -1\n", + strerror(errno)); + goto on_error; + } + ksft_test_result_pass("do not allow invalid pid test: passed\n"); + + pidfd = sys_pidfd_open(getpid(), 1); + if (pidfd >= 0) { + ksft_print_msg( + "%s - succeeded to open pidfd with invalid flag value specified\n", + strerror(errno)); + goto on_error; + } + ksft_test_result_pass("do not allow invalid flag test: passed\n"); + + pidfd = sys_pidfd_open(getpid(), 0); + if (pidfd < 0) { + ksft_print_msg("%s - failed to open pidfd\n", strerror(errno)); + goto on_error; + } + ksft_test_result_pass("open a new pidfd test: passed\n"); + + pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); + ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + + ret = 0; + +on_error: + if (pidfd >= 0) + close(pidfd); + + return !ret ? 
ksft_exit_pass() : ksft_exit_fail(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c new file mode 100644 index 000000000..610811275 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest.h" + +static bool timeout; + +static void handle_alarm(int sig) +{ + timeout = true; +} + +int main(int argc, char **argv) +{ + struct pollfd fds; + int iter, nevents; + int nr_iterations = 10000; + + fds.events = POLLIN; + + if (argc > 2) + ksft_exit_fail_msg("Unexpected command line argument\n"); + + if (argc == 2) { + nr_iterations = atoi(argv[1]); + if (nr_iterations <= 0) + ksft_exit_fail_msg("invalid input parameter %s\n", + argv[1]); + } + + ksft_print_msg("running pidfd poll test for %d iterations\n", + nr_iterations); + + for (iter = 0; iter < nr_iterations; iter++) { + int pidfd; + int child_pid = fork(); + + if (child_pid < 0) { + if (errno == EAGAIN) { + iter--; + continue; + } + ksft_exit_fail_msg( + "%s - failed to fork a child process\n", + strerror(errno)); + } + + if (child_pid == 0) { + /* Child process just sleeps for a min and exits */ + sleep(60); + exit(EXIT_SUCCESS); + } + + /* Parent kills the child and waits for its death */ + pidfd = sys_pidfd_open(child_pid, 0); + if (pidfd < 0) + ksft_exit_fail_msg("%s - pidfd_open failed\n", + strerror(errno)); + + /* Setup 3 sec alarm - plenty of time */ + if (signal(SIGALRM, handle_alarm) == SIG_ERR) + ksft_exit_fail_msg("%s - signal failed\n", + strerror(errno)); + alarm(3); + + /* Send SIGKILL to the child */ + if (sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0)) + ksft_exit_fail_msg("%s - pidfd_send_signal failed\n", + strerror(errno)); + + /* Wait for the death notification */ + fds.fd = pidfd; + nevents = 
poll(&fds, 1, -1); + + /* Check for error conditions */ + if (nevents < 0) + ksft_exit_fail_msg("%s - poll failed\n", + strerror(errno)); + + if (nevents != 1) + ksft_exit_fail_msg("unexpected poll result: %d\n", + nevents); + + if (!(fds.revents & POLLIN)) + ksft_exit_fail_msg( + "unexpected event type received: 0x%x\n", + fds.revents); + + if (timeout) + ksft_exit_fail_msg( + "death notification wait timeout\n"); + + close(pidfd); + /* Wait for child to prevent zombies */ + if (waitpid(child_pid, NULL, 0) < 0) + ksft_exit_fail_msg("%s - waitpid failed\n", + strerror(errno)); + + } + + ksft_test_result_pass("pidfd poll test: pass\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c new file mode 100644 index 000000000..6e2f2cd40 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../clone3/clone3_selftests.h" +#include "../kselftest_harness.h" + +enum { + PIDFD_NS_USER, + PIDFD_NS_MNT, + PIDFD_NS_PID, + PIDFD_NS_UTS, + PIDFD_NS_IPC, + PIDFD_NS_NET, + PIDFD_NS_CGROUP, + PIDFD_NS_PIDCLD, + PIDFD_NS_TIME, + PIDFD_NS_MAX +}; + +const struct ns_info { + const char *name; + int flag; +} ns_info[] = { + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, +}; + +FIXTURE(current_nsset) +{ + pid_t pid; + int pidfd; + int nsfds[PIDFD_NS_MAX]; + + 
pid_t child_pid_exited; + int child_pidfd_exited; + + pid_t child_pid1; + int child_pidfd1; + int child_nsfds1[PIDFD_NS_MAX]; + + pid_t child_pid2; + int child_pidfd2; + int child_nsfds2[PIDFD_NS_MAX]; +}; + +static int sys_waitid(int which, pid_t pid, int options) +{ + return syscall(__NR_waitid, which, pid, NULL, options, NULL); +} + +pid_t create_child(int *pidfd, unsigned flags) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | flags, + .exit_signal = SIGCHLD, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(struct clone_args)); +} + +static bool switch_timens(void) +{ + int fd, ret; + + if (unshare(CLONE_NEWTIME)) + return false; + + fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC); + if (fd < 0) + return false; + + ret = setns(fd, CLONE_NEWTIME); + close(fd); + return ret == 0; +} + +static ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +FIXTURE_SETUP(current_nsset) +{ + int i, proc_fd, ret; + int ipc_sockets[2]; + char c; + + for (i = 0; i < PIDFD_NS_MAX; i++) { + self->nsfds[i] = -EBADF; + self->child_nsfds1[i] = -EBADF; + self->child_nsfds2[i] = -EBADF; + } + + proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(proc_fd, 0) { + TH_LOG("%m - Failed to open /proc/self/ns"); + } + + self->pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); + if (self->nsfds[i] < 0) { + EXPECT_EQ(errno, ENOENT) { + TH_LOG("%m - Failed to open %s namespace for process %d", + info->name, self->pid); + } + } + } + + self->pidfd = sys_pidfd_open(self->pid, 0); + EXPECT_GT(self->pidfd, 0) { + 
TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + } + + /* Create task that exits right away. */ + self->child_pid_exited = create_child(&self->child_pidfd_exited, + CLONE_NEWUSER | CLONE_NEWNET); + EXPECT_GT(self->child_pid_exited, 0); + + if (self->child_pid_exited == 0) + _exit(EXIT_SUCCESS); + + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); + + self->pidfd = sys_pidfd_open(self->pid, 0); + EXPECT_GE(self->pidfd, 0) { + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + } + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + /* Create tasks that will be stopped. */ + self->child_pid1 = create_child(&self->child_pidfd1, + CLONE_NEWUSER | CLONE_NEWNS | + CLONE_NEWCGROUP | CLONE_NEWIPC | + CLONE_NEWUTS | CLONE_NEWPID | + CLONE_NEWNET); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (!switch_timens()) + _exit(EXIT_FAILURE); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, + CLONE_NEWUSER | CLONE_NEWNS | + CLONE_NEWCGROUP | CLONE_NEWIPC | + CLONE_NEWUTS | CLONE_NEWPID | + CLONE_NEWNET); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (!switch_timens()) + _exit(EXIT_FAILURE); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + for (i = 0; i < PIDFD_NS_MAX; i++) { + char p[100]; + + const struct ns_info *info = &ns_info[i]; + + 
self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); + if (self->nsfds[i] < 0) { + EXPECT_EQ(errno, ENOENT) { + TH_LOG("%m - Failed to open %s namespace for process %d", + info->name, self->pid); + } + } + + ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", + self->child_pid1, info->name); + EXPECT_GT(ret, 0); + EXPECT_LT(ret, sizeof(p)); + + self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC); + if (self->child_nsfds1[i] < 0) { + EXPECT_EQ(errno, ENOENT) { + TH_LOG("%m - Failed to open %s namespace for process %d", + info->name, self->child_pid1); + } + } + + ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s", + self->child_pid2, info->name); + EXPECT_GT(ret, 0); + EXPECT_LT(ret, sizeof(p)); + + self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC); + if (self->child_nsfds2[i] < 0) { + EXPECT_EQ(errno, ENOENT) { + /* this fd belongs to the second child, so log its pid */ + TH_LOG("%m - Failed to open %s namespace for process %d", + info->name, self->child_pid2); + } + } + } + + close(proc_fd); +} + +/* + * Kill both stopped children, close every namespace fd and child pidfd that + * was opened, then reap all three children. + */ +FIXTURE_TEARDOWN(current_nsset) +{ + int i; + + ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1, + SIGKILL, NULL, 0), 0); + ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2, + SIGKILL, NULL, 0), 0); + + for (i = 0; i < PIDFD_NS_MAX; i++) { + if (self->nsfds[i] >= 0) + close(self->nsfds[i]); + if (self->child_nsfds1[i] >= 0) + close(self->child_nsfds1[i]); + if (self->child_nsfds2[i] >= 0) + close(self->child_nsfds2[i]); + } + + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); +} + +static int preserve_ns(const int pid, const char *ns) +{ + int ret; + char path[50]; + + ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns); + if (ret < 0 || (size_t)ret >= sizeof(path)) + return -EIO; + + return open(path, O_RDONLY | 
O_CLOEXEC); +} + +static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns) +{ + int ns_fd2 = -EBADF; + int ret = -1; + struct stat ns_st1, ns_st2; + + ret = fstat(ns_fd1, &ns_st1); + if (ret < 0) + return -1; + + ns_fd2 = preserve_ns(pid2, ns); + if (ns_fd2 < 0) + return -1; + + ret = fstat(ns_fd2, &ns_st2); + close(ns_fd2); + if (ret < 0) + return -1; + + /* processes are in the same namespace */ + if ((ns_st1.st_dev == ns_st2.st_dev) && + (ns_st1.st_ino == ns_st2.st_ino)) + return 1; + + /* processes are in different namespaces */ + return 0; +} + +/* Test that we can't pass garbage to the kernel. */ +TEST_F(current_nsset, invalid_flags) +{ + ASSERT_NE(setns(self->pidfd, 0), 0); + EXPECT_EQ(errno, EINVAL); + + ASSERT_NE(setns(self->pidfd, -1), 0); + EXPECT_EQ(errno, EINVAL); + + ASSERT_NE(setns(self->pidfd, CLONE_VM), 0); + EXPECT_EQ(errno, EINVAL); + + ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0); + EXPECT_EQ(errno, EINVAL); +} + +/* Test that we can't attach to a task that has already exited. */ +TEST_F(current_nsset, pidfd_exited_child) +{ + int i; + pid_t pid; + + ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET), + 0); + EXPECT_EQ(errno, ESRCH); + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + /* Verify that we haven't changed any namespaces. */ + if (self->nsfds[i] >= 0) + ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1); + } +} + +TEST_F(current_nsset, pidfd_incremental_setns) +{ + int i; + pid_t pid; + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + + if (self->child_nsfds1[i] < 0) + continue; + + if (info->flag) { + ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) { + TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d", + info->name, self->child_pid1, + self->child_pidfd1); + } + } + + /* Verify that we have changed to the correct namespaces. 
*/ /* For CLONE_NEWPID the comparison uses our own original fd (self->nsfds[i]) instead of the child's fd. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->nsfds[i]; + else + nsfd = self->child_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d", + info->name, self->child_pid1, + self->child_pidfd1); + } + TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d", + info->name, self->child_pid1, self->child_pidfd1); + } +} +/* Attach to child_pid1's namespaces one at a time, passing the matching namespace fd (child_nsfds1[i]) to setns(), and verify the result after each step. */ +TEST_F(current_nsset, nsfd_incremental_setns) +{ + int i; + pid_t pid; + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + + if (self->child_nsfds1[i] < 0) + continue; + + if (info->flag) { + ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) { + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_nsfds1[i]); + } + } + + /* Verify that we have changed to the correct namespaces. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->nsfds[i]; + else + nsfd = self->child_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_nsfds1[i]); + } + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, self->child_nsfds1[i]); + } +} +/* OR together the flags of every namespace for which we hold a child_pid1 fd and attach to all of them with a single setns() on the pidfd. */ +TEST_F(current_nsset, pidfd_one_shot_setns) +{ + unsigned flags = 0; + int i; + pid_t pid; + + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_nsfds1[i] < 0) + continue; + + flags |= info->flag; + TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", + info->name, self->child_pid1); + } + + ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { + TH_LOG("%m - Failed to setns to namespaces of %d", + self->child_pid1); + } + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + 
+ if (self->child_nsfds1[i] < 0) + continue; + + /* Verify that we have changed to the correct namespaces; for CLONE_NEWPID the check is made against our own original fd. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->nsfds[i]; + else + nsfd = self->child_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d", + info->name, self->child_pid1); + } + TH_LOG("Managed to correctly setns to %s namespace of %d", + info->name, self->child_pid1); + } +} +/* First join every available namespace of child_pid1 in one setns() call, then check that each setns() towards child_pid2 is refused, both through its pidfd and through its individual namespace fds. */ +TEST_F(current_nsset, no_foul_play) +{ + unsigned flags = 0; + int i; + + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_nsfds1[i] < 0) + continue; + + flags |= info->flag; + if (info->flag) /* No use logging pid_for_children. */ + TH_LOG("Adding %s namespace of %d to list of namespaces to attach to", + info->name, self->child_pid1); + } + + ASSERT_EQ(setns(self->child_pidfd1, flags), 0) { + TH_LOG("%m - Failed to setns to namespaces of %d via pidfd %d", + self->child_pid1, self->child_pidfd1); + } + + /* + * Can't setns to a user namespace outside of our hierarchy since we + * don't have caps in there and didn't create it. That means that under + * no circumstances should we be able to setns to any of the other + * ones since they aren't owned by our user namespace. 
+ */ + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_nsfds2[i] < 0 || !info->flag) + continue; + + ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) { + TH_LOG("Managed to setns to %s namespace of %d via pidfd %d", + info->name, self->child_pid2, + self->child_pidfd2); + } + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d", + info->name, self->child_pid2, + self->child_pidfd2); + + ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) { + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_nsfds2[i]); + } + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_nsfds2[i]); + } +} +/* setns() on a memfd with flags 0 must fail with EINVAL. */ +TEST(setns_einval) +{ + int fd; + + fd = sys_memfd_create("rostock", 0); + ASSERT_GE(fd, 0); /* 0 is a valid descriptor; stop here if creation failed instead of testing setns() on a bad fd */ + + ASSERT_NE(setns(fd, 0), 0); + EXPECT_EQ(errno, EINVAL); + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c new file mode 100644 index 000000000..cf4f3174c --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -0,0 +1,575 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest.h" + +#define str(s) _str(s) +#define _str(s) #s +#define CHILD_THREAD_MIN_WAIT 3 /* seconds */ + +#define MAX_EVENTS 5 + +static bool have_pidfd_send_signal; +/* Run fn in a new child created with clone() (__clone2() on ia64) on a local stack buffer. SIGCHLD is OR-ed into flags and pidfd is handed to clone() as its fifth argument; the clone result is returned unchanged. */ +static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) +{ + size_t stack_size = 1024; + char *stack[1024] = { 0 }; + +#ifdef __ia64__ + return __clone2(fn, stack, stack_size, flags | SIGCHLD, NULL, pidfd); +#else + return clone(fn, stack + stack_size, flags | SIGCHLD, NULL, pidfd); +#endif +} + +static volatile sig_atomic_t signal_received; /* written from the SIGUSR1 handler */ + 
+static void set_signal_received_on_sigusr1(int sig) +{ /* signal handler: records delivery of SIGUSR1 in signal_received */ + if (sig == SIGUSR1) + signal_received = 1; +} + +/* + * A straightforward way to check that pidfd_send_signal() works is to send + * a signal to ourselves: SIGUSR1 goes through a /proc/self fd and the + * handler above must have run by the time the call returns. + */ +static int test_pidfd_send_signal_simple_success(void) +{ + int pidfd, ret; + const char *test_name = "pidfd_send_signal send SIGUSR1"; + + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + + pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC); + if (pidfd < 0) + ksft_exit_fail_msg( + "%s test: Failed to open process file descriptor\n", + test_name); + + signal(SIGUSR1, set_signal_received_on_sigusr1); + + ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0); + close(pidfd); + if (ret < 0) + ksft_exit_fail_msg("%s test: Failed to send signal\n", + test_name); + + if (signal_received != 1) + ksft_exit_fail_msg("%s test: Failed to receive signal\n", + test_name); + + signal_received = 0; + ksft_test_result_pass("%s test: Sent signal\n", test_name); + return 0; +} +/* Open a /proc/<pid> fd for a child that exits immediately, reap the child, then check that sending signal 0 through the now stale fd fails with ESRCH. */ +static int test_pidfd_send_signal_exited_fail(void) +{ + int pidfd, ret, saved_errno; + char buf[256]; + pid_t pid; + const char *test_name = "pidfd_send_signal signal exited process"; + + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("%s test: Failed to create new process\n", + test_name); + + if (pid == 0) + _exit(EXIT_SUCCESS); + + snprintf(buf, sizeof(buf), "/proc/%d", pid); + /* the fd is opened before the child is reaped below */ + pidfd = open(buf, O_DIRECTORY | O_CLOEXEC); + + (void)wait_for_pid(pid); + + if (pidfd < 0) + ksft_exit_fail_msg( + "%s test: Failed to open process file descriptor\n", + test_name); + + ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0); + saved_errno = errno; + close(pidfd); + if (ret == 0) + ksft_exit_fail_msg( + "%s test: Managed to send signal to process even though it 
should have failed\n", + test_name); + /* errno was saved before close() so it still belongs to the send_signal call */ + if (saved_errno != ESRCH) + ksft_exit_fail_msg( + "%s test: Expected to receive ESRCH as errno value but received %d instead\n", + test_name, saved_errno); + + ksft_test_result_pass("%s test: Failed to send signal as expected\n", + test_name); + return 0; +} + +/* + * Maximum number of cycles we allow. This is equivalent to PID_MAX_DEFAULT. + * If users set a higher limit or we have cycled PIDFD_MAX_DEFAULT number of + * times then we skip the test to not go into an infinite loop or block for a + * long time. + */ +#define PIDFD_MAX_DEFAULT 0x8000 +/* Check that a pidfd opened for pid PID_RECYCLE cannot be used to signal a later process that reuses that pid. Runs in freshly unshared pid and mount namespaces with a private "/" and is reported as skipped when either unshare() fails with EPERM. */ +static int test_pidfd_send_signal_recycled_pid_fail(void) +{ + int i, ret; + pid_t pid1; + const char *test_name = "pidfd_send_signal signal recycled pid"; + + if (!have_pidfd_send_signal) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + + ret = unshare(CLONE_NEWPID); + if (ret < 0) { + if (errno == EPERM) { + ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n", + test_name); + return 0; + } + ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n", + test_name); + } + + ret = unshare(CLONE_NEWNS); + if (ret < 0) { + if (errno == EPERM) { + ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n", + test_name); + return 0; + } + ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n", + test_name); + } + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) + ksft_exit_fail_msg("%s test: Failed to remount / private\n", + test_name); + + /* pid 1 in new pid namespace */ + pid1 = fork(); + if (pid1 < 0) + ksft_exit_fail_msg("%s test: Failed to create new process\n", + test_name); + + if (pid1 == 0) { + char buf[256]; + pid_t pid2; + int pidfd = -1; + /* replace /proc with a fresh procfs mount; the old one is lazily detached first */ + (void)umount2("/proc", MNT_DETACH); + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret < 0) + _exit(PIDFD_ERROR); + + /* grab pid PID_RECYCLE */ + for (i = 0; i <= PIDFD_MAX_DEFAULT; 
i++) { + pid2 = fork(); + if (pid2 < 0) + _exit(PIDFD_ERROR); + + if (pid2 == 0) + _exit(PIDFD_PASS); + /* the /proc fd is opened before the child is reaped below */ + if (pid2 == PID_RECYCLE) { + snprintf(buf, sizeof(buf), "/proc/%d", pid2); + ksft_print_msg("pid to recycle is %d\n", pid2); + pidfd = open(buf, O_DIRECTORY | O_CLOEXEC); + } + + if (wait_for_pid(pid2)) + _exit(PIDFD_ERROR); + + if (pid2 >= PID_RECYCLE) + break; + } + + /* + * We want to be as predictable as we can so if we haven't been + * able to grab pid PID_RECYCLE skip the test. + * NOTE(review): pidfd is only assigned when pid2 == PID_RECYCLE, so it + * is still -1 whenever this branch runs and the close() is a no-op. + */ + if (pid2 != PID_RECYCLE) { + /* skip test */ + close(pidfd); + _exit(PIDFD_SKIP); + } + + if (pidfd < 0) + _exit(PIDFD_ERROR); + /* fork until a child is handed pid PID_RECYCLE again, or give up after PIDFD_MAX_DEFAULT + 1 tries */ + for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) { + char c; + int pipe_fds[2]; + pid_t recycled_pid; + int child_ret = PIDFD_PASS; + + ret = pipe2(pipe_fds, O_CLOEXEC); + if (ret < 0) + _exit(PIDFD_ERROR); + + recycled_pid = fork(); + if (recycled_pid < 0) + _exit(PIDFD_ERROR); + + if (recycled_pid == 0) { /* child: wait on the pipe until read() returns, then exit */ + close(pipe_fds[1]); + (void)read(pipe_fds[0], &c, 1); + close(pipe_fds[0]); + + _exit(PIDFD_PASS); + } + + /* + * Stop the child so we can inspect whether we have + * recycled pid PID_RECYCLE. + */ + close(pipe_fds[0]); + ret = kill(recycled_pid, SIGSTOP); + close(pipe_fds[1]); + if (ret) { + (void)wait_for_pid(recycled_pid); + _exit(PIDFD_ERROR); + } + + /* + * We have recycled the pid. Try to signal it. This + * needs to fail since this is a different process than + * the one the pidfd refers to. 
+ */ + if (recycled_pid == PID_RECYCLE) { + ret = sys_pidfd_send_signal(pidfd, SIGCONT, + NULL, 0); + if (ret && errno == ESRCH) + child_ret = PIDFD_XFAIL; + else + child_ret = PIDFD_FAIL; + } + + /* let the process move on */ + ret = kill(recycled_pid, SIGCONT); + if (ret) + (void)kill(recycled_pid, SIGKILL); + + if (wait_for_pid(recycled_pid)) + _exit(PIDFD_ERROR); + + switch (child_ret) { + case PIDFD_FAIL: + /* fallthrough */ + case PIDFD_XFAIL: + _exit(child_ret); + case PIDFD_PASS: + break; + default: + /* not reached */ + _exit(PIDFD_ERROR); + } + + /* + * If the user set a custom pid_max limit we could be + * in the millions. + * Skip the test in this case. + */ + if (recycled_pid > PIDFD_MAX_DEFAULT) + _exit(PIDFD_SKIP); + } + + /* failed to recycle pid */ + _exit(PIDFD_SKIP); + } + + ret = wait_for_pid(pid1); + switch (ret) { + case PIDFD_FAIL: + ksft_exit_fail_msg( + "%s test: Managed to signal recycled pid %d\n", + test_name, PID_RECYCLE); + case PIDFD_PASS: + ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", + test_name, PID_RECYCLE); + case PIDFD_SKIP: + ksft_test_result_skip("%s test: Skipping test\n", test_name); + ret = 0; + break; + case PIDFD_XFAIL: + ksft_test_result_pass( + "%s test: Failed to signal recycled pid as expected\n", + test_name); + ret = 0; + break; + default /* PIDFD_ERROR */: + ksft_exit_fail_msg("%s test: Error while running tests\n", + test_name); + } + + return ret; +} + +static int test_pidfd_send_signal_syscall_support(void) +{ + int pidfd, ret; + const char *test_name = "pidfd_send_signal check for support"; + + pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC); + if (pidfd < 0) + ksft_exit_fail_msg( + "%s test: Failed to open process file descriptor\n", + test_name); + + ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0); + if (ret < 0) { + if (errno == ENOSYS) { + ksft_test_result_skip( + "%s test: pidfd_send_signal() syscall not supported\n", + test_name); + return 0; + } + ksft_exit_fail_msg("%s test: Failed to 
send signal\n", + test_name); + } + + have_pidfd_send_signal = true; + close(pidfd); + ksft_test_result_pass( + "%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n", + test_name); + return 0; +} +/* Thread body: replace the process image with "/bin/sleep CHILD_THREAD_MIN_WAIT"; the trailing message is only printed if execl() returns. */ +static void *test_pidfd_poll_exec_thread(void *priv) +{ + ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n", + getpid(), syscall(SYS_gettid)); + ksft_print_msg("Child Thread: doing exec of sleep\n"); + + execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL); + + ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", + getpid(), syscall(SYS_gettid)); + return NULL; +} +/* Wait up to 5000 ms for EPOLLIN on pidfd and abort the test run unless exactly one EPOLLIN event is reported. */ +static void poll_pidfd(const char *test_name, int pidfd) +{ + int c; + int epoll_fd = epoll_create1(EPOLL_CLOEXEC); + struct epoll_event event = { 0 }, events[MAX_EVENTS] = { { 0 } }; /* zeroed so the failure message below never prints indeterminate data when epoll_wait() returns 0 or -1 */ + + if (epoll_fd == -1) + ksft_exit_fail_msg("%s test: Failed to create epoll file descriptor " + "(errno %d)\n", + test_name, errno); + + event.events = EPOLLIN; + event.data.fd = pidfd; + + if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pidfd, &event)) { + ksft_exit_fail_msg("%s test: Failed to add epoll file descriptor " + "(errno %d)\n", + test_name, errno); + } + + c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000); + if (c != 1 || !(events[0].events & EPOLLIN)) + ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) " + "(errno %d)\n", + test_name, c, events[0].events, errno); + + close(epoll_fd); + return; + +} +/* clone() entry point: start one thread that execs sleep, then idle in the leader thread. */ +static int child_poll_exec_test(void *args) +{ + pthread_t t1; + + ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(), + syscall(SYS_gettid)); + pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL); + /* + * Exec in the non-leader thread will destroy the leader immediately. + * If the wait in the parent returns too soon, the test fails. 
+ */ + while (1) + sleep(1); + + return 0; +} +/* Clone a child whose non-leader thread execs sleep, wait for it with waitpid() (use_waitpid != 0) or by polling the pidfd, and pass only if the wait lasted between CHILD_THREAD_MIN_WAIT and CHILD_THREAD_MIN_WAIT + 2 seconds. */ +static void test_pidfd_poll_exec(int use_waitpid) +{ + int pid, pidfd = 0; + int status, ret; + time_t prog_start = time(NULL); + const char *test_name = "pidfd_poll check for premature notification on child thread exec"; + + ksft_print_msg("Parent: pid: %d\n", getpid()); + pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_exec_test); + if (pid < 0) + ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n", + test_name, pid, errno); + + ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid); + + if (use_waitpid) { + ret = waitpid(pid, &status, 0); + if (ret == -1) + ksft_print_msg("Parent: error\n"); + + if (ret == pid) + ksft_print_msg("Parent: Child process waited for.\n"); + } else { + poll_pidfd(test_name, pidfd); + } + /* elapsed whole seconds since the function was entered */ + time_t prog_time = time(NULL) - prog_start; + + ksft_print_msg("Time waited for child: %lu\n", prog_time); + + close(pidfd); + + if (prog_time < CHILD_THREAD_MIN_WAIT || prog_time > CHILD_THREAD_MIN_WAIT + 2) + ksft_exit_fail_msg("%s test: Failed\n", test_name); + else + ksft_test_result_pass("%s test: Passed\n", test_name); +} +/* Thread body: sleep for CHILD_THREAD_MIN_WAIT seconds, logging before and after. */ +static void *test_pidfd_poll_leader_exit_thread(void *priv) +{ + ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n", + getpid(), syscall(SYS_gettid)); + sleep(CHILD_THREAD_MIN_WAIT); + ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); + return NULL; +} +/* Timestamp written by the child just before its leader thread exits and read back by the parent. */ +static time_t *child_exit_secs; +static int child_poll_leader_exit_test(void *args) +{ + pthread_t t1, t2; + + ksft_print_msg("Child: starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); + pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL); + pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL); + + /* + * glibc exit calls exit_group syscall, so explicitly call the exit syscall + * so that only the group leader exits, leaving the threads alone. 
+ */ + *child_exit_secs = time(NULL); + syscall(SYS_exit, 0); + /* Never reached, but appeases compiler thinking we should return. */ + exit(0); +} + +static void test_pidfd_poll_leader_exit(int use_waitpid) +{ + int pid, pidfd = 0; + int status, ret = 0; + const char *test_name = "pidfd_poll check for premature notification on non-empty" + "group leader exit"; + + child_exit_secs = mmap(NULL, sizeof *child_exit_secs, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + if (child_exit_secs == MAP_FAILED) + ksft_exit_fail_msg("%s test: mmap failed (errno %d)\n", + test_name, errno); + + ksft_print_msg("Parent: pid: %d\n", getpid()); + pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_leader_exit_test); + if (pid < 0) + ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n", + test_name, pid, errno); + + ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid); + + if (use_waitpid) { + ret = waitpid(pid, &status, 0); + if (ret == -1) + ksft_print_msg("Parent: error\n"); + } else { + /* + * This sleep tests for the case where if the child exits, and is in + * EXIT_ZOMBIE, but the thread group leader is non-empty, then the poll + * doesn't prematurely return even though there are active threads + */ + sleep(1); + poll_pidfd(test_name, pidfd); + } + + if (ret == pid) + ksft_print_msg("Parent: Child process waited for.\n"); + + time_t since_child_exit = time(NULL) - *child_exit_secs; + + ksft_print_msg("Time since child exit: %lu\n", since_child_exit); + + close(pidfd); + + if (since_child_exit < CHILD_THREAD_MIN_WAIT || + since_child_exit > CHILD_THREAD_MIN_WAIT + 2) + ksft_exit_fail_msg("%s test: Failed\n", test_name); + else + ksft_test_result_pass("%s test: Passed\n", test_name); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(8); + + test_pidfd_poll_exec(0); + test_pidfd_poll_exec(1); + test_pidfd_poll_leader_exit(0); + test_pidfd_poll_leader_exit(1); + test_pidfd_send_signal_syscall_support(); 
+ test_pidfd_send_signal_simple_success(); + test_pidfd_send_signal_exited_fail(); + test_pidfd_send_signal_recycled_pid_fail(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c new file mode 100644 index 000000000..0dcb8365d --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pidfd.h" +#include "../kselftest_harness.h" + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif +/* Raw clone3() syscall wrapper; always passes sizeof(struct clone_args) as the size argument. */ +static pid_t sys_clone3(struct clone_args *args) +{ + return syscall(__NR_clone3, args, sizeof(struct clone_args)); +} +/* Raw waitid() syscall wrapper that exposes the fifth (struct rusage *) argument. */ +static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, + struct rusage *ru) +{ + return syscall(__NR_waitid, which, pid, info, options, ru); +} +/* waitid(P_PIDFD) must return non-zero for fds opened on /proc/self and /dev/null; for a pidfd obtained from clone3(CLONE_PIDFD) the exited child's status is then checked. */ +TEST(wait_simple) +{ + int pidfd = -1; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); + pidfd = -1; + + pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); + pidfd = -1; + /* args.pidfd and args.parent_tid point at the locals above, which are read after the clone */ + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + 
ASSERT_GE(pid, 0); + ASSERT_EQ(WIFEXITED(info.si_status), true); + ASSERT_EQ(WEXITSTATUS(info.si_status), 0); + EXPECT_EQ(close(pidfd), 0); + /* the report must describe a normal exit of the child whose tid clone3() stored in parent_tid */ + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); +} +/* Walk a clone3() child through stopped -> continued -> stopped -> killed and check that waitid(P_PIDFD) reports each state with the matching si_code. */ +TEST(wait_states) +{ + int pidfd = -1; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int pfd[2]; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + ASSERT_EQ(pipe(pfd), 0); + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { /* child: stop itself, wait for one byte from the parent, stop itself again */ + char buf[2]; + + close(pfd[1]); + kill(getpid(), SIGSTOP); + ASSERT_EQ(read(pfd[0], buf, 1), 1); + close(pfd[0]); + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + close(pfd[0]); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + /* the byte that releases the child's read() is written only after the continue event was collected */ + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(write(pfd[1], "C", 1), 1); + close(pfd[1]); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_CONTINUED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_KILLED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); +} +/* Exercise waitid(P_PIDFD) on pidfds opened with PIDFD_NONBLOCK; the pidfds come from sys_pidfd_open(), so clone3() is used without CLONE_PIDFD. */ +TEST(wait_nonblock) +{ + int pidfd; + unsigned int flags = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .flags = 
CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + /* + * Callers need to see ECHILD with non-blocking pidfds when no child + * process exists. + */ + pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, ECHILD); + EXPECT_EQ(close(pidfd), 0); + + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { /* child: stop itself; it exits once the parent sends SIGCONT */ + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + flags = fcntl(pidfd, F_GETFL, 0); + ASSERT_GT((int)flags, 0); /* flags is unsigned: compare as int so a -1 failure return from fcntl() is caught */ + ASSERT_GT((flags & O_NONBLOCK), 0); + + /* + * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when + * child processes exist but none have exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EAGAIN); + + /* + * Callers need to continue seeing 0 with non-blocking pidfd and + * WNOHANG raised explicitly when child processes exist but none have + * exited. 
+ */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ASSERT_EQ(ret, 0); + /* clear O_NONBLOCK on the pidfd before the remaining waits */ + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + /* resume the stopped child so that it reaches its exit() call */ + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3