summary refs log tree commit diff stats
path: root/tools/testing/selftests/clone3
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/clone3')
-rw-r--r-- tools/testing/selftests/clone3/.gitignore 5
-rw-r--r-- tools/testing/selftests/clone3/Makefile 8
-rw-r--r-- tools/testing/selftests/clone3/clone3.c 210
-rw-r--r-- tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c 182
-rw-r--r-- tools/testing/selftests/clone3/clone3_clear_sighand.c 128
-rw-r--r-- tools/testing/selftests/clone3/clone3_selftests.h 82
-rw-r--r-- tools/testing/selftests/clone3/clone3_set_tid.c 397
7 files changed, 1012 insertions, 0 deletions
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
new file mode 100644
index 0000000000..83c0f62460
--- /dev/null
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+clone3
+clone3_clear_sighand
+clone3_set_tid
+clone3_cap_checkpoint_restore
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
new file mode 100644
index 0000000000..84832c369a
--- /dev/null
+++ b/tools/testing/selftests/clone3/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Debug info, GNU C99, plus whatever include flags KHDR_INCLUDES carries.
+CFLAGS += -g -std=gnu99 $(KHDR_INCLUDES)
+# clone3_cap_checkpoint_restore uses the libcap API.
+LDLIBS += -lcap
+
+# One generated test program per line.
+TEST_GEN_PROGS := clone3
+TEST_GEN_PROGS += clone3_clear_sighand
+TEST_GEN_PROGS += clone3_set_tid
+TEST_GEN_PROGS += clone3_cap_checkpoint_restore
+
+include ../lib.mk
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
new file mode 100644
index 0000000000..1c61e3c022
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Based on Christian Brauner's clone3() example */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+/* Selects which fields call_clone3() overrides before issuing clone3(). */
+enum test_mode {
+	/* Keep the caller's flags and the default SIGCHLD exit signal. */
+	CLONE3_ARGS_NO_TEST = 0,
+	/* Force both flags and exit_signal to zero. */
+	CLONE3_ARGS_ALL_0,
+	/* exit_signal with bits set in the upper 32 bits. */
+	CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+	/* exit_signal with bit 31 set. */
+	CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+	/* exit_signal set to 0x100. */
+	CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+	/* exit_signal set to 0xf0. */
+	CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+};
+
+/*
+ * Issue one clone3() call described by the arguments and reap the child.
+ *
+ * @flags:     initial value of the flags field.
+ * @size:      size passed to clone3(); 0 means sizeof(struct __clone_args).
+ * @test_mode: selects which fields are overridden before the call.
+ *
+ * Returns -errno when clone3() or waitpid() fails, -1 when the child did
+ * not terminate via exit, and the child's exit status otherwise.
+ */
+static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
+{
+	struct __clone_args args = {
+		.flags = flags,
+		.exit_signal = SIGCHLD,
+	};
+
+	/* The known argument struct followed by room for an oversized call. */
+	struct clone_args_extended {
+		struct __clone_args args;
+		__aligned_u64 excess_space[2];
+	} args_ext;
+
+	pid_t pid = -1;
+	int status;
+	int saved_errno;
+
+	memset(&args_ext, 0, sizeof(args_ext));
+	/*
+	 * Only the second excess word is made non-zero, so a size covering
+	 * just the first (zeroed) word differs from one covering both.
+	 */
+	if (size > sizeof(struct __clone_args))
+		args_ext.excess_space[1] = 1;
+
+	if (size == 0)
+		size = sizeof(struct __clone_args);
+
+	switch (test_mode) {
+	case CLONE3_ARGS_NO_TEST:
+		/*
+		 * Uses default 'flags' and 'SIGCHLD'
+		 * assignment.
+		 */
+		break;
+	case CLONE3_ARGS_ALL_0:
+		args.flags = 0;
+		args.exit_signal = 0;
+		break;
+	case CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG:
+		args.exit_signal = 0xbadc0ded00000000ULL;
+		break;
+	case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG:
+		args.exit_signal = 0x0000000080000000ULL;
+		break;
+	case CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG:
+		args.exit_signal = 0x0000000000000100ULL;
+		break;
+	case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG:
+		args.exit_signal = 0x00000000000000f0ULL;
+		break;
+	}
+
+	memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
+
+	pid = sys_clone3((struct __clone_args *)&args_ext, size);
+	if (pid < 0) {
+		/* Keep errno: the logging call below may overwrite it. */
+		saved_errno = errno;
+		ksft_print_msg("%s - Failed to create new process\n",
+				strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	if (pid == 0) {
+		ksft_print_msg("I am the child, my PID is %d\n", getpid());
+		_exit(EXIT_SUCCESS);
+	}
+
+	ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
+			getpid(), pid);
+
+	/*
+	 * Wait for exactly the child created above. __WALL is kept because
+	 * CLONE3_ARGS_ALL_0 creates the child with exit_signal 0.
+	 */
+	if (waitpid(pid, &status, __WALL) < 0) {
+		saved_errno = errno;
+		ksft_print_msg("Child returned %s\n", strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	/* An exit status is only meaningful for a normal exit. */
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Run call_clone3() once and report a kselftest pass or fail depending on
+ * whether its return value equals @expected.
+ */
+static void test_clone3(uint64_t flags, size_t size, int expected,
+			enum test_mode test_mode)
+{
+	const pid_t self = getpid();
+	int result;
+
+	ksft_print_msg(
+		"[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n",
+		self, flags, size);
+
+	result = call_clone3(flags, size, test_mode);
+
+	ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
+			self, result, expected);
+
+	if (result == expected) {
+		ksft_test_result_pass(
+			"[%d] Result (%d) matches expectation (%d)\n",
+			self, result, expected);
+	} else {
+		ksft_test_result_fail(
+			"[%d] Result (%d) is different than expected (%d)\n",
+			self, result, expected);
+	}
+}
+
+/*
+ * Drive the 19 planned clone3() argument/size checks. Cases that are
+ * expected to create a new namespace are skipped when not running as
+ * root.
+ */
+int main(int argc, char *argv[])
+{
+	uid_t uid = getuid();
+
+	ksft_print_header();
+	ksft_set_plan(19);
+	test_clone3_supported();
+
+	/* Just a simple clone3() should return 0.*/
+	test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() in a new PID NS.*/
+	if (uid == 0)
+		test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
+	else
+		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() with sizeof(struct clone_args) + 8 */
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() with exit_signal having highest 32 bits non-zero */
+	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+
+	/* Do a clone3() with negative 32-bit exit_signal */
+	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+
+	/* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
+	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+
+	/* Do a clone3() with NSIG < exit_signal < CSIG */
+	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+
+	/* Oversized struct whose extra 8 bytes are zero. */
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+
+	/* Oversized struct whose extra bytes include a non-zero word. */
+	test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
+			CLONE3_ARGS_ALL_0);
+
+	test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
+			CLONE3_ARGS_ALL_0);
+
+	/* Do a clone3() with > page size */
+	test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
+	if (uid == 0)
+		test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
+				CLONE3_ARGS_NO_TEST);
+	else
+		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+	test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
+			CLONE3_ARGS_NO_TEST);
+
+	/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
+	if (uid == 0)
+		test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
+				CLONE3_ARGS_NO_TEST);
+	else
+		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+
+	/* Do a clone3() with > page size in a new PID NS */
+	test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
+			CLONE3_ARGS_NO_TEST);
+
+	/*
+	 * Do a clone3() in a new time namespace. Like the CLONE_NEWPID
+	 * cases that expect success, this is skipped for non-root callers.
+	 */
+	if (access("/proc/self/ns/time", F_OK) != 0) {
+		ksft_print_msg("Time namespaces are not supported\n");
+		ksft_test_result_skip("Skipping clone3() with CLONE_NEWTIME\n");
+	} else if (uid != 0) {
+		ksft_test_result_skip("Skipping clone3() with CLONE_NEWTIME\n");
+	} else {
+		test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST);
+	}
+
+	/* Do a clone3() with exit signal (SIGCHLD) in flags */
+	test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST);
+
+	ksft_finished();
+}
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
new file mode 100644
index 0000000000..52d3f0364b
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests are assuming to be running in the host's
+ * PID namespace.
+ */
+
+/* capabilities related code based on selftests/bpf/test_verifier.c */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest_harness.h"
+#include "clone3_selftests.h"
+
+#ifndef MAX_PID_NS_LEVEL
+#define MAX_PID_NS_LEVEL 32
+#endif
+
+/*
+ * Terminate a child process with status @ret via _exit(), after flushing
+ * stdout and stderr.
+ */
+static void child_exit(int ret)
+{
+	FILE *const streams[] = { stdout, stderr };
+	size_t i;
+
+	for (i = 0; i < sizeof(streams) / sizeof(streams[0]); i++)
+		fflush(streams[i]);
+
+	_exit(ret);
+}
+
+/*
+ * Create a child with clone3() asking for the PIDs listed in @set_tid and
+ * reap it.
+ *
+ * @_metadata:    harness metadata, needed by TH_LOG().
+ * @set_tid:      array of requested PIDs; set_tid[0] is compared with the
+ *                child's getpid().
+ * @set_tid_size: number of entries in @set_tid handed to the kernel.
+ *
+ * Returns -errno when clone3() or waitpid() fails, -1 when the child did
+ * not exit normally, otherwise the child's exit status (EXIT_SUCCESS only
+ * if the child observed the requested PID).
+ */
+static int call_clone3_set_tid(struct __test_metadata *_metadata,
+			       pid_t *set_tid, size_t set_tid_size)
+{
+	int status;
+	int saved_errno;
+	pid_t pid = -1;
+
+	struct __clone_args args = {
+		.exit_signal = SIGCHLD,
+		.set_tid = ptr_to_u64(set_tid),
+		.set_tid_size = set_tid_size,
+	};
+
+	pid = sys_clone3(&args, sizeof(args));
+	if (pid < 0) {
+		/* Keep errno: the logging call below may overwrite it. */
+		saved_errno = errno;
+		TH_LOG("%s - Failed to create new process",
+		       strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	if (pid == 0) {
+		TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]);
+
+		if (set_tid[0] != getpid())
+			child_exit(EXIT_FAILURE);
+		child_exit(EXIT_SUCCESS);
+	}
+
+	TH_LOG("I am the parent (%d). My child's pid is %d", getpid(), pid);
+
+	if (waitpid(pid, &status, 0) < 0) {
+		saved_errno = errno;
+		TH_LOG("Child returned %s", strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Logging wrapper around call_clone3_set_tid(): announces the attempt,
+ * runs it, logs the outcome and hands the result back unchanged.
+ */
+static int test_clone3_set_tid(struct __test_metadata *_metadata,
+			       pid_t *set_tid, size_t set_tid_size)
+{
+	int rc;
+
+	TH_LOG("[%d] Trying clone3() with CLONE_SET_TID to %d", getpid(), set_tid[0]);
+
+	rc = call_clone3_set_tid(_metadata, set_tid, set_tid_size);
+
+	TH_LOG("[%d] clone3() with CLONE_SET_TID %d says:%d", getpid(), set_tid[0], rc);
+
+	return rc;
+}
+
+/*
+ * Layout that set_capability() overlays on the object returned by
+ * cap_get_proc() so that it can set a capability bit directly.
+ * NOTE(review): this relies on libcap's internal representation matching
+ * this struct - confirm against the libcap version in use.
+ */
+struct libcap {
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct data[2];
+};
+
+/*
+ * Reduce the process capabilities to CAP_SETUID, CAP_SETGID and capability
+ * number 40 (CAP_CHECKPOINT_RESTORE), in both the effective and permitted
+ * sets.
+ *
+ * Returns 0 on success and -1 if any libcap call fails (the failing call
+ * is reported through perror()).
+ */
+static int set_capability(void)
+{
+	cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID };
+	const int ncap = sizeof(cap_values) / sizeof(cap_values[0]);
+	struct libcap *cap;
+	int ret = -1;
+	cap_t caps;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+
+	/* Drop all capabilities */
+	if (cap_clear(caps)) {
+		perror("cap_clear");
+		goto out;
+	}
+
+	if (cap_set_flag(caps, CAP_EFFECTIVE, ncap, cap_values, CAP_SET) ||
+	    cap_set_flag(caps, CAP_PERMITTED, ncap, cap_values, CAP_SET)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+
+	cap = (struct libcap *) caps;
+
+	/* 40 -> CAP_CHECKPOINT_RESTORE; bits 32..63 live in data[1]. */
+	cap->data[1].effective |= 1U << (40 - 32);
+	cap->data[1].permitted |= 1U << (40 - 32);
+
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
+}
+
+/*
+ * Check that clone3() with set_tid is refused with -EPERM for an
+ * unprivileged user whose capabilities are not effective, and succeeds
+ * once set_capability() has been applied again after the uid/gid switch.
+ */
+TEST(clone3_cap_checkpoint_restore)
+{
+	pid_t pid;
+	int status;
+	pid_t set_tid[1];
+
+	test_clone3_supported();
+
+	EXPECT_EQ(getuid(), 0)
+		SKIP(return, "Skipping all tests as non-root");
+
+	memset(&set_tid, 0, sizeof(set_tid));
+
+	/* Find the current active PID */
+	pid = fork();
+	ASSERT_GE(pid, 0)
+		TH_LOG("%s - fork() failed", strerror(errno));
+	if (pid == 0) {
+		TH_LOG("Child has PID %d", getpid());
+		child_exit(EXIT_SUCCESS);
+	}
+	ASSERT_GT(waitpid(pid, &status, 0), 0)
+		TH_LOG("Waiting for child %d failed", pid);
+
+	/* After the child has finished, its PID should be free. */
+	set_tid[0] = pid;
+
+	ASSERT_EQ(set_capability(), 0)
+		TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+
+	ASSERT_EQ(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), 0);
+
+	EXPECT_EQ(setgid(65534), 0)
+		TH_LOG("Failed to setgid(65534)");
+	ASSERT_EQ(setuid(65534), 0);
+
+	/* This would fail without CAP_CHECKPOINT_RESTORE */
+	ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), -EPERM);
+	ASSERT_EQ(set_capability(), 0)
+		TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+	/* This should work as we have CAP_CHECKPOINT_RESTORE as non-root */
+	ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
new file mode 100644
index 0000000000..47a8c0fc36
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+#ifndef CLONE_CLEAR_SIGHAND
+#define CLONE_CLEAR_SIGHAND 0x100000000ULL
+#endif
+
+/* Signal handler that deliberately does nothing. */
+static void nop_handler(int signo)
+{
+	(void)signo;
+}
+
+/*
+ * Wait for @pid, retrying when waitpid() is interrupted (EINTR).
+ *
+ * Returns the child's exit status, or -1 if waitpid() fails for another
+ * reason or the child did not exit normally.
+ */
+static int wait_for_pid(pid_t pid)
+{
+	int wstatus;
+	int rc;
+
+	do {
+		rc = waitpid(pid, &wstatus, 0);
+	} while (rc == -1 && errno == EINTR);
+
+	if (rc == -1)
+		return -1;
+
+	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
+}
+
+/*
+ * Verify CLONE_CLEAR_SIGHAND:
+ *  - combining it with CLONE_SIGHAND must not create a process;
+ *  - a child created with it must see SIG_DFL for SIGUSR1 and SIGUSR2
+ *    even though the parent installed handlers for both.
+ *
+ * Any failure ends the test program through ksft_exit_fail_msg().
+ */
+static void test_clone3_clear_sighand(void)
+{
+	int ret;
+	pid_t pid;
+	struct __clone_args args = {};
+	struct sigaction act = {};
+
+	/*
+	 * Check that CLONE_CLEAR_SIGHAND and CLONE_SIGHAND are mutually
+	 * exclusive.
+	 */
+	args.flags |= CLONE_CLEAR_SIGHAND | CLONE_SIGHAND;
+	args.exit_signal = SIGCHLD;
+	pid = sys_clone3(&args, sizeof(args));
+	/* A child that should not exist must not go on to run the test. */
+	if (pid == 0)
+		exit(EXIT_SUCCESS);
+
+	if (pid > 0) {
+		/* Reap the unexpected child before reporting the failure. */
+		wait(NULL);
+		ksft_exit_fail_msg(
+			"clone3(CLONE_CLEAR_SIGHAND | CLONE_SIGHAND) succeeded\n");
+	}
+
+	act.sa_handler = nop_handler;
+	ret = sigemptyset(&act.sa_mask);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - sigemptyset() failed\n",
+				   strerror(errno));
+
+	act.sa_flags = 0;
+
+	/* Register signal handler for SIGUSR1 */
+	ret = sigaction(SIGUSR1, &act, NULL);
+	if (ret < 0)
+		ksft_exit_fail_msg(
+			"%s - sigaction(SIGUSR1, &act, NULL) failed\n",
+			strerror(errno));
+
+	/* Register signal handler for SIGUSR2 */
+	ret = sigaction(SIGUSR2, &act, NULL);
+	if (ret < 0)
+		ksft_exit_fail_msg(
+			"%s - sigaction(SIGUSR2, &act, NULL) failed\n",
+			strerror(errno));
+
+	/* Check that CLONE_CLEAR_SIGHAND works. */
+	args.flags = CLONE_CLEAR_SIGHAND;
+	pid = sys_clone3(&args, sizeof(args));
+	if (pid < 0)
+		ksft_exit_fail_msg("%s - clone3(CLONE_CLEAR_SIGHAND) failed\n",
+				   strerror(errno));
+
+	if (pid == 0) {
+		/* Child: both dispositions must have been reset to default. */
+		ret = sigaction(SIGUSR1, NULL, &act);
+		if (ret < 0)
+			exit(EXIT_FAILURE);
+
+		if (act.sa_handler != SIG_DFL)
+			exit(EXIT_FAILURE);
+
+		ret = sigaction(SIGUSR2, NULL, &act);
+		if (ret < 0)
+			exit(EXIT_FAILURE);
+
+		if (act.sa_handler != SIG_DFL)
+			exit(EXIT_FAILURE);
+
+		exit(EXIT_SUCCESS);
+	}
+
+	ret = wait_for_pid(pid);
+	if (ret)
+		ksft_exit_fail_msg(
+			"Failed to clear signal handler for child process\n");
+
+	ksft_test_result_pass("Cleared signal handlers for child process\n");
+}
+
+/* Entry point: plan one test, check clone3() is usable, then run it. */
+int main(int argc, char **argv)
+{
+	(void)argc;
+	(void)argv;
+
+	ksft_print_header();
+	ksft_set_plan(1);
+
+	test_clone3_supported();
+	test_clone3_clear_sighand();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
new file mode 100644
index 0000000000..e81ffaaee0
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CLONE3_SELFTESTS_H
+#define _CLONE3_SELFTESTS_H
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+
+#ifndef CLONE_INTO_CGROUP
+#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#endif
+
+#ifndef __NR_clone3
+#define __NR_clone3 -1
+#endif
+
+/* Sizes of the successively published layouts of the argument struct. */
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#endif
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#endif
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
+#endif
+
+/* Local definition of the argument block handed to sys_clone3(). */
+struct __clone_args {
+	/* Fields up to CLONE_ARGS_SIZE_VER0. */
+	__aligned_u64 flags;
+	__aligned_u64 pidfd;
+	__aligned_u64 child_tid;
+	__aligned_u64 parent_tid;
+	__aligned_u64 exit_signal;
+	__aligned_u64 stack;
+	__aligned_u64 stack_size;
+	__aligned_u64 tls;
+	/* Fields up to CLONE_ARGS_SIZE_VER1. */
+	__aligned_u64 set_tid;
+	__aligned_u64 set_tid_size;
+	/* Field up to CLONE_ARGS_SIZE_VER2. */
+	__aligned_u64 cgroup;
+};
+
+/*
+ * Thin wrapper around the raw clone3() system call.
+ *
+ * stdout and stderr are flushed first; presumably so that output still
+ * buffered in the parent is not also emitted by the child.
+ *
+ * Returns whatever syscall() returns: the child's PID in the parent, 0 in
+ * the child, or -1 with errno set on failure.
+ *
+ * Declared inline, like test_clone3_supported(), because it lives in a
+ * header that several test programs include.
+ */
+static inline pid_t sys_clone3(struct __clone_args *args, size_t size)
+{
+	fflush(stdout);
+	fflush(stderr);
+	return syscall(__NR_clone3, args, size);
+}
+
+/*
+ * Probe for clone3() and end the test program with a skip when it is not
+ * available. The probe uses an exit_signal that is expected to be
+ * rejected, so no process should be created; if one is, that is reported
+ * as a failure.
+ */
+static inline void test_clone3_supported(void)
+{
+	/* Set to something that will always cause EINVAL. */
+	struct __clone_args args = { .exit_signal = -1 };
+	pid_t child;
+
+	if (__NR_clone3 < 0)
+		ksft_exit_skip("clone3() syscall is not supported\n");
+
+	child = sys_clone3(&args, sizeof(args));
+	if (child == 0) {
+		/* Unexpected child: leave immediately. */
+		exit(EXIT_SUCCESS);
+	} else if (child > 0) {
+		wait(NULL);
+		ksft_exit_fail_msg(
+			"Managed to create child process with invalid exit_signal\n");
+	}
+
+	if (errno == ENOSYS)
+		ksft_exit_skip("clone3() syscall is not supported\n");
+
+	ksft_print_msg("clone3() syscall supported\n");
+}
+
+#endif /* _CLONE3_SELFTESTS_H */
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
new file mode 100644
index 0000000000..0229e9ebb9
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests are assuming to be running in the host's
+ * PID namespace.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest.h"
+#include "clone3_selftests.h"
+
+#ifndef MAX_PID_NS_LEVEL
+#define MAX_PID_NS_LEVEL 32
+#endif
+
+static int pipe_1[2];
+static int pipe_2[2];
+
+/*
+ * Terminate a child process with status @ret via _exit(), after flushing
+ * stdout and stderr.
+ */
+static void child_exit(int ret)
+{
+	FILE *const streams[] = { stdout, stderr };
+	size_t i;
+
+	for (i = 0; i < sizeof(streams) / sizeof(streams[0]); i++)
+		fflush(streams[i]);
+
+	_exit(ret);
+}
+
+/*
+ * Create a child with clone3() asking for the PIDs listed in @set_tid and
+ * reap it.
+ *
+ * @set_tid:      array of requested PIDs; set_tid[0] is compared with the
+ *                child's getpid().
+ * @set_tid_size: number of entries in @set_tid handed to the kernel.
+ * @flags:        clone flags.
+ * @expected_pid: PID the parent expects clone3() to return; 0 disables
+ *                the check.
+ * @wait_for_it:  when true the child writes one byte to pipe_1 and then
+ *                blocks reading one byte from pipe_2 before exiting.
+ *
+ * Returns -errno when clone3() or waitpid() fails, -1 on a PID mismatch
+ * or abnormal child termination, otherwise the child's exit status.
+ */
+static int call_clone3_set_tid(pid_t *set_tid,
+			       size_t set_tid_size,
+			       int flags,
+			       int expected_pid,
+			       bool wait_for_it)
+{
+	int status;
+	int saved_errno;
+	pid_t pid = -1;
+
+	struct __clone_args args = {
+		.flags = flags,
+		.exit_signal = SIGCHLD,
+		.set_tid = ptr_to_u64(set_tid),
+		.set_tid_size = set_tid_size,
+	};
+
+	pid = sys_clone3(&args, sizeof(args));
+	if (pid < 0) {
+		/* Keep errno: the logging call below may overwrite it. */
+		saved_errno = errno;
+		ksft_print_msg("%s - Failed to create new process\n",
+			       strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	if (pid == 0) {
+		int ret;
+		char tmp = 0;
+		int exit_code = EXIT_SUCCESS;
+
+		ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
+			       getpid(), set_tid[0]);
+		if (wait_for_it) {
+			ksft_print_msg("[%d] Child is ready and waiting\n",
+				       getpid());
+
+			/* Signal the parent that the child is ready */
+			close(pipe_1[0]);
+			ret = write(pipe_1[1], &tmp, 1);
+			if (ret != 1) {
+				ksft_print_msg(
+					"Writing to pipe returned %d\n", ret);
+				exit_code = EXIT_FAILURE;
+			}
+			close(pipe_1[1]);
+			/* Block until a byte arrives on pipe_2. */
+			close(pipe_2[1]);
+			ret = read(pipe_2[0], &tmp, 1);
+			if (ret != 1) {
+				ksft_print_msg(
+					"Reading from pipe returned %d\n", ret);
+				exit_code = EXIT_FAILURE;
+			}
+			close(pipe_2[0]);
+		}
+
+		if (set_tid[0] != getpid())
+			child_exit(EXIT_FAILURE);
+		child_exit(exit_code);
+	}
+
+	if (expected_pid == 0 || expected_pid == pid) {
+		ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
+			       getpid(), pid);
+	} else {
+		ksft_print_msg(
+			"Expected child pid %d does not match actual pid %d\n",
+			expected_pid, pid);
+		return -1;
+	}
+
+	if (waitpid(pid, &status, 0) < 0) {
+		saved_errno = errno;
+		ksft_print_msg("Child returned %s\n", strerror(saved_errno));
+		return -saved_errno;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+/*
+ * Run call_clone3_set_tid() once and report a kselftest pass or fail
+ * depending on whether its return value equals @expected.
+ */
+static void test_clone3_set_tid(pid_t *set_tid,
+				size_t set_tid_size,
+				int flags,
+				int expected,
+				int expected_pid,
+				bool wait_for_it)
+{
+	const pid_t self = getpid();
+	int result;
+
+	ksft_print_msg(
+		"[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
+		self, set_tid[0], flags);
+
+	result = call_clone3_set_tid(set_tid, set_tid_size, flags,
+				     expected_pid, wait_for_it);
+
+	ksft_print_msg(
+		"[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+		self, set_tid[0], result, expected);
+
+	if (result == expected) {
+		ksft_test_result_pass(
+			"[%d] Result (%d) matches expectation (%d)\n",
+			self, result, expected);
+	} else {
+		ksft_test_result_fail(
+			"[%d] Result (%d) is different than expected (%d)\n",
+			self, result, expected);
+	}
+}
+/*
+ * Exercise clone3() with set_tid: invalid array sizes, invalid PIDs, and
+ * PID selection across nested PID namespaces. The later cases require
+ * root; when run as non-root they are accounted as skipped.
+ *
+ * Exits through ksft_exit_pass(), or ksft_exit_fail() if waiting for the
+ * namespace child fails.
+ */
+int main(int argc, char *argv[])
+{
+	FILE *f;
+	char buf = 0;
+	/* getline() needs a NULL (or malloc'ed) buffer pointer on entry. */
+	char *line = NULL;
+	int status;
+	int ret = 0;
+	size_t len = 0;
+	int pid_max = 0;
+	uid_t uid = getuid();
+	char proc_path[100] = {0};
+	pid_t pid, ns_pid;
+	/* Stay 0 if no NSpid line is found, so the final check fails cleanly. */
+	pid_t ns1 = 0, ns2 = 0, ns3 = 0;
+	pid_t set_tid[MAX_PID_NS_LEVEL * 2];
+
+	ksft_print_header();
+	ksft_set_plan(29);
+	test_clone3_supported();
+
+	if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
+		ksft_exit_fail_msg("pipe() failed\n");
+
+	f = fopen("/proc/sys/kernel/pid_max", "r");
+	if (f == NULL)
+		ksft_exit_fail_msg(
+			"%s - Could not open /proc/sys/kernel/pid_max\n",
+			strerror(errno));
+	if (fscanf(f, "%d", &pid_max) != 1) {
+		fclose(f);
+		ksft_exit_fail_msg(
+			"Could not parse /proc/sys/kernel/pid_max\n");
+	}
+	fclose(f);
+	ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
+
+	/* Try invalid settings */
+	memset(&set_tid, 0, sizeof(set_tid));
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+			-EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+
+	/*
+	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+	 * nested PID namespace.
+	 */
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+	memset(&set_tid, 0xff, sizeof(set_tid));
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+			-EINVAL, 0, 0);
+
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+
+	/*
+	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+	 * nested PID namespace.
+	 */
+	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+	memset(&set_tid, 0, sizeof(set_tid));
+	/* Try with an invalid PID */
+	set_tid[0] = 0;
+	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+	set_tid[0] = -1;
+	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+	/* Claim that the set_tid array actually contains 2 elements. */
+	test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+
+	/* Try it in a new PID namespace */
+	if (uid == 0)
+		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+	else
+		ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+	/* Try with a valid PID (1) this should return -EEXIST. */
+	set_tid[0] = 1;
+	if (uid == 0)
+		test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+	else
+		ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+	/* Try it in a new PID namespace */
+	if (uid == 0)
+		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+	else
+		ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+	/* pid_max should fail everywhere */
+	set_tid[0] = pid_max;
+	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+	if (uid == 0)
+		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+	else
+		ksft_test_result_skip("Clone3() with set_tid requires root\n");
+
+	if (uid != 0) {
+		/*
+		 * All remaining tests require root. Tell the framework
+		 * that all those tests are skipped as non-root.
+		 */
+		ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
+		goto out;
+	}
+
+	/* Find the current active PID */
+	pid = fork();
+	if (pid < 0)
+		ksft_exit_fail_msg("%s - fork() failed\n", strerror(errno));
+	if (pid == 0) {
+		ksft_print_msg("Child has PID %d\n", getpid());
+		child_exit(EXIT_SUCCESS);
+	}
+	if (waitpid(pid, &status, 0) < 0)
+		ksft_exit_fail_msg("Waiting for child %d failed\n", pid);
+
+	/* After the child has finished, its PID should be free. */
+	set_tid[0] = pid;
+	test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+
+	/* This should fail as there is no PID 1 in that namespace */
+	test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+
+	/*
+	 * Creating a process with PID 1 in the newly created most nested
+	 * PID namespace and PID 'pid' in the parent PID namespace. This
+	 * needs to work.
+	 */
+	set_tid[0] = 1;
+	set_tid[1] = pid;
+	test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+
+	ksft_print_msg("unshare PID namespace\n");
+	if (unshare(CLONE_NEWPID) == -1)
+		ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n",
+				strerror(errno));
+
+	set_tid[0] = pid;
+
+	/* This should fail as there is no PID 1 in that namespace */
+	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+	/* Let's create a PID 1 */
+	ns_pid = fork();
+	if (ns_pid < 0)
+		ksft_exit_fail_msg("%s - fork() failed\n", strerror(errno));
+	if (ns_pid == 0) {
+		/*
+		 * This and the next test cases check that all pid-s are
+		 * released on error paths.
+		 */
+		set_tid[0] = 43;
+		set_tid[1] = -1;
+		test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+
+		set_tid[0] = 43;
+		set_tid[1] = pid;
+		test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
+
+		ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
+		set_tid[0] = 2;
+		test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+
+		set_tid[0] = 1;
+		set_tid[1] = -1;
+		set_tid[2] = pid;
+		/* This should fail as there is invalid PID at level '1'. */
+		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
+
+		set_tid[0] = 1;
+		set_tid[1] = 42;
+		set_tid[2] = pid;
+		/*
+		 * This should fail as there are not enough active PID
+		 * namespaces. Again assuming this is running in the host's
+		 * PID namespace. Not yet nested.
+		 */
+		test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+
+		/*
+		 * This should work and from the parent we should see
+		 * something like 'NSpid: pid 42 1'.
+		 */
+		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
+
+		/* Report the number of failed sub-tests to the parent. */
+		child_exit(ksft_cnt.ksft_fail);
+	}
+
+	/* Parent keeps only the read end of pipe_1 and write end of pipe_2. */
+	close(pipe_1[1]);
+	close(pipe_2[0]);
+	if (read(pipe_1[0], &buf, 1) > 0)
+		ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
+
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid);
+	f = fopen(proc_path, "r");
+	if (f == NULL)
+		ksft_exit_fail_msg(
+			"%s - Could not open %s\n",
+			strerror(errno), proc_path);
+
+	while (getline(&line, &len, f) != -1) {
+		if (strstr(line, "NSpid")) {
+			int i;
+
+			/* Verify that all generated PIDs are as expected. */
+			i = sscanf(line, "NSpid:\t%d\t%d\t%d",
+				   &ns3, &ns2, &ns1);
+			if (i != 3) {
+				ksft_print_msg(
+					"Unexpected 'NSPid:' entry: %s",
+					line);
+				ns1 = ns2 = ns3 = 0;
+			}
+			break;
+		}
+	}
+	fclose(f);
+	free(line);
+
+	/* Tell the clone3()'d child to finish. */
+	if (write(pipe_2[1], &buf, 1) != 1)
+		ksft_print_msg("%s - Could not notify the child\n",
+			       strerror(errno));
+	close(pipe_2[1]);
+
+	if (waitpid(ns_pid, &status, 0) < 0) {
+		/* Keep errno: the logging call below may overwrite it. */
+		ret = -errno;
+		ksft_print_msg("Child returned %s\n", strerror(-ret));
+		goto out;
+	}
+
+	if (!WIFEXITED(status))
+		ksft_test_result_fail("Child error\n");
+
+	/* Fold the six sub-tests run in the namespace child into our totals. */
+	ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
+	ksft_cnt.ksft_fail = WEXITSTATUS(status);
+
+	if (ns3 == pid && ns2 == 42 && ns1 == 1)
+		ksft_test_result_pass(
+			"PIDs in all namespaces as expected (%d,%d,%d)\n",
+			ns3, ns2, ns1);
+	else
+		ksft_test_result_fail(
+			"PIDs in all namespaces not as expected (%d,%d,%d)\n",
+			ns3, ns2, ns1);
+out:
+	return !ret ? ksft_exit_pass() : ksft_exit_fail();
+}