summaryrefslogtreecommitdiffstats
path: root/src/system.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/system.c')
-rw-r--r--src/system.c554
1 files changed, 554 insertions, 0 deletions
diff --git a/src/system.c b/src/system.c
new file mode 100644
index 0000000..ae445bf
--- /dev/null
+++ b/src/system.c
@@ -0,0 +1,554 @@
+/**
+ * Seccomp System Interfaces
+ *
+ * Copyright (c) 2014 Red Hat <pmoore@redhat.com>
+ * Author: Paul Moore <paul@paul-moore.com>
+ */
+
+/*
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/prctl.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+
+#include "system.h"
+
+#include <seccomp.h>
+
+#include "arch.h"
+#include "db.h"
+#include "gen_bpf.h"
+#include "helper.h"
+
+/* NOTE: the seccomp syscall allowlist is currently disabled for testing
+ * purposes, but unless we can verify all of the supported ABIs before
+ * our next release we may have to enable the allowlist */
+#define SYSCALL_ALLOWLIST_ENABLE 0
+
+/* task global state */
+struct task_state {
+ /* seccomp(2) syscall */
+ int nr_seccomp;
+
+ /* userspace notification fd */
+ int notify_fd;
+
+ /* runtime support flags */
+ int sup_syscall;
+ int sup_flag_tsync;
+ int sup_flag_log;
+ int sup_action_log;
+ int sup_kill_process;
+ int sup_flag_spec_allow;
+ int sup_flag_new_listener;
+ int sup_user_notif;
+ int sup_flag_tsync_esrch;
+};
+static struct task_state state = {
+ .nr_seccomp = -1,
+
+ .notify_fd = -1,
+
+ .sup_syscall = -1,
+ .sup_flag_tsync = -1,
+ .sup_flag_log = -1,
+ .sup_action_log = -1,
+ .sup_kill_process = -1,
+ .sup_flag_spec_allow = -1,
+ .sup_flag_new_listener = -1,
+ .sup_user_notif = -1,
+ .sup_flag_tsync_esrch = -1,
+};
+
+/**
+ * Reset the task state
+ *
+ * This function fully resets the library's global "system task state".
+ *
+ */
+void sys_reset_state(void)
+{
+ state.nr_seccomp = -1;
+
+ if (state.notify_fd > 0)
+ close(state.notify_fd);
+ state.notify_fd = -1;
+
+ state.sup_syscall = -1;
+ state.sup_flag_tsync = -1;
+ state.sup_flag_log = -1;
+ state.sup_action_log = -1;
+ state.sup_kill_process = -1;
+ state.sup_flag_spec_allow = -1;
+ state.sup_flag_new_listener = -1;
+ state.sup_user_notif = -1;
+ state.sup_flag_tsync_esrch = -1;
+}
+
+/**
+ * Check to see if the seccomp() syscall is supported
+ *
+ * This function attempts to see if the system supports the seccomp() syscall.
+ * Unfortunately, there are a few reasons why this check may fail, including
+ * a previously loaded seccomp filter, so it is hard to say for certain.
+ * Return one if the syscall is supported, zero otherwise.
+ *
+ */
+int sys_chk_seccomp_syscall(void)
+{
+ int rc;
+ int nr_seccomp;
+
+ /* NOTE: it is reasonably safe to assume that we should be able to call
+ * seccomp() when the caller first starts, but we can't rely on
+ * it later so we need to cache our findings for use later */
+ if (state.sup_syscall >= 0)
+ return state.sup_syscall;
+
+#if SYSCALL_ALLOWLIST_ENABLE
+ /* architecture allowlist */
+ switch (arch_def_native->token) {
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_ARM:
+ case SCMP_ARCH_AARCH64:
+ case SCMP_ARCH_PPC64:
+ case SCMP_ARCH_PPC64LE:
+ case SCMP_ARCH_S390:
+ case SCMP_ARCH_S390X:
+ case SCMP_ARCH_RISCV64:
+ break;
+ default:
+ goto unsupported;
+ }
+#endif
+
+ nr_seccomp = arch_syscall_resolve_name(arch_def_native, "seccomp");
+ if (nr_seccomp < 0)
+ goto unsupported;
+
+ /* this is an invalid call because the second argument is non-zero, but
+ * depending on the errno value of ENOSYS or EINVAL we can guess if the
+ * seccomp() syscall is supported or not */
+ rc = syscall(nr_seccomp, SECCOMP_SET_MODE_STRICT, 1, NULL);
+ if (rc < 0 && errno == EINVAL)
+ goto supported;
+
+unsupported:
+ state.sup_syscall = 0;
+ return 0;
+supported:
+ state.nr_seccomp = nr_seccomp;
+ state.sup_syscall = 1;
+ return 1;
+}
+
+/**
+ * Force the seccomp() syscall support setting
+ * @param enable the intended support state
+ *
+ * This function overrides the current seccomp() syscall support setting; this
+ * is very much a "use at your own risk" function.
+ *
+ */
+void sys_set_seccomp_syscall(bool enable)
+{
+ state.sup_syscall = (enable ? 1 : 0);
+}
+
+/**
+ * Check to see if a seccomp action is supported
+ * @param action the seccomp action
+ *
+ * This function checks to see if a seccomp action is supported by the system.
+ * Return one if the action is supported, zero otherwise.
+ *
+ */
+int sys_chk_seccomp_action(uint32_t action)
+{
+ if (action == SCMP_ACT_KILL_PROCESS) {
+ if (state.sup_kill_process < 0) {
+ if (sys_chk_seccomp_syscall() == 1 &&
+ syscall(state.nr_seccomp,
+ SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0)
+ state.sup_kill_process = 1;
+ else
+ state.sup_kill_process = 0;
+ }
+
+ return state.sup_kill_process;
+ } else if (action == SCMP_ACT_KILL_THREAD) {
+ return 1;
+ } else if (action == SCMP_ACT_TRAP) {
+ return 1;
+ } else if ((action == SCMP_ACT_ERRNO(action & 0x0000ffff)) &&
+ ((action & 0x0000ffff) < MAX_ERRNO)) {
+ return 1;
+ } else if (action == SCMP_ACT_TRACE(action & 0x0000ffff)) {
+ return 1;
+ } else if (action == SCMP_ACT_LOG) {
+ if (state.sup_action_log < 0) {
+ if (sys_chk_seccomp_syscall() == 1 &&
+ syscall(state.nr_seccomp,
+ SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0)
+ state.sup_action_log = 1;
+ else
+ state.sup_action_log = 0;
+ }
+
+ return state.sup_action_log;
+ } else if (action == SCMP_ACT_ALLOW) {
+ return 1;
+ } else if (action == SCMP_ACT_NOTIFY) {
+ if (state.sup_user_notif < 0) {
+ struct seccomp_notif_sizes sizes;
+ if (sys_chk_seccomp_syscall() == 1 &&
+ syscall(state.nr_seccomp,
+ SECCOMP_GET_NOTIF_SIZES, 0, &sizes) == 0)
+ state.sup_user_notif = 1;
+ else
+ state.sup_user_notif = 0;
+ }
+
+ return state.sup_user_notif;
+ }
+
+ return 0;
+}
+
+/**
+ * Force a seccomp action support setting
+ * @param action the seccomp action
+ * @param enable the intended support state
+ *
+ * This function overrides the current seccomp action support setting; this
+ * is very much a "use at your own risk" function.
+ */
+void sys_set_seccomp_action(uint32_t action, bool enable)
+{
+ switch (action) {
+ case SCMP_ACT_LOG:
+ state.sup_action_log = (enable ? 1 : 0);
+ break;
+ case SCMP_ACT_KILL_PROCESS:
+ state.sup_kill_process = (enable ? 1 : 0);
+ break;
+ case SCMP_ACT_NOTIFY:
+ state.sup_user_notif = (enable ? 1 : 0);
+ break;
+ }
+}
+
+/**
+ * Check to see if a seccomp() flag is supported by the kernel
+ * @param flag the seccomp() flag
+ *
+ * This function checks to see if a seccomp() flag is supported by the kernel.
+ * Return one if the flag is supported, zero otherwise.
+ *
+ */
+static int _sys_chk_flag_kernel(int flag)
+{
+ /* this is an invalid seccomp(2) call because the last argument
+ * is NULL, but depending on the errno value of EFAULT we can
+ * guess if the filter flag is supported or not */
+ if (sys_chk_seccomp_syscall() == 1 &&
+ syscall(state.nr_seccomp,
+ SECCOMP_SET_MODE_FILTER, flag, NULL) == -1 &&
+ errno == EFAULT)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * Check to see if a seccomp() flag is supported
+ * @param flag the seccomp() flag
+ *
+ * This function checks to see if a seccomp() flag is supported by the system.
+ * Return one if the syscall is supported, zero if unsupported, negative values
+ * on error.
+ *
+ */
+int sys_chk_seccomp_flag(int flag)
+{
+ switch (flag) {
+ case SECCOMP_FILTER_FLAG_TSYNC:
+ if (state.sup_flag_tsync < 0)
+ state.sup_flag_tsync = _sys_chk_flag_kernel(flag);
+ return state.sup_flag_tsync;
+ case SECCOMP_FILTER_FLAG_LOG:
+ if (state.sup_flag_log < 0)
+ state.sup_flag_log = _sys_chk_flag_kernel(flag);
+ return state.sup_flag_log;
+ case SECCOMP_FILTER_FLAG_SPEC_ALLOW:
+ if (state.sup_flag_spec_allow < 0)
+ state.sup_flag_spec_allow = _sys_chk_flag_kernel(flag);
+ return state.sup_flag_spec_allow;
+ case SECCOMP_FILTER_FLAG_NEW_LISTENER:
+ if (state.sup_flag_new_listener < 0)
+ state.sup_flag_new_listener = _sys_chk_flag_kernel(flag);
+ return state.sup_flag_new_listener;
+ case SECCOMP_FILTER_FLAG_TSYNC_ESRCH:
+ if (state.sup_flag_tsync_esrch < 0)
+ state.sup_flag_tsync_esrch = _sys_chk_flag_kernel(flag);
+ return state.sup_flag_tsync_esrch;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * Force a seccomp() syscall flag support setting
+ * @param flag the seccomp() flag
+ * @param enable the intended support state
+ *
+ * This function overrides the current seccomp() syscall support setting for a
+ * given flag; this is very much a "use at your own risk" function.
+ *
+ */
+void sys_set_seccomp_flag(int flag, bool enable)
+{
+ switch (flag) {
+ case SECCOMP_FILTER_FLAG_TSYNC:
+ state.sup_flag_tsync = (enable ? 1 : 0);
+ break;
+ case SECCOMP_FILTER_FLAG_LOG:
+ state.sup_flag_log = (enable ? 1 : 0);
+ break;
+ case SECCOMP_FILTER_FLAG_SPEC_ALLOW:
+ state.sup_flag_spec_allow = (enable ? 1 : 0);
+ break;
+ case SECCOMP_FILTER_FLAG_NEW_LISTENER:
+ state.sup_flag_new_listener = (enable ? 1 : 0);
+ break;
+ case SECCOMP_FILTER_FLAG_TSYNC_ESRCH:
+ state.sup_flag_tsync_esrch = (enable ? 1 : 0);
+ break;
+ }
+}
+
+/**
+ * Loads the filter into the kernel
+ * @param col the filter collection
+ * @param rawrc pass the raw return code if true
+ *
+ * This function loads the given seccomp filter context into the kernel. If
+ * the filter was loaded correctly, the kernel will be enforcing the filter
+ * when this function returns. Returns zero on success, negative values on
+ * error.
+ *
+ */
+int sys_filter_load(struct db_filter_col *col, bool rawrc)
+{
+ int rc;
+ bool tsync_notify;
+ bool listener_req;
+ struct bpf_program *prgm = NULL;
+
+ rc = gen_bpf_generate(col, &prgm);
+ if (rc < 0)
+ return rc;
+
+ /* attempt to set NO_NEW_PRIVS */
+ if (col->attr.nnp_enable) {
+ rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (rc < 0)
+ goto filter_load_out;
+ }
+
+ tsync_notify = state.sup_flag_tsync_esrch > 0 && state.notify_fd == -1;
+ listener_req = state.sup_user_notif > 0 && \
+ col->notify_used && state.notify_fd == -1;
+
+ /* load the filter into the kernel */
+ if (sys_chk_seccomp_syscall() == 1) {
+ int flgs = 0;
+ if (tsync_notify) {
+ if (col->attr.tsync_enable)
+ flgs |= SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ if (listener_req)
+ flgs |= SECCOMP_FILTER_FLAG_NEW_LISTENER;
+ } else if (col->attr.tsync_enable) {
+ if (listener_req) {
+ /* NOTE: we _should_ catch this in db.c */
+ rc = -EFAULT;
+ goto filter_load_out;
+ }
+ flgs |= SECCOMP_FILTER_FLAG_TSYNC;
+ } else if (listener_req)
+ flgs |= SECCOMP_FILTER_FLAG_NEW_LISTENER;
+ if (col->attr.log_enable)
+ flgs |= SECCOMP_FILTER_FLAG_LOG;
+ if (col->attr.spec_allow)
+ flgs |= SECCOMP_FILTER_FLAG_SPEC_ALLOW;
+ rc = syscall(state.nr_seccomp,
+ SECCOMP_SET_MODE_FILTER, flgs, prgm);
+ if (tsync_notify && rc > 0) {
+ /* return 0 on NEW_LISTENER success, but save the fd */
+ state.notify_fd = rc;
+ rc = 0;
+ } else if (rc > 0 && col->attr.tsync_enable) {
+ /* always return -ESRCH if we fail to sync threads */
+ errno = ESRCH;
+ rc = -errno;
+ } else if (rc > 0 && state.sup_user_notif > 0) {
+ /* return 0 on NEW_LISTENER success, but save the fd */
+ state.notify_fd = rc;
+ rc = 0;
+ }
+ } else
+ rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prgm);
+
+filter_load_out:
+ /* cleanup and return */
+ gen_bpf_release(prgm);
+ if (rc == -ESRCH)
+ return -ESRCH;
+ if (rc < 0)
+ return (rawrc ? -errno : -ECANCELED);
+ return rc;
+}
+
+/**
+ * Return the userspace notification fd
+ *
+ * This function returns the userspace notification fd from
+ * SECCOMP_FILTER_FLAG_NEW_LISTENER. If the notification fd has not yet been
+ * set, or an error has occurred, -1 is returned.
+ *
+ */
+int sys_notify_fd(void)
+{
+ return state.notify_fd;
+}
+
+/**
+ * Allocate a pair of notification request/response structures
+ * @param req the request location
+ * @param resp the response location
+ *
+ * This function allocates a pair of request/response structure by computing
+ * the correct sized based on the currently running kernel. It returns zero on
+ * success, and negative values on failure.
+ *
+ */
+int sys_notify_alloc(struct seccomp_notif **req,
+ struct seccomp_notif_resp **resp)
+{
+ int rc;
+ static struct seccomp_notif_sizes sizes = { 0, 0, 0 };
+
+ if (state.sup_syscall <= 0)
+ return -EOPNOTSUPP;
+
+ if (sizes.seccomp_notif == 0 && sizes.seccomp_notif_resp == 0) {
+ rc = syscall(__NR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes);
+ if (rc < 0)
+ return -ECANCELED;
+ }
+ if (sizes.seccomp_notif == 0 || sizes.seccomp_notif_resp == 0)
+ return -EFAULT;
+
+ if (req) {
+ *req = zmalloc(sizes.seccomp_notif);
+ if (!*req)
+ return -ENOMEM;
+ }
+
+ if (resp) {
+ *resp = zmalloc(sizes.seccomp_notif_resp);
+ if (!*resp) {
+ if (req)
+ free(*req);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Receive a notification from a seccomp notification fd
+ * @param fd the notification fd
+ * @param req the request buffer to save into
+ *
+ * Blocks waiting for a notification on this fd. This function is thread safe
+ * (synchronization is performed in the kernel). Returns zero on success,
+ * negative values on error.
+ *
+ */
+int sys_notify_receive(int fd, struct seccomp_notif *req)
+{
+ if (state.sup_user_notif <= 0)
+ return -EOPNOTSUPP;
+
+ if (ioctl(fd, SECCOMP_IOCTL_NOTIF_RECV, req) < 0)
+ return -ECANCELED;
+
+ return 0;
+}
+
+/**
+ * Send a notification response to a seccomp notification fd
+ * @param fd the notification fd
+ * @param resp the response buffer to use
+ *
+ * Sends a notification response on this fd. This function is thread safe
+ * (synchronization is performed in the kernel). Returns zero on success,
+ * negative values on error.
+ *
+ */
+int sys_notify_respond(int fd, struct seccomp_notif_resp *resp)
+{
+ if (state.sup_user_notif <= 0)
+ return -EOPNOTSUPP;
+
+ if (ioctl(fd, SECCOMP_IOCTL_NOTIF_SEND, resp) < 0)
+ return -ECANCELED;
+ return 0;
+}
+
+/**
+ * Check if a notification id is still valid
+ * @param fd the notification fd
+ * @param id the id to test
+ *
+ * Checks to see if a notification id is still valid. Returns 0 on success, and
+ * negative values on failure.
+ *
+ */
+int sys_notify_id_valid(int fd, uint64_t id)
+{
+ int rc;
+ if (state.sup_user_notif <= 0)
+ return -EOPNOTSUPP;
+
+ rc = ioctl(fd, SECCOMP_IOCTL_NOTIF_ID_VALID, &id);
+ if (rc < 0 && errno == EINVAL)
+ /* It is possible that libseccomp was built against newer kernel
+ * headers than the kernel it is running on. If so, the older
+ * runtime kernel may not support the "fixed"
+ * SECCOMP_IOCTL_NOTIF_ID_VALID ioctl number which was introduced in
+ * kernel commit 47e33c05f9f0 ("seccomp: Fix ioctl number for
+ * SECCOMP_IOCTL_NOTIF_ID_VALID"). Try the old value. */
+ rc = ioctl(fd, SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR, &id);
+ if (rc < 0)
+ return -ENOENT;
+ return 0;
+}