diff options
Diffstat (limited to 'security/sandbox/chromium/sandbox/linux/seccomp-bpf')
14 files changed, 2372 insertions, 0 deletions
diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tester_compatibility_delegate.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tester_compatibility_delegate.h new file mode 100644 index 0000000000..a4315ba3c2 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tester_compatibility_delegate.h @@ -0,0 +1,56 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTER_COMPATIBILITY_DELEGATE_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTER_COMPATIBILITY_DELEGATE_H_ + +#include <memory> + +#include "base/macros.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h" + +namespace sandbox { + +// This templated class allows building a BPFTesterDelegate from a +// deprecated-style BPF policy (that is a SyscallEvaluator function pointer, +// instead of a SandboxBPFPolicy class), specified in |policy_function| and a +// function pointer to a test in |test_function|. +// This allows both the policy and the test function to take a pointer to an +// object of type "Aux" as a parameter. This is used to implement the BPF_TEST +// macro and should generally not be used directly. +template <class Policy, class Aux> +class BPFTesterCompatibilityDelegate : public BPFTesterDelegate { + public: + typedef void (*TestFunction)(Aux*); + + explicit BPFTesterCompatibilityDelegate(TestFunction test_function) + : aux_(), test_function_(test_function) {} + + ~BPFTesterCompatibilityDelegate() override {} + + std::unique_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() override { + // The current method is guaranteed to only run in the child process + // running the test. In this process, the current object is guaranteed + // to live forever. So it's ok to pass a pointer to aux_ to + // the policy, which could in turn pass it to the kernel via Trap(). 
+ return std::unique_ptr<bpf_dsl::Policy>(new Policy(&aux_)); + } + + void RunTestFunction() override { + // Run the actual test. + // The current object is guaranteed to live forever in the child process + // where this will run. + test_function_(&aux_); + } + + private: + Aux aux_; + TestFunction test_function_; + + DISALLOW_COPY_AND_ASSIGN(BPFTesterCompatibilityDelegate); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTER_COMPATIBILITY_DELEGATE_H_ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests.h new file mode 100644 index 0000000000..8b2b12afd8 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests.h @@ -0,0 +1,124 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__ + +#include <memory> + +#include "base/logging.h" +#include "base/macros.h" +#include "build/build_config.h" +#include "sandbox/linux/seccomp-bpf/bpf_tester_compatibility_delegate.h" +#include "sandbox/linux/tests/unit_tests.h" + +namespace sandbox { + +// BPF_TEST_C() is a special version of SANDBOX_TEST(). It runs a test function +// in a sub-process, under a seccomp-bpf policy specified in +// |bpf_policy_class_name| without failing on configurations that are allowed +// to not support seccomp-bpf in their kernels. +// This is the preferred format for new BPF tests. |bpf_policy_class_name| is a +// class name (which will be default-constructed) that implements the +// Policy interface. +// The test function's body can simply follow. Test functions should use +// the BPF_ASSERT macros defined below, not GTEST's macros. The use of +// CHECK* macros is supported but less robust. 
+#define BPF_TEST_C(test_case_name, test_name, bpf_policy_class_name) \ + BPF_DEATH_TEST_C( \ + test_case_name, test_name, DEATH_SUCCESS(), bpf_policy_class_name) + +// Identical to BPF_TEST_C but allows to specify the nature of death. +#define BPF_DEATH_TEST_C( \ + test_case_name, test_name, death, bpf_policy_class_name) \ + void BPF_TEST_C_##test_name(); \ + TEST(test_case_name, DISABLE_ON_TSAN(test_name)) { \ + sandbox::SandboxBPFTestRunner bpf_test_runner( \ + new sandbox::BPFTesterSimpleDelegate<bpf_policy_class_name>( \ + BPF_TEST_C_##test_name)); \ + sandbox::UnitTests::RunTestInProcess(&bpf_test_runner, death); \ + } \ + void BPF_TEST_C_##test_name() + +// This form of BPF_TEST is a little verbose and should be reserved for complex +// tests where a lot of control is required. +// |bpf_tester_delegate_class| must be a classname implementing the +// BPFTesterDelegate interface. +#define BPF_TEST_D(test_case_name, test_name, bpf_tester_delegate_class) \ + BPF_DEATH_TEST_D( \ + test_case_name, test_name, DEATH_SUCCESS(), bpf_tester_delegate_class) + +// Identical to BPF_TEST_D but allows to specify the nature of death. +#define BPF_DEATH_TEST_D( \ + test_case_name, test_name, death, bpf_tester_delegate_class) \ + TEST(test_case_name, DISABLE_ON_TSAN(test_name)) { \ + sandbox::SandboxBPFTestRunner bpf_test_runner( \ + new bpf_tester_delegate_class()); \ + sandbox::UnitTests::RunTestInProcess(&bpf_test_runner, death); \ + } + +// Assertions are handled exactly the same as with a normal SANDBOX_TEST() +#define BPF_ASSERT SANDBOX_ASSERT +#define BPF_ASSERT_EQ(x, y) BPF_ASSERT((x) == (y)) +#define BPF_ASSERT_NE(x, y) BPF_ASSERT((x) != (y)) +#define BPF_ASSERT_LT(x, y) BPF_ASSERT((x) < (y)) +#define BPF_ASSERT_GT(x, y) BPF_ASSERT((x) > (y)) +#define BPF_ASSERT_LE(x, y) BPF_ASSERT((x) <= (y)) +#define BPF_ASSERT_GE(x, y) BPF_ASSERT((x) >= (y)) + +// This form of BPF_TEST is now discouraged (but still allowed) in favor of +// BPF_TEST_D and BPF_TEST_C. 
+// The |policy| parameter should be a Policy subclass. +// BPF_TEST() takes a C++ data type as a fourth parameter. A variable +// of this type will be allocated and a pointer to it will be +// available within the test function as "BPF_AUX". The pointer will +// also be passed as an argument to the policy's constructor. Policies +// would typically use it as an argument to SandboxBPF::Trap(), if +// they want to communicate data between the BPF_TEST() and a Trap() +// function. The life-time of this object is the same as the life-time +// of the process running under the seccomp-bpf policy. +// |aux| must not be void. +#define BPF_TEST(test_case_name, test_name, policy, aux) \ + BPF_DEATH_TEST(test_case_name, test_name, DEATH_SUCCESS(), policy, aux) + +// A BPF_DEATH_TEST is just the same as a BPF_TEST, but it assumes that the +// test will fail with a particular known error condition. Use the DEATH_XXX() +// macros from unit_tests.h to specify the expected error condition. +#define BPF_DEATH_TEST(test_case_name, test_name, death, policy, aux) \ + void BPF_TEST_##test_name(aux* BPF_AUX); \ + TEST(test_case_name, DISABLE_ON_TSAN(test_name)) { \ + sandbox::SandboxBPFTestRunner bpf_test_runner( \ + new sandbox::BPFTesterCompatibilityDelegate<policy, aux>( \ + BPF_TEST_##test_name)); \ + sandbox::UnitTests::RunTestInProcess(&bpf_test_runner, death); \ + } \ + void BPF_TEST_##test_name(aux* BPF_AUX) + +// This class takes a simple function pointer as a constructor parameter and a +// class name as a template parameter to implement the BPFTesterDelegate +// interface which can be used to build BPF unittests with +// the SandboxBPFTestRunner class. 
+template <class PolicyClass> +class BPFTesterSimpleDelegate : public BPFTesterDelegate { + public: + explicit BPFTesterSimpleDelegate(void (*test_function)(void)) + : test_function_(test_function) {} + ~BPFTesterSimpleDelegate() override {} + + std::unique_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() override { + return std::unique_ptr<bpf_dsl::Policy>(new PolicyClass()); + } + void RunTestFunction() override { + DCHECK(test_function_); + test_function_(); + } + + private: + void (*test_function_)(void); + DISALLOW_COPY_AND_ASSIGN(BPFTesterSimpleDelegate); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_BPF_TESTS_H__ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc new file mode 100644 index 0000000000..d45bc87292 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc @@ -0,0 +1,155 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" + +#include <errno.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <memory> + +#include "base/logging.h" +#include "base/macros.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::Error; +using sandbox::bpf_dsl::ResultExpr; + +namespace sandbox { + +namespace { + +class FourtyTwo { + public: + static const int kMagicValue = 42; + FourtyTwo() : value_(kMagicValue) {} + int value() { return value_; } + + private: + int value_; + DISALLOW_COPY_AND_ASSIGN(FourtyTwo); +}; + +class EmptyClassTakingPolicy : public bpf_dsl::Policy { + public: + explicit EmptyClassTakingPolicy(FourtyTwo* fourty_two) { + BPF_ASSERT(fourty_two); + BPF_ASSERT(FourtyTwo::kMagicValue == fourty_two->value()); + } + ~EmptyClassTakingPolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + DCHECK(SandboxBPF::IsValidSyscallNumber(sysno)); + return Allow(); + } +}; + +BPF_TEST(BPFTest, + BPFAUXPointsToClass, + EmptyClassTakingPolicy, + FourtyTwo /* *BPF_AUX */) { + // BPF_AUX should point to an instance of FourtyTwo. + BPF_ASSERT(BPF_AUX); + BPF_ASSERT(FourtyTwo::kMagicValue == BPF_AUX->value()); +} + +void DummyTestFunction(FourtyTwo *fourty_two) { +} + +TEST(BPFTest, BPFTesterCompatibilityDelegateLeakTest) { + // Don't do anything, simply gives dynamic tools an opportunity to detect + // leaks. + { + BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo> + simple_delegate(DummyTestFunction); + } + { + // Test polymorphism. 
+ std::unique_ptr<BPFTesterDelegate> simple_delegate( + new BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo>( + DummyTestFunction)); + } +} + +class EnosysPtracePolicy : public bpf_dsl::Policy { + public: + EnosysPtracePolicy() { my_pid_ = sys_getpid(); } + ~EnosysPtracePolicy() override { + // Policies should be able to bind with the process on which they are + // created. They should never be created in a parent process. + BPF_ASSERT_EQ(my_pid_, sys_getpid()); + } + + ResultExpr EvaluateSyscall(int system_call_number) const override { + CHECK(SandboxBPF::IsValidSyscallNumber(system_call_number)); + if (system_call_number == __NR_ptrace) { + // The EvaluateSyscall function should run in the process that created + // the current object. + BPF_ASSERT_EQ(my_pid_, sys_getpid()); + return Error(ENOSYS); + } else { + return Allow(); + } + } + + private: + pid_t my_pid_; + DISALLOW_COPY_AND_ASSIGN(EnosysPtracePolicy); +}; + +class BasicBPFTesterDelegate : public BPFTesterDelegate { + public: + BasicBPFTesterDelegate() {} + ~BasicBPFTesterDelegate() override {} + + std::unique_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() override { + return std::unique_ptr<bpf_dsl::Policy>(new EnosysPtracePolicy()); + } + void RunTestFunction() override { + errno = 0; + int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL); + BPF_ASSERT(-1 == ret); + BPF_ASSERT(ENOSYS == errno); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BasicBPFTesterDelegate); +}; + +// This is the most powerful and complex way to create a BPF test, but it +// requires a full class definition (BasicBPFTesterDelegate). +BPF_TEST_D(BPFTest, BPFTestWithDelegateClass, BasicBPFTesterDelegate) + +// This is the simplest form of BPF tests. 
+BPF_TEST_C(BPFTest, BPFTestWithInlineTest, EnosysPtracePolicy) { + errno = 0; + int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL); + BPF_ASSERT(-1 == ret); + BPF_ASSERT(ENOSYS == errno); +} + +const char kHelloMessage[] = "Hello"; + +BPF_DEATH_TEST_C(BPFTest, + BPFDeathTestWithInlineTest, + DEATH_MESSAGE(kHelloMessage), + EnosysPtracePolicy) { + LOG(ERROR) << kHelloMessage; + _exit(1); +} + +} // namespace + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.cc new file mode 100644 index 0000000000..3baf1f13d9 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/die.h" + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <string> + +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_signal.h" + +namespace sandbox { + +void Die::ExitGroup() { + // exit_group() should exit our program. After all, it is defined as a + // function that doesn't return. But things can theoretically go wrong. + // Especially, since we are dealing with system call filters. Continuing + // execution would be very bad in most cases where ExitGroup() gets called. + // So, we'll try a few other strategies too. + Syscall::Call(__NR_exit_group, 1); + + // We have no idea what our run-time environment looks like. So, signal + // handlers might or might not do the right thing. 
Try to reset settings + // to a defined state; but we have no way to verify whether we actually + // succeeded in doing so. Nonetheless, triggering a fatal signal could help + // us terminate. + struct sigaction sa = {}; + sa.sa_handler = LINUX_SIG_DFL; + sa.sa_flags = LINUX_SA_RESTART; + sys_sigaction(LINUX_SIGSEGV, &sa, nullptr); + Syscall::Call(__NR_prctl, PR_SET_DUMPABLE, (void*)0, (void*)0, (void*)0); + if (*(volatile char*)0) { + } + + // If there is no way for us to ask for the program to exit, the next + // best thing we can do is to loop indefinitely. Maybe, somebody will notice + // and file a bug... + // We in fact retry the system call inside of our loop so that it will + // stand out when somebody tries to diagnose the problem by using "strace". + for (;;) { + Syscall::Call(__NR_exit_group, 1); + } +} + +void Die::SandboxDie(const char* msg, const char* file, int line) { + if (simple_exit_) { + LogToStderr(msg, file, line); + } else { + logging::LogMessage(file, line, logging::LOG_FATAL).stream() << msg; + } + ExitGroup(); +} + +void Die::RawSandboxDie(const char* msg) { + if (!msg) + msg = ""; + RAW_LOG(FATAL, msg); + ExitGroup(); +} + +void Die::SandboxInfo(const char* msg, const char* file, int line) { + if (!suppress_info_) { + logging::LogMessage(file, line, logging::LOG_INFO).stream() << msg; + } +} + +void Die::LogToStderr(const char* msg, const char* file, int line) { + if (msg) { + char buf[40]; + snprintf(buf, sizeof(buf), "%d", line); + std::string s = std::string(file) + ":" + buf + ":" + msg + "\n"; + + // No need to loop. Short write()s are unlikely and if they happen we + // probably prefer them over a loop that blocks. 
+ ignore_result( + HANDLE_EINTR(Syscall::Call(__NR_write, 2, s.c_str(), s.length()))); + } +} + +bool Die::simple_exit_ = false; +bool Die::suppress_info_ = false; + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.h new file mode 100644 index 0000000000..b3f3f72c2f --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/die.h @@ -0,0 +1,68 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This is the main API for using this file. Prints an error message and +// exits with a fatal error. This is not async-signal safe. +#define SANDBOX_DIE(m) sandbox::Die::SandboxDie(m, __FILE__, __LINE__) + +// An async signal safe version of the same API. Won't print the filename +// and line numbers. +#define RAW_SANDBOX_DIE(m) sandbox::Die::RawSandboxDie(m) + +// Adds an informational message to the log file or stderr as appropriate. +#define SANDBOX_INFO(m) sandbox::Die::SandboxInfo(m, __FILE__, __LINE__) + +class SANDBOX_EXPORT Die { + public: + // Terminate the program, even if the current sandbox policy prevents some + // of the more commonly used functions used for exiting. + // Most users would want to call SANDBOX_DIE() instead, as it logs extra + // information. But calling ExitGroup() is correct and in some rare cases + // preferable. So, we make it part of the public API. + static void ExitGroup() __attribute__((noreturn)); + + // This method gets called by SANDBOX_DIE(). There is normally no reason + // to call it directly unless you are defining your own exiting macro. 
+ static void SandboxDie(const char* msg, const char* file, int line) + __attribute__((noreturn)); + + static void RawSandboxDie(const char* msg) __attribute__((noreturn)); + + // This method gets called by SANDBOX_INFO(). There is normally no reason + // to call it directly unless you are defining your own logging macro. + static void SandboxInfo(const char* msg, const char* file, int line); + + // Writes a message to stderr. Used as a fall-back choice, if we don't have + // any other way to report an error. + static void LogToStderr(const char* msg, const char* file, int line); + + // We generally want to run all exit handlers. This means, on SANDBOX_DIE() + // we should be calling LOG(FATAL). But there are some situations where + // we just need to print a message and then terminate. This would typically + // happen in cases where we consume the error message internally (e.g. in + // unit tests or in the supportsSeccompSandbox() method). + static void EnableSimpleExit() { simple_exit_ = true; } + + // Sometimes we need to disable all informational messages (e.g. from within + // unittests). + static void SuppressInfoMessages(bool flag) { suppress_info_ = flag; } + + private: + static bool simple_exit_; + static bool suppress_info_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(Die); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.cc new file mode 100644 index 0000000000..72a79670d3 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.cc @@ -0,0 +1,259 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" + +#include <errno.h> +#include <stdint.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/compiler_specific.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/macros.h" +#include "base/posix/eintr_wrapper.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/bpf_dsl/policy_compiler.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/bpf_dsl/syscall_set.h" +#include "sandbox/linux/seccomp-bpf/die.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/seccomp-bpf/trap.h" +#include "sandbox/linux/services/proc_util.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/services/thread_helpers.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +namespace sandbox { + +namespace { + +// Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via +// prctl(). +bool KernelSupportsSeccompBPF() { + errno = 0; + const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr); + + if (rv == -1 && EFAULT == errno) { + return true; + } + return false; +} + +// LG introduced a buggy syscall, sys_set_media_ext, with the same number as +// seccomp. Return true if the current kernel has this buggy syscall. +// +// We want this to work with upcoming versions of seccomp, so we pass bogus +// flags that are unlikely to ever be used by the kernel. A normal kernel would +// return -EINVAL, but a buggy LG kernel would return 1. +bool KernelHasLGBug() { +#if defined(OS_ANDROID) + // sys_set_media will see this as NULL, which should be a safe (non-crashing) + // way to invoke it. A genuine seccomp syscall will see it as + // SECCOMP_SET_MODE_STRICT. 
+ const unsigned int operation = 0; + // Chosen by fair dice roll. Guaranteed to be random. + const unsigned int flags = 0xf7a46a5c; + const int rv = sys_seccomp(operation, flags, nullptr); + // A genuine kernel would return -EINVAL (which would set rv to -1 and errno + // to EINVAL), or at the very least return some kind of error (which would + // set rv to -1). Any other behavior indicates that whatever code received + // our syscall was not the real seccomp. + if (rv != -1) { + return true; + } +#endif // defined(OS_ANDROID) + + return false; +} + +// Check if the kernel supports seccomp-filter via the seccomp system call +// and the TSYNC feature to enable seccomp on all threads. +bool KernelSupportsSeccompTsync() { + if (KernelHasLGBug()) { + return false; + } + + errno = 0; + const int rv = + sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr); + + if (rv == -1 && errno == EFAULT) { + return true; + } + + DCHECK_EQ(-1, rv); + DCHECK(ENOSYS == errno || EINVAL == errno); + return false; +} + +uint64_t EscapePC() { + intptr_t rv = Syscall::Call(-1); + if (rv == -1 && errno == ENOSYS) { + return 0; + } + return static_cast<uint64_t>(static_cast<uintptr_t>(rv)); +} + +intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) { + SANDBOX_DIE(static_cast<const char*>(aux)); +} + +bpf_dsl::ResultExpr SandboxPanic(const char* error) { + return bpf_dsl::Trap(SandboxPanicTrap, error); +} + +} // namespace + +SandboxBPF::SandboxBPF(std::unique_ptr<bpf_dsl::Policy> policy) + : proc_fd_(), sandbox_has_started_(false), policy_(std::move(policy)) {} + +SandboxBPF::~SandboxBPF() { +} + +// static +bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) { + switch (level) { + case SeccompLevel::SINGLE_THREADED: + return KernelSupportsSeccompBPF(); + case SeccompLevel::MULTI_THREADED: + return KernelSupportsSeccompTsync(); + } + NOTREACHED(); + return false; +} + +bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) { + DCHECK(policy_); 
+ CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED || + seccomp_level == SeccompLevel::MULTI_THREADED); + + if (sandbox_has_started_) { + SANDBOX_DIE( + "Cannot repeatedly start sandbox. Create a separate Sandbox " + "object instead."); + return false; + } + + if (!proc_fd_.is_valid()) { + SetProcFd(ProcUtil::OpenProc()); + } + + const bool supports_tsync = KernelSupportsSeccompTsync(); + + if (seccomp_level == SeccompLevel::SINGLE_THREADED) { + // Wait for /proc/self/task/ to update if needed and assert the + // process is single threaded. + ThreadHelpers::AssertSingleThreaded(proc_fd_.get()); + } else if (seccomp_level == SeccompLevel::MULTI_THREADED) { + if (!supports_tsync) { + SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing " + "filters for a threadgroup"); + return false; + } + } + + // We no longer need access to any files in /proc. We want to do this + // before installing the filters, just in case that our policy denies + // close(). + if (proc_fd_.is_valid()) { + proc_fd_.reset(); + } + + // Install the filters. 
+ InstallFilter(supports_tsync || + seccomp_level == SeccompLevel::MULTI_THREADED); + + return true; +} + +void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) { + proc_fd_.swap(proc_fd); +} + +// static +bool SandboxBPF::IsValidSyscallNumber(int sysnum) { + return SyscallSet::IsValid(sysnum); +} + +// static +bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) { + return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno); +} + +// static +intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) { + return Syscall::Call( + args.nr, static_cast<intptr_t>(args.args[0]), + static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]), + static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]), + static_cast<intptr_t>(args.args[5])); +} + +CodeGen::Program SandboxBPF::AssembleFilter() { + DCHECK(policy_); + + bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry()); + if (Trap::SandboxDebuggingAllowedByUser()) { + compiler.DangerousSetEscapePC(EscapePC()); + } + compiler.SetPanicFunc(SandboxPanic); + return compiler.Compile(); +} + +void SandboxBPF::InstallFilter(bool must_sync_threads) { + // We want to be very careful in not imposing any requirements on the + // policies that are set with SetSandboxPolicy(). This means, as soon as + // the sandbox is active, we shouldn't be relying on libraries that could + // be making system calls. This, for example, means we should avoid + // using the heap and we should avoid using STL functions. + // Temporarily copy the contents of the "program" vector into a + // stack-allocated array; and then explicitly destroy that object. + // This makes sure we don't ex- or implicitly call new/delete after we + // installed the BPF filter program in the kernel. Depending on the + // system memory allocator that is in effect, these operators can result + // in system calls to things like munmap() or brk(). 
+ CodeGen::Program program = AssembleFilter(); + + struct sock_filter bpf[program.size()]; + const struct sock_fprog prog = {static_cast<unsigned short>(program.size()), + bpf}; + memcpy(bpf, &program[0], sizeof(bpf)); + CodeGen::Program().swap(program); // vector swap trick + + // Make an attempt to release memory that is no longer needed here, rather + // than in the destructor. Try to avoid as much as possible to presume of + // what will be possible to do in the new (sandboxed) execution environment. + policy_.reset(); + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + SANDBOX_DIE("Kernel refuses to enable no-new-privs"); + } + + // Install BPF filter program. If the thread state indicates multi-threading + // support, then the kernel has the seccomp system call. Otherwise, fall + // back on prctl, which requires the process to be single-threaded. + if (must_sync_threads) { + int rv = + sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog); + if (rv) { + SANDBOX_DIE( + "Kernel refuses to turn on and synchronize threads for BPF filters"); + } + } else { + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + SANDBOX_DIE("Kernel refuses to turn on BPF filters"); + } + } + + sandbox_has_started_ = true; +} + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.h new file mode 100644 index 0000000000..282852992b --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf.h @@ -0,0 +1,113 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ + +#include <stdint.h> + +#include <memory> + +#include "base/files/scoped_file.h" +#include "base/macros.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { +struct arch_seccomp_data; + +// This class can be used to apply a syscall sandboxing policy expressed in a +// bpf_dsl::Policy object to the current process. +// Syscall sandboxing policies get inherited by subprocesses and, once applied, +// can never be removed for the lifetime of the process. +class SANDBOX_EXPORT SandboxBPF { + public: + enum class SeccompLevel { + SINGLE_THREADED, + MULTI_THREADED, + }; + + // Ownership of |policy| is transferred here to the sandbox object. + // nullptr is allowed for unit tests. + explicit SandboxBPF(std::unique_ptr<bpf_dsl::Policy> policy); + // NOTE: Setting a policy and starting the sandbox is a one-way operation. + // The kernel does not provide any option for unloading a loaded sandbox. The + // sandbox remains engaged even when the object is destructed. + ~SandboxBPF(); + + // Detect if the kernel supports the specified seccomp level. + // See StartSandbox() for a description of these. + static bool SupportsSeccompSandbox(SeccompLevel level); + + // This is the main public entry point. It sets up the resources needed by + // the sandbox, and enters Seccomp mode. + // The calling process must provide a |level| to tell the sandbox which type + // of kernel support it should engage. + // SINGLE_THREADED will only sandbox the calling thread. Since it would be a + // security risk, the sandbox will also check that the current process is + // single threaded and crash if it isn't the case. + // MULTI_THREADED requires more recent kernel support and allows to sandbox + // all the threads of the current process. 
Be mindful of potential races, + // with other threads using disallowed system calls either before or after + // the sandbox is engaged. + // + // It is possible to stack multiple sandboxes by creating separate "Sandbox" + // objects and calling "StartSandbox()" on each of them. Please note, that + // this requires special care, though, as newly stacked sandboxes can never + // relax restrictions imposed by earlier sandboxes. Furthermore, installing + // a new policy requires making system calls, that might already be + // disallowed. + // Finally, stacking does add more kernel overhead than having a single + // combined policy. So, it should only be used if there are no alternatives. + bool StartSandbox(SeccompLevel level) WARN_UNUSED_RESULT; + + // The sandbox needs to be able to access files in "/proc/self/". If + // this directory is not accessible when "StartSandbox()" gets called, the + // caller must provide an already opened file descriptor by calling + // "SetProcFd()". + // The sandbox becomes the new owner of this file descriptor and will + // close it when "StartSandbox()" executes or when the sandbox object + // disappears. + void SetProcFd(base::ScopedFD proc_fd); + + // Checks whether a particular system call number is valid on the current + // architecture. + static bool IsValidSyscallNumber(int sysnum); + + // UnsafeTraps require some syscalls to always be allowed. + // This helper function returns true for these calls. + static bool IsRequiredForUnsafeTrap(int sysno); + + // From within an UnsafeTrap() it is often useful to be able to execute + // the system call that triggered the trap. The ForwardSyscall() method + // makes this easy. It is more efficient than calling glibc's syscall() + // function, as it avoids the extra round-trip to the signal handler. And + // it automatically does the correct thing to report kernel-style error + // conditions, rather than setting errno. See the comments for TrapFnc for + // details. 
In other words, the return value from ForwardSyscall() is + // directly suitable as a return value for a trap handler. + static intptr_t ForwardSyscall(const struct arch_seccomp_data& args); + + private: + friend class SandboxBPFTestRunner; + + // Assembles a BPF filter program from the current policy. After calling this + // function, you must not call any other sandboxing function. + CodeGen::Program AssembleFilter(); + + // Assembles and installs a filter based on the policy that has previously + // been configured with SetSandboxPolicy(). + void InstallFilter(bool must_sync_threads); + + base::ScopedFD proc_fd_; + bool sandbox_has_started_; + std::unique_ptr<bpf_dsl::Policy> policy_; + + DISALLOW_COPY_AND_ASSIGN(SandboxBPF); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.cc new file mode 100644 index 0000000000..36f3744b76 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.cc @@ -0,0 +1,66 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h" + +#include <fcntl.h> + +#include <memory> + +#include "base/logging.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf/die.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/tests/unit_tests.h" + +namespace sandbox { + +SandboxBPFTestRunner::SandboxBPFTestRunner( + BPFTesterDelegate* bpf_tester_delegate) + : bpf_tester_delegate_(bpf_tester_delegate) { +} + +SandboxBPFTestRunner::~SandboxBPFTestRunner() { +} + +void SandboxBPFTestRunner::Run() { + DCHECK(bpf_tester_delegate_); + sandbox::Die::EnableSimpleExit(); + + std::unique_ptr<bpf_dsl::Policy> policy = + bpf_tester_delegate_->GetSandboxBPFPolicy(); + + if (sandbox::SandboxBPF::SupportsSeccompSandbox( + SandboxBPF::SeccompLevel::SINGLE_THREADED)) { + // Initialize and then start the sandbox with our custom policy + sandbox::SandboxBPF sandbox(std::move(policy)); + SANDBOX_ASSERT(sandbox.StartSandbox( + sandbox::SandboxBPF::SeccompLevel::SINGLE_THREADED)); + + // Run the actual test. + bpf_tester_delegate_->RunTestFunction(); + } else { + printf("This BPF test is not fully running in this configuration!\n"); + // Android is the only configuration where we accept not having kernel + // BPF support. + if (!IsAndroid()) { + const bool seccomp_bpf_is_supported = false; + SANDBOX_ASSERT(seccomp_bpf_is_supported); + } + // Call the compiler and verify the policy. That's the least we can do, + // if we don't have kernel support. + sandbox::SandboxBPF sandbox(std::move(policy)); + sandbox.AssembleFilter(); + sandbox::UnitTests::IgnoreThisTest(); + } +} + +bool SandboxBPFTestRunner::ShouldCheckForLeaks() const { + // LSAN requires being able to use ptrace() and other system calls that could + // be denied. 
+ return false; +} + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h new file mode 100644 index 0000000000..4fc3c5d169 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/sandbox_bpf_test_runner.h @@ -0,0 +1,62 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_TEST_RUNNER_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_TEST_RUNNER_H_ + +#include <memory> + +#include "base/macros.h" +#include "sandbox/linux/tests/sandbox_test_runner.h" + +namespace sandbox { +namespace bpf_dsl { +class Policy; +} + +// To create a SandboxBPFTestRunner object, one needs to implement this +// interface and pass an instance to the SandboxBPFTestRunner constructor. +// In the child process running the test, the BPFTesterDelegate object is +// guaranteed to not be destroyed until the child process terminates. +class BPFTesterDelegate { + public: + BPFTesterDelegate() {} + virtual ~BPFTesterDelegate() {} + + // This will instantiate a policy suitable for the test we want to run. It is + // guaranteed to only be called from the child process that will run the + // test. + virtual std::unique_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() = 0; + // This will be called from a child process with the BPF sandbox turned on. + virtual void RunTestFunction() = 0; + + private: + DISALLOW_COPY_AND_ASSIGN(BPFTesterDelegate); +}; + +// This class implements the SandboxTestRunner interface and Run() will +// initialize a seccomp-bpf sandbox (specified by |bpf_tester_delegate|) and +// run a test function (via |bpf_tester_delegate|) if the current kernel +// configuration allows it.
If it can not run the test under seccomp-bpf, +// Run() will still compile the policy which should allow to get some coverage +// under tools that behave like Valgrind. +class SandboxBPFTestRunner : public SandboxTestRunner { + public: + // This constructor takes ownership of the |bpf_tester_delegate| object. + // (It doesn't take a std::unique_ptr since they make polymorphism verbose). + explicit SandboxBPFTestRunner(BPFTesterDelegate* bpf_tester_delegate); + ~SandboxBPFTestRunner() override; + + void Run() override; + + bool ShouldCheckForLeaks() const override; + + private: + std::unique_ptr<BPFTesterDelegate> bpf_tester_delegate_; + DISALLOW_COPY_AND_ASSIGN(SandboxBPFTestRunner); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_TEST_RUNNER_H_ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.cc new file mode 100644 index 0000000000..34edabd2b8 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.cc @@ -0,0 +1,481 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/syscall.h" + +#include <errno.h> +#include <stdint.h> + +#include "base/logging.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" + +namespace sandbox { + +namespace { + +#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ + defined(ARCH_CPU_MIPS_FAMILY) +// Number that's not currently used by any Linux kernel ABIs. +const int kInvalidSyscallNumber = 0x351d3; +#else +#error Unrecognized architecture +#endif + +asm(// We need to be able to tell the kernel exactly where we made a + // system call. The C++ compiler likes to sometimes clone or + // inline code, which would inadvertently end up duplicating + // the entry point. 
+ // "gcc" can suppress code duplication with suitable function + // attributes, but "clang" doesn't have this ability. + // The "clang" developer mailing list suggested that the correct + // and portable solution is a file-scope assembly block. + // N.B. We do mark our code as a proper function so that backtraces + // work correctly. But we make absolutely no attempt to use the + // ABI's calling conventions for passing arguments. We will only + // ever be called from assembly code and thus can pick more + // suitable calling conventions. +#if defined(__i386__) + ".text\n" + ".align 16, 0x90\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.cfi_startproc\n" + // Check if "%eax" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "int $0x80". This address can be + // used as a marker that BPF code inspects. + "test %eax, %eax\n" + "jge 1f\n" + // Always, make sure that our code is position-independent, or + // address space randomization might not work on i386. This means, + // we can't use "lea", but instead have to rely on "call/pop". + "call 0f; .cfi_adjust_cfa_offset 4\n" + "0:pop %eax; .cfi_adjust_cfa_offset -4\n" + "addl $2f-0b, %eax\n" + "ret\n" + // Save register that we don't want to clobber. On i386, we need to + // save relatively aggressively, as there are a couple of registers + // that are used internally (e.g. %ebx for position-independent + // code, and %ebp for the frame pointer), and as we need to keep at + // least a few registers available for the register allocator. + "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n" + "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n" + "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n" + "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n" + // Copy entries from the array holding the arguments into the + // correct CPU registers.
+ "movl 0(%edi), %ebx\n" + "movl 4(%edi), %ecx\n" + "movl 8(%edi), %edx\n" + "movl 12(%edi), %esi\n" + "movl 20(%edi), %ebp\n" + "movl 16(%edi), %edi\n" + // Enter the kernel. + "int $0x80\n" + // This is our "magic" return address that the BPF filter sees. + "2:" + // Restore any clobbered registers that we didn't declare to the + // compiler. + "pop %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n" + "pop %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n" + "pop %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n" + "pop %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n" + "ret\n" + ".cfi_endproc\n" + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif defined(__x86_64__) + ".text\n" + ".align 16, 0x90\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.cfi_startproc\n" + // Check if "%rdi" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "syscall". This address can be + // used as a marker that BPF code inspects. + "test %rdi, %rdi\n" + "jge 1f\n" + // Always make sure that our code is position-independent, or the + // linker will throw a hissy fit on x86-64. + "lea 2f(%rip), %rax\n" + "ret\n" + // Now we load the registers used to pass arguments to the system + // call: system call number in %rax, and arguments in %rdi, %rsi, + // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers + // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no + // need to worry here about spilling registers or CFI directives. + "1:movq %rdi, %rax\n" + "movq 0(%rsi), %rdi\n" + "movq 16(%rsi), %rdx\n" + "movq 24(%rsi), %r10\n" + "movq 32(%rsi), %r8\n" + "movq 40(%rsi), %r9\n" + "movq 8(%rsi), %rsi\n" + // Enter the kernel. + "syscall\n" + // This is our "magic" return address that the BPF filter sees. + "2:ret\n" + ".cfi_endproc\n" + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif defined(__arm__) + // Throughout this file, we use the same mode (ARM vs. 
thumb) + // that the C++ compiler uses. This means, when transfering control + // from C++ to assembly code, we do not need to switch modes (e.g. + // by using the "bx" instruction). It also means that our assembly + // code should not be invoked directly from code that lives in + // other compilation units, as we don't bother implementing thumb + // interworking. That's OK, as we don't make any of the assembly + // symbols public. They are all local to this file. + ".text\n" + ".align 2\n" + ".type SyscallAsm, %function\n" +#if defined(__thumb__) + ".thumb_func\n" +#else + ".arm\n" +#endif + "SyscallAsm:\n" +#if !defined(__native_client_nonsfi__) + // .fnstart and .fnend pseudo operations creates unwind table. + // It also creates a reference to the symbol __aeabi_unwind_cpp_pr0, which + // is not provided by PNaCl toolchain. Disable it. + ".fnstart\n" +#endif + "@ args = 0, pretend = 0, frame = 8\n" + "@ frame_needed = 1, uses_anonymous_args = 0\n" +#if defined(__thumb__) + ".cfi_startproc\n" + "push {r7, lr}\n" + ".save {r7, lr}\n" + ".cfi_offset 14, -4\n" + ".cfi_offset 7, -8\n" + ".cfi_def_cfa_offset 8\n" +#else + "stmfd sp!, {fp, lr}\n" + "add fp, sp, #4\n" +#endif + // Check if "r0" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "swi 0". This address can be + // used as a marker that BPF code inspects. + "cmp r0, #0\n" + "bge 1f\n" + "adr r0, 2f\n" + "b 2f\n" + // We declared (almost) all clobbered registers to the compiler. On + // ARM there is no particular register pressure. So, we can go + // ahead and directly copy the entries from the arguments array + // into the appropriate CPU registers. + "1:ldr r5, [r6, #20]\n" + "ldr r4, [r6, #16]\n" + "ldr r3, [r6, #12]\n" + "ldr r2, [r6, #8]\n" + "ldr r1, [r6, #4]\n" + "mov r7, r0\n" + "ldr r0, [r6, #0]\n" + // Enter the kernel + "swi 0\n" +// Restore the frame pointer. 
Also restore the program counter from +// the link register; this makes us return to the caller. +#if defined(__thumb__) + "2:pop {r7, pc}\n" + ".cfi_endproc\n" +#else + "2:ldmfd sp!, {fp, pc}\n" +#endif +#if !defined(__native_client_nonsfi__) + // Do not use .fnstart and .fnend for PNaCl toolchain. See above comment, + // for more details. + ".fnend\n" +#endif + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS)) + ".text\n" + ".option pic2\n" + ".align 4\n" + ".global SyscallAsm\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.ent SyscallAsm\n" + ".frame $sp, 40, $ra\n" + ".set push\n" + ".set noreorder\n" + ".cpload $t9\n" + "addiu $sp, $sp, -40\n" + "sw $ra, 36($sp)\n" + // Check if "v0" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "syscall". This address can be + // used as a marker that BPF code inspects. + "bgez $v0, 1f\n" + " nop\n" + // This is equivalent to "la $v0, 2f". + // LA macro has to be avoided since LLVM-AS has issue with LA in PIC mode + // https://llvm.org/bugs/show_bug.cgi?id=27644 + "lw $v0, %got(2f)($gp)\n" + "addiu $v0, $v0, %lo(2f)\n" + "b 2f\n" + " nop\n" + // On MIPS first four arguments go to registers a0 - a3 and any + // argument after that goes to stack. We can go ahead and directly + // copy the entries from the arguments array into the appropriate + // CPU registers and on the stack. + "1:lw $a3, 28($a0)\n" + "lw $a2, 24($a0)\n" + "lw $a1, 20($a0)\n" + "lw $t0, 16($a0)\n" + "sw $a3, 28($sp)\n" + "sw $a2, 24($sp)\n" + "sw $a1, 20($sp)\n" + "sw $t0, 16($sp)\n" + "lw $a3, 12($a0)\n" + "lw $a2, 8($a0)\n" + "lw $a1, 4($a0)\n" + "lw $a0, 0($a0)\n" + // Enter the kernel + "syscall\n" + // This is our "magic" return address that the BPF filter sees. + // Restore the return address from the stack. 
+ "2:lw $ra, 36($sp)\n" + "jr $ra\n" + " addiu $sp, $sp, 40\n" + ".set pop\n" + ".end SyscallAsm\n" + ".size SyscallAsm,.-SyscallAsm\n" +#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS) + ".text\n" + ".option pic2\n" + ".global SyscallAsm\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.ent SyscallAsm\n" + ".frame $sp, 16, $ra\n" + ".set push\n" + ".set noreorder\n" + "daddiu $sp, $sp, -16\n" + ".cpsetup $25, 0, SyscallAsm\n" + "sd $ra, 8($sp)\n" + // Check if "v0" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "syscall". This address can be + // used as a marker that BPF code inspects. + "bgez $v0, 1f\n" + " nop\n" + // This is equivalent to "la $v0, 2f". + // LA macro has to be avoided since LLVM-AS has issue with LA in PIC mode + // https://llvm.org/bugs/show_bug.cgi?id=27644 + "ld $v0, %got(2f)($gp)\n" + "daddiu $v0, $v0, %lo(2f)\n" + "b 2f\n" + " nop\n" + // On MIPS N64 all eight arguments go to registers a0 - a7 + // We can go ahead and directly copy the entries from the arguments array + // into the appropriate CPU registers. + "1:ld $a7, 56($a0)\n" + "ld $a6, 48($a0)\n" + "ld $a5, 40($a0)\n" + "ld $a4, 32($a0)\n" + "ld $a3, 24($a0)\n" + "ld $a2, 16($a0)\n" + "ld $a1, 8($a0)\n" + "ld $a0, 0($a0)\n" + // Enter the kernel + "syscall\n" + // This is our "magic" return address that the BPF filter sees. + // Restore the return address from the stack. 
+ "2:ld $ra, 8($sp)\n" + ".cpreturn\n" + "jr $ra\n" + "daddiu $sp, $sp, 16\n" + ".set pop\n" + ".end SyscallAsm\n" + ".size SyscallAsm,.-SyscallAsm\n" +#elif defined(__aarch64__) + ".text\n" + ".align 2\n" + ".type SyscallAsm, %function\n" + "SyscallAsm:\n" + ".cfi_startproc\n" + "cmp x0, #0\n" + "b.ge 1f\n" + "adr x0,2f\n" + "b 2f\n" + "1:ldr x5, [x6, #40]\n" + "ldr x4, [x6, #32]\n" + "ldr x3, [x6, #24]\n" + "ldr x2, [x6, #16]\n" + "ldr x1, [x6, #8]\n" + "mov x8, x0\n" + "ldr x0, [x6, #0]\n" + // Enter the kernel + "svc 0\n" + "2:ret\n" + ".cfi_endproc\n" + ".size SyscallAsm, .-SyscallAsm\n" +#endif + ); // asm + +#if defined(__x86_64__) +extern "C" { +intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]); +} +#elif defined(__mips__) +extern "C" { +intptr_t SyscallAsm(intptr_t nr, const intptr_t args[8]); +} +#endif + +} // namespace + +intptr_t Syscall::InvalidCall() { + // Explicitly pass eight zero arguments just in case. + return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0); +} + +intptr_t Syscall::Call(int nr, + intptr_t p0, + intptr_t p1, + intptr_t p2, + intptr_t p3, + intptr_t p4, + intptr_t p5, + intptr_t p6, + intptr_t p7) { + // We rely on "intptr_t" to be the exact size as a "void *". This is + // typically true, but just in case, we add a check. The language + // specification allows platforms some leeway in cases, where + // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect + // that this would only be an issue for IA64, which we are currently not + // planning on supporting. And it is even possible that this would work + // on IA64, but for lack of actual hardware, I cannot test. + static_assert(sizeof(void*) == sizeof(intptr_t), + "pointer types and intptr_t must be exactly the same size"); + + // TODO(nedeljko): Enable use of more than six parameters on architectures + // where that makes sense. 
+#if defined(__mips__) + const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7}; +#else + DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not " + "added for this architecture"; + DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not " + "added for this architecture"; + const intptr_t args[6] = {p0, p1, p2, p3, p4, p5}; +#endif // defined(__mips__) + +// Invoke our file-scope assembly code. The constraints have been picked +// carefully to match what the rest of the assembly code expects in input, +// output, and clobbered registers. +#if defined(__i386__) + intptr_t ret = nr; + asm volatile( + "call SyscallAsm\n" + // N.B. These are not the calling conventions normally used by the ABI. + : "=a"(ret) + : "0"(ret), "D"(args) + : "cc", "esp", "memory", "ecx", "edx"); +#elif defined(__x86_64__) + intptr_t ret = SyscallAsm(nr, args); +#elif defined(__arm__) + intptr_t ret; + { + register intptr_t inout __asm__("r0") = nr; + register const intptr_t* data __asm__("r6") = args; + asm volatile( + "bl SyscallAsm\n" + // N.B. These are not the calling conventions normally used by the ABI. + : "=r"(inout) + : "0"(inout), "r"(data) + : "cc", + "lr", + "memory", + "r1", + "r2", + "r3", + "r4", + "r5" +#if !defined(__thumb__) + // In thumb mode, we cannot use "r7" as a general purpose register, as + // it is our frame pointer. We have to manually manage and preserve + // it. + // In ARM mode, we have a dedicated frame pointer register and "r7" is + // thus available as a general purpose register. We don't preserve it, + // but instead mark it as clobbered. + , + "r7" +#endif // !defined(__thumb__) + ); + ret = inout; + } +#elif defined(__mips__) + intptr_t err_status; + intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status); + + if (err_status) { + // On error, MIPS returns errno from syscall instead of -errno. 
+ // The purpose of this negation is for SandboxSyscall() to behave + // more like it would on other architectures. + ret = -ret; + } +#elif defined(__aarch64__) + intptr_t ret; + { + register intptr_t inout __asm__("x0") = nr; + register const intptr_t* data __asm__("x6") = args; + asm volatile("bl SyscallAsm\n" + : "=r"(inout) + : "0"(inout), "r"(data) + : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30"); + ret = inout; + } + +#else +#error "Unimplemented architecture" +#endif + return ret; +} + +void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) { +#if defined(__mips__) + // Mips ABI states that on error a3 CPU register has non zero value and if + // there is no error, it should be zero. + if (ret_val <= -1 && ret_val >= -4095) { + // |ret_val| follows the Syscall::Call() convention of being -errno on + // errors. In order to write correct value to return register this sign + // needs to be changed back. + ret_val = -ret_val; + SECCOMP_PARM4(ctx) = 1; + } else + SECCOMP_PARM4(ctx) = 0; +#endif + SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val); +} + +#if defined(__mips__) +intptr_t Syscall::SandboxSyscallRaw(int nr, + const intptr_t* args, + intptr_t* err_ret) { + register intptr_t ret __asm__("v0") = nr; + register intptr_t syscallasm __asm__("t9") = (intptr_t) &SyscallAsm; + // a3 register becomes non zero on error. + register intptr_t err_stat __asm__("a3") = 0; + { + register const intptr_t* data __asm__("a0") = args; + asm volatile( + "jalr $t9\n" + " nop\n" + : "=r"(ret), "=r"(err_stat) + : "0"(ret), + "r"(data), + "r"(syscallasm) + // a2 is in the clobber list so inline assembly can not change its + // value.
+ : "memory", "ra", "a2"); + } + + // Set an error status so it can be used outside of this function + *err_ret = err_stat; + + return ret; +} +#endif // defined(__mips__) + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.h new file mode 100644 index 0000000000..3b02a6723f --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall.h @@ -0,0 +1,166 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ + +#include <signal.h> +#include <stdint.h> + +#include "base/macros.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This purely static class can be used to perform system calls with some +// low-level control. +class SANDBOX_EXPORT Syscall { + public: + // InvalidCall() invokes Call() with a platform-appropriate syscall + // number that is guaranteed to not be implemented (i.e., normally + // returns -ENOSYS). + // This is primarily meant to be useful for writing sandbox policy + // unit tests. + static intptr_t InvalidCall(); + + // System calls can take up to six parameters (up to eight on some + // architectures). Traditionally, glibc + // implements this property by using variadic argument lists. This works, but + // confuses tools that behave like Valgrind, because we are nominally passing + // uninitialized data whenever we call through this function and pass less + // than the full six arguments. + // So, instead, we use C++'s template system to achieve a very similar + // effect. C++ automatically sets the unused parameters to zero for us, and + // it also does the correct type expansion (e.g. from 32bit to 64bit) where + // necessary. 
+ // We have to use C-style cast operators as we want to be able to accept both + // integer and pointer types. + template <class T0, + class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6, T7 p7) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + (intptr_t)p6, + (intptr_t)p7); + } + + template <class T0, + class T1, + class T2, + class T3, + class T4, + class T5, + class T6> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + (intptr_t)p6, + 0); + } + + template <class T0, class T1, class T2, class T3, class T4, class T5> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + 0, + 0); + } + + template <class T0, class T1, class T2, class T3, class T4> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4) { + return Call(nr, p0, p1, p2, p3, p4, 0, 0, 0); + } + + template <class T0, class T1, class T2, class T3> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3) { + return Call(nr, p0, p1, p2, p3, 0, 0, 0, 0); + } + + template <class T0, class T1, class T2> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2) { + return Call(nr, p0, p1, p2, 0, 0, 0, 0, 0); + } + + template <class T0, class T1> + static inline intptr_t Call(int nr, T0 p0, T1 p1) { + return Call(nr, p0, p1, 0, 0, 0, 0, 0, 0); + } + + template <class T0> + static inline intptr_t Call(int nr, T0 p0) { + return Call(nr, p0, 0, 0, 0, 0, 0, 0, 0); + } + + static inline intptr_t Call(int nr) { + return Call(nr, 0, 0, 0, 0, 0, 0, 0, 0); + } + + // Set the registers in |ctx| to match what they 
would be after a system call + // returning |ret_val|. |ret_val| must follow the Syscall::Call() convention + // of being -errno on errors. + static void PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx); + + private: + // This performs system call |nr| with the arguments p0 to p7 from a constant + // userland address, which is for instance observable by seccomp-bpf filters. + // The constant userland address from which these system calls are made will + // be returned if |nr| is passed as -1. + // On error, this function will return a value between -1 and -4095 which + // should be interpreted as -errno. + static intptr_t Call(int nr, + intptr_t p0, + intptr_t p1, + intptr_t p2, + intptr_t p3, + intptr_t p4, + intptr_t p5, + intptr_t p6, + intptr_t p7); + +#if defined(__mips__) + // This function basically does on MIPS what SandboxSyscall() is doing on + // other architectures. However, because of specificity of MIPS regarding + // handling syscall errors, SandboxSyscall() is made as a wrapper for this + // function in order for SandboxSyscall() to behave more like on other + // architectures on places where return value from SandboxSyscall() is used + // directly (like in most tests). + // The syscall "nr" is called with arguments that are set in an array on which + // pointer "args" points to and an information whether there is an error or not + // is returned to SandboxSyscall() by err_stat.
+ static intptr_t SandboxSyscallRaw(int nr, + const intptr_t* args, + intptr_t* err_stat); +#endif // defined(__mips__) + + DISALLOW_IMPLICIT_CONSTRUCTORS(Syscall); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall_unittest.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall_unittest.cc new file mode 100644 index 0000000000..2b776d287b --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/syscall_unittest.cc @@ -0,0 +1,249 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/syscall.h" + +#include <asm/unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "base/posix/eintr_wrapper.h" +#include "base/process/process_metrics.h" +#include "base/stl_util.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::ResultExpr; +using sandbox::bpf_dsl::Trap; + +namespace sandbox { + +namespace { + +TEST(Syscall, InvalidCallReturnsENOSYS) { + EXPECT_EQ(-ENOSYS, Syscall::InvalidCall()); +} + +TEST(Syscall, WellKnownEntryPoint) { +// Test that Syscall::Call(-1) is handled specially. Don't do this on ARM, +// where syscall(-1) crashes with SIGILL. Not running the test is fine, as we +// are still testing ARM code in the next set of tests. 
+#if !defined(__arm__) && !defined(__aarch64__) + EXPECT_NE(Syscall::Call(-1), syscall(-1)); +#endif + +// If possible, test that Syscall::Call(-1) returns the address right +// after +// a kernel entry point. +#if defined(__i386__) + EXPECT_EQ(0x80CDu, ((uint16_t*)Syscall::Call(-1))[-1]); // INT 0x80 +#elif defined(__x86_64__) + EXPECT_EQ(0x050Fu, ((uint16_t*)Syscall::Call(-1))[-1]); // SYSCALL +#elif defined(__arm__) +#if defined(__thumb__) + EXPECT_EQ(0xDF00u, ((uint16_t*)Syscall::Call(-1))[-1]); // SWI 0 +#else + EXPECT_EQ(0xEF000000u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0 +#endif +#elif defined(__mips__) + // Opcode for MIPS syscall is in the lower 16-bits + EXPECT_EQ(0x0cu, (((uint32_t*)Syscall::Call(-1))[-1]) & 0x0000FFFF); +#elif defined(__aarch64__) + EXPECT_EQ(0xD4000001u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0 +#else +#warning Incomplete test case; need port for target platform +#endif +} + +TEST(Syscall, TrivialSyscallNoArgs) { + // Test that we can do basic system calls + EXPECT_EQ(Syscall::Call(__NR_getpid), syscall(__NR_getpid)); +} + +TEST(Syscall, TrivialSyscallOneArg) { + int new_fd; + // Duplicate standard error and close it. + ASSERT_GE(new_fd = Syscall::Call(__NR_dup, 2), 0); + int close_return_value = IGNORE_EINTR(Syscall::Call(__NR_close, new_fd)); + ASSERT_EQ(close_return_value, 0); +} + +TEST(Syscall, TrivialFailingSyscall) { + errno = -42; + int ret = Syscall::Call(__NR_dup, -1); + ASSERT_EQ(-EBADF, ret); + // Verify that Syscall::Call does not touch errno. + ASSERT_EQ(-42, errno); +} + +// SIGSYS trap handler that will be called on __NR_uname. +intptr_t CopySyscallArgsToAux(const struct arch_seccomp_data& args, void* aux) { + // |aux| is our BPF_AUX pointer.
+ std::vector<uint64_t>* const seen_syscall_args = + static_cast<std::vector<uint64_t>*>(aux); + BPF_ASSERT(base::size(args.args) == 6); + seen_syscall_args->assign(args.args, args.args + base::size(args.args)); + return -ENOMEM; +} + +class CopyAllArgsOnUnamePolicy : public bpf_dsl::Policy { + public: + explicit CopyAllArgsOnUnamePolicy(std::vector<uint64_t>* aux) : aux_(aux) {} + ~CopyAllArgsOnUnamePolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + DCHECK(SandboxBPF::IsValidSyscallNumber(sysno)); + if (sysno == __NR_uname) { + return Trap(CopySyscallArgsToAux, aux_); + } else { + return Allow(); + } + } + + private: + std::vector<uint64_t>* aux_; + + DISALLOW_COPY_AND_ASSIGN(CopyAllArgsOnUnamePolicy); +}; + +// We are testing Syscall::Call() by making use of a BPF filter that +// allows us +// to inspect the system call arguments that the kernel saw. +BPF_TEST(Syscall, + SyntheticSixArgs, + CopyAllArgsOnUnamePolicy, + std::vector<uint64_t> /* (*BPF_AUX) */) { + const int kExpectedValue = 42; + // In this test we only pass integers to the kernel. We might want to make + // additional tests to try other types. What we will see depends on + // implementation details of kernel BPF filters and we will need to document + // the expected behavior very clearly. + int syscall_args[6]; + for (size_t i = 0; i < base::size(syscall_args); ++i) { + syscall_args[i] = kExpectedValue + i; + } + + // We could use pretty much any system call we don't need here. uname() is + // nice because it doesn't have any dangerous side effects. + BPF_ASSERT(Syscall::Call(__NR_uname, + syscall_args[0], + syscall_args[1], + syscall_args[2], + syscall_args[3], + syscall_args[4], + syscall_args[5]) == -ENOMEM); + + // We expect the trap handler to have copied the 6 arguments. + BPF_ASSERT(BPF_AUX->size() == 6); + + // Don't loop here so that we can see which argument does cause the failure + // easily from the failing line. 
+ // uint64_t is the type passed to our SIGSYS handler. + BPF_ASSERT((*BPF_AUX)[0] == static_cast<uint64_t>(syscall_args[0])); + BPF_ASSERT((*BPF_AUX)[1] == static_cast<uint64_t>(syscall_args[1])); + BPF_ASSERT((*BPF_AUX)[2] == static_cast<uint64_t>(syscall_args[2])); + BPF_ASSERT((*BPF_AUX)[3] == static_cast<uint64_t>(syscall_args[3])); + BPF_ASSERT((*BPF_AUX)[4] == static_cast<uint64_t>(syscall_args[4])); + BPF_ASSERT((*BPF_AUX)[5] == static_cast<uint64_t>(syscall_args[5])); +} + +TEST(Syscall, ComplexSyscallSixArgs) { + int fd; + const size_t kPageSize = base::GetPageSize(); + + ASSERT_LE(0, + fd = Syscall::Call(__NR_openat, AT_FDCWD, "/dev/null", O_RDWR, 0L)); + + // Use mmap() to allocate some read-only memory + char* addr0; + ASSERT_NE( + (char*)NULL, + addr0 = reinterpret_cast<char*>(Syscall::Call(kMMapNr, + (void*)NULL, + kPageSize, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + fd, + 0L))); + + // Try to replace the existing mapping with a read-write mapping + char* addr1; + ASSERT_EQ(addr0, + addr1 = reinterpret_cast<char*>( + Syscall::Call(kMMapNr, + addr0, + kPageSize, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + fd, + 0L))); + ++*addr1; // This should not seg fault + + // Clean up + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr1, kPageSize)); + EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd))); + + // Check that the offset argument (i.e. the sixth argument) is processed + // correctly. 
+ ASSERT_GE( + fd = Syscall::Call(__NR_openat, AT_FDCWD, "/proc/self/exe", O_RDONLY, 0L), + 0); + char* addr2, *addr3; + ASSERT_NE((char*)NULL, + addr2 = reinterpret_cast<char*>(Syscall::Call(kMMapNr, + (void*)NULL, + 2 * kPageSize, + PROT_READ, + MAP_PRIVATE, + fd, + 0L + ))); + ASSERT_NE((char*)NULL, + addr3 = reinterpret_cast<char*>(Syscall::Call(kMMapNr, + (void*)NULL, + kPageSize, + PROT_READ, + MAP_PRIVATE, + fd, +#if defined(__NR_mmap2) + 1L +#else + kPageSize +#endif + ))); + EXPECT_EQ(0, memcmp(addr2 + kPageSize, addr3, kPageSize)); + + // Just to be absolutely on the safe side, also verify that the file + // contents matches what we are getting from a read() operation. + char buf[2 * kPageSize]; + EXPECT_EQ(2 * kPageSize, static_cast<size_t>(Syscall::Call(__NR_read, + fd, + buf, + 2 * kPageSize + ))); + EXPECT_EQ(0, memcmp(addr2, buf, 2 * kPageSize)); + + // Clean up + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr2, 2 * kPageSize)); + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr3, kPageSize)); + EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd))); +} + +} // namespace + +} // namespace sandbox diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.cc b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.cc new file mode 100644 index 0000000000..9884be8bb2 --- /dev/null +++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.cc @@ -0,0 +1,394 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#include "sandbox/linux/seccomp-bpf/trap.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+
+#include <algorithm>
+#include <limits>
+#include <tuple>
+
+#include "base/compiler_specific.h"
+#include "base/logging.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/seccomp-bpf/die.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+
+namespace {
+
+// Holds the SIGSYS-specific siginfo fields that Trap::SigSys() inspects: the
+// call address (si_call_addr), the system call number (si_syscall) and the
+// architecture (si_arch).
+struct arch_sigsys {
+  void* ip;
+  int nr;
+  unsigned int arch;
+};
+
+// Number of entries by which Trap::Add() grows trap_array_ when it is full.
+const int kCapacityIncrement = 20;
+
+// Unsafe traps can only be turned on, if the user explicitly allowed them
+// by setting the CHROME_SANDBOX_DEBUGGING environment variable.
+const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING";
+
+// We need to tell whether we are performing a "normal" callback, or
+// whether we were called recursively from within a UnsafeTrap() callback.
+// This is a little tricky to do, because we need to somehow get access to
+// per-thread data from within a signal context. Normal TLS storage is not
+// safely accessible at this time. We could roll our own, but that involves
+// a lot of complexity. Instead, we co-opt one bit in the signal mask.
+// If BUS is blocked, we assume that we have been called recursively.
+// There is a possibility for collision with other code that needs to do
+// this, but in practice the risks are low.
+// If SIGBUS turns out to be a problem, we could instead co-opt one of the
+// realtime signals. There are plenty of them. Unfortunately, there is no
+// way to mark a signal as allocated. So, the potential for collision is
+// possibly even worse.
+bool GetIsInSigHandler(const ucontext_t* ctx) {
+  // Note: on Android, sigismember does not take a pointer to const.
+  return sigismember(const_cast<sigset_t*>(&ctx->uc_sigmask), LINUX_SIGBUS);
+}
+
+// Blocks SIGBUS with sys_sigprocmask(); a blocked SIGBUS is the marker that
+// GetIsInSigHandler() tests for. Dies if the signal mask cannot be changed.
+void SetIsInSigHandler() {
+  sigset_t mask;
+  if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGBUS) ||
+      sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &mask, nullptr)) {
+    SANDBOX_DIE("Failed to block SIGBUS");
+  }
+}
+
+// Returns true if |sa| describes the default disposition, i.e. SA_SIGINFO is
+// not set and the handler is SIG_DFL.
+bool IsDefaultSignalAction(const struct sigaction& sa) {
+  return !(sa.sa_flags & SA_SIGINFO) && sa.sa_handler == SIG_DFL;
+}
+
+}  // namespace
+
+namespace sandbox {
+
+// Installs SigSysAction() as the SIGSYS handler and unblocks SIGSYS. Dies if
+// either step fails. A pre-existing non-default SIGSYS handler is reported
+// through DLOG(FATAL) and LOG(ERROR).
+Trap::Trap()
+    : trap_array_(nullptr),
+      trap_array_size_(0),
+      trap_array_capacity_(0),
+      has_unsafe_traps_(false) {
+  // Set new SIGSYS handler
+  struct sigaction sa = {};
+  // In some toolchains, sa_sigaction is not declared in struct sigaction.
+  // So, here cast the pointer to the sa_handler's type. This works because
+  // |sa_handler| and |sa_sigaction| share the same memory.
+  sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction);
+  sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER;
+  struct sigaction old_sa = {};
+  if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) {
+    SANDBOX_DIE("Failed to configure SIGSYS handler");
+  }
+
+  if (!IsDefaultSignalAction(old_sa)) {
+    static const char kExistingSIGSYSMsg[] =
+        "Existing signal handler when trying to install SIGSYS. SIGSYS needs "
+        "to be reserved for seccomp-bpf.";
+    DLOG(FATAL) << kExistingSIGSYSMsg;
+    LOG(ERROR) << kExistingSIGSYSMsg;
+  }
+
+  // Unmask SIGSYS
+  sigset_t mask;
+  if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) ||
+      sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, nullptr)) {
+    SANDBOX_DIE("Failed to configure SIGSYS handler");
+  }
+}
+
+bpf_dsl::TrapRegistry* Trap::Registry() {
+  // Note: This class is not thread safe. It is the caller's responsibility
+  // to avoid race conditions. Normally, this is a non-issue as the sandbox
+  // can only be initialized if there are no other threads present.
+  // Also, this is not a normal singleton. Once created, the global trap
+  // object must never be destroyed again.
+  if (!global_trap_) {
+    global_trap_ = new Trap();
+    if (!global_trap_) {
+      SANDBOX_DIE("Failed to allocate global trap handler");
+    }
+  }
+  return global_trap_;
+}
+
+// Signal handler installed for SIGSYS by the constructor. Applies
+// MSAN_UNPOISON to the non-null arguments and forwards the signal to the
+// global Trap instance; dies if that instance does not exist.
+void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) {
+  if (info) {
+    MSAN_UNPOISON(info, sizeof(*info));
+  }
+
+  // Obtain the signal context. This, most notably, gives us access to
+  // all CPU registers at the time of the signal.
+  ucontext_t* ctx = reinterpret_cast<ucontext_t*>(void_context);
+  if (ctx) {
+    MSAN_UNPOISON(ctx, sizeof(*ctx));
+  }
+
+  if (!global_trap_) {
+    RAW_SANDBOX_DIE(
+        "This can't happen. Found no global singleton instance "
+        "for Trap() handling.");
+  }
+  global_trap_->SigSys(nr, info, ctx);
+}
+
+// Handles one SIGSYS. A signal that does not look like a seccomp-bpf trap
+// carrying a known trap id is logged and otherwise ignored. For a valid trap
+// the system call is either re-issued with its original arguments (when we
+// were entered recursively from an unsafe trap handler) or handed to the
+// TrapFnc registered for the id; the result is then stored in |ctx| as the
+// return value of the system call.
+void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) {
+  // Signal handlers should always preserve "errno". Otherwise, we could
+  // trigger really subtle bugs.
+  const int old_errno = errno;
+
+  // Various sanity checks to make sure we actually received a signal
+  // triggered by a BPF filter. If something else triggered SIGSYS
+  // (e.g. kill()), there is really nothing we can do with this signal.
+  // |info| and |ctx| are tested before any field of |info| is read, because
+  // SigSysAction() treats both pointers as possibly null.
+  if (nr != LINUX_SIGSYS || !info || !ctx || info->si_code != SYS_SECCOMP ||
+      info->si_errno <= 0 ||
+      static_cast<size_t>(info->si_errno) > trap_array_size_) {
+    // ATI drivers seem to send SIGSYS, so this cannot be FATAL.
+    // See crbug.com/178166.
+    // TODO(jln): add a DCHECK or move back to FATAL.
+    RAW_LOG(ERROR, "Unexpected SIGSYS received.");
+    errno = old_errno;
+    return;
+  }
+
+  // Obtain the siginfo information that is specific to SIGSYS.
+  struct arch_sigsys sigsys;
+#if defined(si_call_addr) && !defined(__native_client_nonsfi__)
+  sigsys.ip = info->si_call_addr;
+  sigsys.nr = info->si_syscall;
+  sigsys.arch = info->si_arch;
+#else
+  // If the version of glibc doesn't include this information in
+  // siginfo_t (older than 2.17), we need to explicitly copy it
+  // into an arch_sigsys structure.
+  memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
+#endif
+
+#if defined(__mips__)
+  // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the
+  // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the
+  // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx)
+  bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) &&
+                          sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx));
+#else
+  bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx));
+#endif
+
+  // Some more sanity checks.
+  if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) ||
+      sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) {
+    // TODO(markus):
+    // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
+    // safe and can lead to bugs. We should eventually implement a different
+    // logging and reporting mechanism that is safe to be called from
+    // the sigSys() handler.
+    RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS.");
+  }
+
+  intptr_t rc;
+  if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
+    errno = old_errno;
+    if (sigsys.nr == __NR_clone) {
+      RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler.");
+    }
+#if defined(__mips__)
+    // Mips supports up to eight arguments for syscall.
+    // However, seccomp bpf can filter only up to six arguments, so using eight
+    // arguments makes sense only when using UnsafeTrap() handler.
+    rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
+                       SECCOMP_PARM1(ctx),
+                       SECCOMP_PARM2(ctx),
+                       SECCOMP_PARM3(ctx),
+                       SECCOMP_PARM4(ctx),
+                       SECCOMP_PARM5(ctx),
+                       SECCOMP_PARM6(ctx),
+                       SECCOMP_PARM7(ctx),
+                       SECCOMP_PARM8(ctx));
+#else
+    rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
+                       SECCOMP_PARM1(ctx),
+                       SECCOMP_PARM2(ctx),
+                       SECCOMP_PARM3(ctx),
+                       SECCOMP_PARM4(ctx),
+                       SECCOMP_PARM5(ctx),
+                       SECCOMP_PARM6(ctx));
+#endif  // defined(__mips__)
+  } else {
+    const TrapKey& trap = trap_array_[info->si_errno - 1];
+    if (!trap.safe) {
+      SetIsInSigHandler();
+    }
+
+    // Copy the seccomp-specific data into an arch_seccomp_data structure. This
+    // is what we are showing to TrapFnc callbacks that the system call
+    // evaluator registered with the sandbox.
+    struct arch_seccomp_data data = {
+        static_cast<int>(SECCOMP_SYSCALL(ctx)),
+        SECCOMP_ARCH,
+        reinterpret_cast<uint64_t>(sigsys.ip),
+        {static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM6(ctx))}};
+
+    // Now call the TrapFnc callback associated with this particular instance
+    // of SECCOMP_RET_TRAP.
+    rc = trap.fnc(data, const_cast<void*>(trap.aux));
+  }
+
+  // Update the CPU register that stores the return code of the system call
+  // that we just handled, and restore "errno" to the value that it had
+  // before entering the signal handler.
+  Syscall::PutValueInUcontext(rc, ctx);
+  errno = old_errno;
+
+  return;
+}
+
+// Orders keys lexicographically by (fnc, aux, safe), so that TrapKey can be
+// used as the key type of the TrapIds map.
+bool Trap::TrapKey::operator<(const TrapKey& o) const {
+  return std::tie(fnc, aux, safe) < std::tie(o.fnc, o.aux, o.safe);
+}
+
+// Returns the id (in the range 1..0xFFFF) associated with the (fnc, aux, safe)
+// triple, registering the triple first if it has not been seen before. Dies if
+// an unsafe trap is requested while CHROME_SANDBOX_DEBUGGING is not enabled,
+// or if no more ids are available.
+uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) {
+  if (!safe && !SandboxDebuggingAllowedByUser()) {
+    // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable,
+    // we never return an ErrorCode that is marked as "unsafe". This also
+    // means, the BPF compiler will never emit code that allows unsafe system
+    // calls to by-pass the filter (because they use the magic return address
+    // from Syscall::Call(-1)).
+
+    // This SANDBOX_DIE() can optionally be removed. It won't break security,
+    // but it might make error messages from the BPF compiler a little harder
+    // to understand. Removing the SANDBOX_DIE() allows callers to easily check
+    // whether unsafe traps are supported (by checking whether the returned
+    // ErrorCode is ET_INVALID).
+    SANDBOX_DIE(
+        "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING "
+        "is enabled");
+
+    return 0;
+  }
+
+  // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
+  // of a SECCOMP_RET_TRAP.
+  TrapKey key(fnc, aux, safe);
+
+  // We return unique identifiers together with SECCOMP_RET_TRAP. This allows
+  // us to associate trap with the appropriate handler. The kernel allows us
+  // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to
+  // avoid 0, as it could be confused for a trap without any specific id.
+  // The nice thing about sequentially numbered identifiers is that we can also
+  // trivially look them up from our signal handler without making any system
+  // calls that might be async-signal-unsafe.
+  // In order to do so, we store all of our traps in a C-style trap_array_.
+
+  TrapIds::const_iterator iter = trap_ids_.find(key);
+  if (iter != trap_ids_.end()) {
+    // We have seen this pair before. Return the same id that we assigned
+    // earlier.
+    return iter->second;
+  }
+
+  // This is a new pair. Remember it and assign a new id.
+  if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ ||
+      trap_array_size_ >= std::numeric_limits<uint16_t>::max()) {
+    // In practice, this is pretty much impossible to trigger, as there
+    // are other kernel limitations that restrict overall BPF program sizes.
+    SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
+  }
+
+  // Our callers ensure that there are no other threads accessing trap_array_
+  // concurrently (typically this is done by ensuring that we are single-
+  // threaded while the sandbox is being set up). But we nonetheless are
+  // modifying a live data structure that could be accessed any time a
+  // system call is made; as system calls could be triggering SIGSYS.
+  // So, we have to be extra careful that we update trap_array_ atomically.
+  // In particular, this means we shouldn't be using realloc() to resize it.
+  // Instead, we allocate a new array, copy the values, and then switch the
+  // pointer. We only really care about the pointer being updated atomically
+  // and the data that is pointed to being valid, as these are the only
+  // values accessed from the signal handler. It is OK if trap_array_size_
+  // is inconsistent with the pointer, as it is monotonically increasing.
+  // Also, we only care about compiler barriers, as the signal handler is
+  // triggered synchronously from a system call. We don't have to protect
+  // against issues with the memory model or with completely asynchronous
+  // events.
+  if (trap_array_size_ >= trap_array_capacity_) {
+    trap_array_capacity_ += kCapacityIncrement;
+    TrapKey* old_trap_array = trap_array_;
+    TrapKey* new_trap_array = new TrapKey[trap_array_capacity_];
+    std::copy_n(old_trap_array, trap_array_size_, new_trap_array);
+
+    // Language specs are unclear on whether the compiler is allowed to move
+    // the "delete[]" above our preceding assignments and/or memory moves,
+    // iff the compiler believes that "delete[]" doesn't have any other
+    // global side-effects.
+    // We insert optimization barriers to prevent this from happening.
+    // The first barrier is probably not needed, but better be explicit in
+    // what we want to tell the compiler.
+    // The clang developer mailing list couldn't answer whether this is a
+    // legitimate worry; but they at least thought that the barrier is
+    // sufficient to prevent the (so far hypothetical) problem of re-ordering
+    // of instructions by the compiler.
+    //
+    // TODO(mdempsky): Try to clean this up using base/atomicops or C++11
+    // atomics; see crbug.com/414363.
+    asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory");
+    trap_array_ = new_trap_array;
+    asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory");
+
+    delete[] old_trap_array;
+  }
+
+  // The range check above guarantees that the new id fits into 16 bits.
+  const uint16_t id = static_cast<uint16_t>(trap_array_size_ + 1);
+  trap_ids_[key] = id;
+  trap_array_[trap_array_size_] = key;
+  trap_array_size_++;
+  return id;
+}
+
+// Returns true if the CHROME_SANDBOX_DEBUGGING environment variable is set to
+// a non-empty value.
+bool Trap::SandboxDebuggingAllowedByUser() {
+  const char* debug_flag = getenv(kSandboxDebuggingEnv);
+  return debug_flag && *debug_flag;
+}
+
+// Turns on unsafe traps if SandboxDebuggingAllowedByUser() permits it and
+// returns whether unsafe traps are enabled after the call.
+bool Trap::EnableUnsafeTraps() {
+  if (!has_unsafe_traps_) {
+    // Unsafe traps are a one-way fuse. Once enabled, they can never be turned
+    // off again.
+    // We only allow enabling unsafe traps, if the user explicitly set an
+    // appropriate environment variable. This prevents bugs that accidentally
+    // disable all sandboxing for all users.
+    if (SandboxDebuggingAllowedByUser()) {
+      // We only ever print this message once, when we enable unsafe traps the
+      // first time.
+      SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
+      has_unsafe_traps_ = true;
+    } else {
+      SANDBOX_INFO(
+          "Cannot disable sandbox and use unsafe traps unless "
+          "CHROME_SANDBOX_DEBUGGING is turned on first");
+    }
+  }
+  // Returns the, possibly updated, value of has_unsafe_traps_.
+  return has_unsafe_traps_;
+}
+
+Trap* Trap::global_trap_;
+
+}  // namespace sandbox
diff --git a/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.h b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.h
new file mode 100644
index 0000000000..a73d2064b4
--- /dev/null
+++ b/security/sandbox/chromium/sandbox/linux/seccomp-bpf/trap.h
@@ -0,0 +1,86 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+
+#include "base/macros.h"
+#include "sandbox/linux/bpf_dsl/trap_registry.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// The Trap class allows a BPF filter program to branch out to user space by
+// raising a SIGSYS signal.
+// N.B.: This class does not perform any synchronization operations. If
+//       modifications are made to any of the traps, it is the caller's
+//       responsibility to ensure that this happens in a thread-safe fashion.
+//       Preferably, that means that no other threads should be running at that
+//       time. For the purposes of our sandbox, this assertion should always be
+//       true. Threads are incompatible with the seccomp sandbox anyway.
+class SANDBOX_EXPORT Trap : public bpf_dsl::TrapRegistry {
+ public:
+  uint16_t Add(TrapFnc fnc, const void* aux, bool safe) override;
+
+  bool EnableUnsafeTraps() override;
+
+  // Registry returns the trap registry used by Trap's SIGSYS handler,
+  // creating it if necessary.
+  static bpf_dsl::TrapRegistry* Registry();
+
+  // SandboxDebuggingAllowedByUser returns whether the
+  // "CHROME_SANDBOX_DEBUGGING" environment variable is set and non-empty.
+  static bool SandboxDebuggingAllowedByUser();
+
+ private:
+  struct TrapKey {
+    TrapKey() : fnc(nullptr), aux(nullptr), safe(false) {}
+    TrapKey(TrapFnc f, const void* a, bool s) : fnc(f), aux(a), safe(s) {}
+    TrapFnc fnc;
+    const void* aux;
+    bool safe;
+    bool operator<(const TrapKey&) const;
+  };
+  typedef std::map<TrapKey, uint16_t> TrapIds;
+
+  // Our constructor is private. A shared global instance is created
+  // automatically as needed.
+  Trap();
+
+  // The destructor is deleted as destroying this object would
+  // break subsequent system calls that trigger a SIGSYS.
+  ~Trap() = delete;
+
+  static void SigSysAction(int nr, LinuxSigInfo* info, void* void_context);
+
+  // Make sure that SigSys is not inlined in order to get slightly better crash
+  // dumps.
+  void SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx)
+      __attribute__((noinline));
+  // We have a global singleton that handles all of our SIGSYS traps. This
+  // variable must never be deallocated after it has been set up initially, as
+  // there is no way to reset in-kernel BPF filters that generate SIGSYS
+  // events.
+  static Trap* global_trap_;
+
+  TrapIds trap_ids_;            // Maps from TrapKeys to numeric ids
+  TrapKey* trap_array_;         // Array of TrapKeys indexed by ids
+  size_t trap_array_size_;      // Currently used size of array
+  size_t trap_array_capacity_;  // Currently allocated capacity of array
+  bool has_unsafe_traps_;       // Whether unsafe traps have been enabled
+
+  // Copying and assigning is unimplemented. It doesn't make sense for a
+  // singleton.
+  DISALLOW_COPY_AND_ASSIGN(Trap);
+};
+
+}  // namespace sandbox
+
+#endif  // SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
|