diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /security/sandbox/linux/launch/SandboxLaunch.cpp | |
parent | Initial commit. (diff) | |
download | firefox-esr-upstream.tar.xz firefox-esr-upstream.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/sandbox/linux/launch/SandboxLaunch.cpp')
-rw-r--r-- | security/sandbox/linux/launch/SandboxLaunch.cpp | 715 |
1 files changed, 715 insertions, 0 deletions
diff --git a/security/sandbox/linux/launch/SandboxLaunch.cpp b/security/sandbox/linux/launch/SandboxLaunch.cpp new file mode 100644 index 0000000000..d6f3ec9c77 --- /dev/null +++ b/security/sandbox/linux/launch/SandboxLaunch.cpp @@ -0,0 +1,715 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SandboxLaunch.h" + +#include <fcntl.h> +#include <sched.h> +#include <setjmp.h> +#include <signal.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <utility> + +#include "LinuxCapabilities.h" +#include "LinuxSched.h" +#include "SandboxChrootProto.h" +#include "SandboxInfo.h" +#include "SandboxLogging.h" +#include "base/eintr_wrapper.h" +#include "base/strings/safe_sprintf.h" +#include "mozilla/Array.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Preferences.h" +#include "mozilla/SandboxReporter.h" +#include "mozilla/SandboxSettings.h" +#include "mozilla/Components.h" +#include "mozilla/StaticPrefs_media.h" +#include "mozilla/StaticPrefs_security.h" +#include "mozilla/Unused.h" +#include "nsCOMPtr.h" +#include "nsDebug.h" +#include "nsIGfxInfo.h" +#include "nsString.h" +#include "nsThreadUtils.h" +#include "prenv.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +#ifdef MOZ_X11 +# ifndef MOZ_WIDGET_GTK +# error "Unknown toolkit" +# endif +# include "mozilla/WidgetUtilsGtk.h" +# include <gdk/gdk.h> +# include <gdk/gdkx.h> +# include "X11UndefineNone.h" +# include "gfxPlatform.h" +#endif + +namespace mozilla { + +// Returns true if graphics will work from a content process +// started in a new network namespace. Specifically, named +// Unix-domain sockets will work, but TCP/IP will not, even if it's a +// connection to localhost: the child process has its own private +// loopback interface. +// +// (Longer-term we intend to either proxy or remove X11 access from +// content processes, at which point this will stop being an issue.) +static bool IsGraphicsOkWithoutNetwork() { + // For X11, check whether the parent's connection is a Unix-domain + // socket. This is done instead of trying to parse the display name + // because an empty hostname (e.g., ":0") will fall back to TCP in + // case of failure to connect using Unix-domain sockets. +#ifdef MOZ_X11 + // First, ensure that the parent process's graphics are initialized. + DebugOnly<gfxPlatform*> gfxPlatform = gfxPlatform::GetPlatform(); + + const auto display = gdk_display_get_default(); + if (!display) { + // In this case, the browser is headless, but WebGL could still + // try to use X11. However, WebGL isn't supported with remote + // X11, and in any case these connections are made after sandbox + // startup (lazily when WebGL is used), so they aren't being done + // directly by the process anyway. (For local X11, they're + // brokered.) + MOZ_ASSERT(gfxPlatform->IsHeadless()); + return true; + } + if (mozilla::widget::GdkIsX11Display(display)) { + const int xSocketFd = ConnectionNumber(GDK_DISPLAY_XDISPLAY(display)); + if (NS_WARN_IF(xSocketFd < 0)) { + return false; + } + + int domain; + socklen_t optlen = static_cast<socklen_t>(sizeof(domain)); + int rv = getsockopt(xSocketFd, SOL_SOCKET, SO_DOMAIN, &domain, &optlen); + if (NS_WARN_IF(rv != 0)) { + return false; + } + MOZ_RELEASE_ASSERT(static_cast<size_t>(optlen) == sizeof(domain)); + if (domain != AF_LOCAL) { + return false; + } + // There's one more complication: Xorg listens on named sockets + // (actual filesystem nodes) as well as abstract addresses (opaque + // octet strings scoped to the network namespace; this is a Linux + // extension). + // + // Inside a container environment (e.g., when running as a Snap + // package), it's possible that only the abstract addresses are + // accessible. In that case, the display must be considered + // remote. See also bug 1450740. + // + // Unfortunately, the Xorg client libraries prefer the abstract + // addresses, so this isn't directly detectable by inspecting the + // parent process's socket. Instead, parse the DISPLAY env var + // (which was updated if necessary in nsAppRunner.cpp) to get the + // display number and construct the socket path, falling back to + // testing the directory in case that doesn't work. (See bug + // 1565972 and bug 1559368 for cases where we need to test the + // specific socket.) + const char* const displayStr = PR_GetEnv("DISPLAY"); + nsAutoCString socketPath("/tmp/.X11-unix"); + int accessFlags = X_OK; + int displayNum; + // sscanf ignores trailing text, so display names with a screen + // number (e.g., ":0.2") will parse correctly. + if (displayStr && (sscanf(displayStr, ":%d", &displayNum) == 1 || + sscanf(displayStr, "unix:%d", &displayNum) == 1)) { + socketPath.AppendPrintf("/X%d", displayNum); + accessFlags = R_OK | W_OK; + } + if (access(socketPath.get(), accessFlags) != 0) { + SANDBOX_LOG_ERRNO( + "%s is inaccessible; can't isolate network namespace in" + " content processes", + socketPath.get()); + return false; + } + } +#endif + + // Assume that other backends (e.g., Wayland) will not use the + // network namespace. + return true; +} + +bool HasAtiDrivers() { + nsCOMPtr<nsIGfxInfo> gfxInfo = components::GfxInfo::Service(); + nsAutoString vendorID; + static const Array<nsresult (nsIGfxInfo::*)(nsAString&), 2> kMethods = { + &nsIGfxInfo::GetAdapterVendorID, + &nsIGfxInfo::GetAdapterVendorID2, + }; + for (const auto method : kMethods) { + if (NS_SUCCEEDED((gfxInfo->*method)(vendorID))) { + // This test is based on telemetry data. The proprietary ATI + // drivers seem to use this vendor string, including for some + // newer devices that have AMD branding in the device name, such + // as those using AMDGPU-PRO drivers. + // The open-source drivers integrated into Mesa appear to use + // the vendor ID "X.Org" instead. + if (vendorID.EqualsLiteral("ATI Technologies Inc.")) { + return true; + } + } + } + + return false; +} + +// Content processes may need direct access to SysV IPC in certain +// uncommon use cases. +static bool ContentNeedsSysVIPC() { + // The ALSA dmix plugin uses SysV semaphores and shared memory to + // coordinate software mixing. +#ifdef MOZ_ALSA + if (!StaticPrefs::media_cubeb_sandbox()) { + return true; + } +#endif + + if (!StaticPrefs::security_sandbox_content_headless_AtStartup()) { + // Bug 1438391: VirtualGL uses SysV shm for images and configuration. + if (PR_GetEnv("VGL_ISACTIVE") != nullptr) { + return true; + } + + // The fglrx (ATI Catalyst) GPU drivers use SysV IPC. + if (HasAtiDrivers()) { + return true; + } + } + + return false; +} + +static void PreloadSandboxLib(base::environment_map* aEnv) { + // Preload libmozsandbox.so so that sandbox-related interpositions + // can be defined there instead of in the executable. + // (This could be made conditional on intent to use sandboxing, but + // it's harmless for non-sandboxed processes.) + nsAutoCString preload; + // Prepend this, because people can and do preload libpthread. + // (See bug 1222500.) + preload.AssignLiteral("libmozsandbox.so"); + if (const char* oldPreload = PR_GetEnv("LD_PRELOAD")) { + // Doesn't matter if oldPreload is ""; extra separators are ignored. + preload.Append(' '); + preload.Append(oldPreload); + (*aEnv)["MOZ_ORIG_LD_PRELOAD"] = oldPreload; + } + MOZ_ASSERT(aEnv->count("LD_PRELOAD") == 0); + (*aEnv)["LD_PRELOAD"] = preload.get(); +} + +static void AttachSandboxReporter(base::file_handle_mapping_vector* aFdMap) { + int srcFd, dstFd; + SandboxReporter::Singleton()->GetClientFileDescriptorMapping(&srcFd, &dstFd); + aFdMap->push_back({srcFd, dstFd}); +} + +class SandboxFork : public base::LaunchOptions::ForkDelegate { + public: + explicit SandboxFork(int aFlags, bool aChroot, int aServerFd = -1, + int aClientFd = -1); + virtual ~SandboxFork(); + + void PrepareMapping(base::file_handle_mapping_vector* aMap); + pid_t Fork() override; + + private: + int mFlags; + int mChrootServer; + int mChrootClient; + + void StartChrootServer(); + SandboxFork(const SandboxFork&) = delete; + SandboxFork& operator=(const SandboxFork&) = delete; +}; + +static int GetEffectiveSandboxLevel(GeckoProcessType aType) { + auto info = SandboxInfo::Get(); + switch (aType) { + case GeckoProcessType_GMPlugin: + if (info.Test(SandboxInfo::kEnabledForMedia)) { + return 1; + } + return 0; + case GeckoProcessType_Content: +#ifdef MOZ_ENABLE_FORKSERVER + // With this env MOZ_SANDBOXED will be set, and mozsandbox will + // be preloaded for the fork server. The content processes rely + // on wrappers defined by mozsandbox to work properly. + case GeckoProcessType_ForkServer: +#endif + // GetEffectiveContentSandboxLevel is main-thread-only due to prefs. + MOZ_ASSERT(NS_IsMainThread()); + if (info.Test(SandboxInfo::kEnabledForContent)) { + return GetEffectiveContentSandboxLevel(); + } + return 0; + case GeckoProcessType_RDD: + return PR_GetEnv("MOZ_DISABLE_RDD_SANDBOX") == nullptr ? 1 : 0; + case GeckoProcessType_Socket: + // GetEffectiveSocketProcessSandboxLevel is main-thread-only due to prefs. + MOZ_ASSERT(NS_IsMainThread()); + return GetEffectiveSocketProcessSandboxLevel(); + case GeckoProcessType_Utility: + return PR_GetEnv("MOZ_DISABLE_UTILITY_SANDBOX") == nullptr ? 1 : 0; + default: + return 0; + } +} + +void SandboxLaunchPrepare(GeckoProcessType aType, + base::LaunchOptions* aOptions) { + auto info = SandboxInfo::Get(); + + // We won't try any kind of sandboxing without seccomp-bpf. + if (!info.Test(SandboxInfo::kHasSeccompBPF)) { + return; + } + + // Check prefs (and env vars) controlling sandbox use. + int level = GetEffectiveSandboxLevel(aType); + if (level == 0) { + return; + } + + // At this point, we know we'll be using sandboxing; generic + // sandboxing support goes here. The MOZ_SANDBOXED env var tells + // the child process whether this is the case. + aOptions->env_map["MOZ_SANDBOXED"] = "1"; + PreloadSandboxLib(&aOptions->env_map); + AttachSandboxReporter(&aOptions->fds_to_remap); + + bool canChroot = false; + int flags = 0; + + if (aType == GeckoProcessType_Content && level >= 1) { + static const bool needSysV = ContentNeedsSysVIPC(); + if (needSysV) { + // Tell the child process so it can adjust its seccomp-bpf + // policy. + aOptions->env_map["MOZ_SANDBOX_ALLOW_SYSV"] = "1"; + } else { + flags |= CLONE_NEWIPC; + } + + if (StaticPrefs::security_sandbox_content_headless_AtStartup()) { + aOptions->env_map["MOZ_HEADLESS"] = "1"; + } + } + + // Anything below this requires unprivileged user namespaces. + if (!info.Test(SandboxInfo::kHasUserNamespaces)) { + return; + } + + switch (aType) { + case GeckoProcessType_Socket: + if (level >= 1) { + canChroot = true; + flags |= CLONE_NEWIPC; + } + break; + case GeckoProcessType_GMPlugin: + case GeckoProcessType_RDD: + if (level >= 1) { + canChroot = true; + // Can't use CLONE_NEWIPC because of intel-media-driver. + flags |= CLONE_NEWNET; + } + break; + case GeckoProcessType_Content: + if (level >= 4) { + canChroot = true; + + // Unshare network namespace if allowed by graphics; see + // function definition above for details. (The display + // local-ness is cached because it won't change.) + static const bool canCloneNet = + StaticPrefs::security_sandbox_content_headless_AtStartup() || + (IsGraphicsOkWithoutNetwork() && + !PR_GetEnv("RENDERDOC_CAPTUREOPTS")); + + if (canCloneNet) { + flags |= CLONE_NEWNET; + } + } + // Hidden pref to allow testing user namespaces separately, even + // if there's nothing that would require them. + if (Preferences::GetBool("security.sandbox.content.force-namespace", + false)) { + flags |= CLONE_NEWUSER; + } + break; + default: + // Nothing yet. + break; + } + + if (canChroot || flags != 0) { + flags |= CLONE_NEWUSER; + auto forker = MakeUnique<SandboxFork>(flags, canChroot); + forker->PrepareMapping(&aOptions->fds_to_remap); + aOptions->fork_delegate = std::move(forker); + // Pass to |SandboxLaunchForkServerPrepare()| in the fork server. + aOptions->env_map[kSandboxChrootEnvFlag] = + std::to_string(canChroot ? 1 : 0) + std::to_string(flags); + } +} + +#if defined(MOZ_ENABLE_FORKSERVER) +/** + * Called by the fork server to install a fork delegator. + * + * In the case of fork server, the value of the flags of |SandboxFork| + * are passed as an env variable to the fork server so that we can + * recreate a |SandboxFork| as a fork delegator at the fork server. + */ +void SandboxLaunchForkServerPrepare(const std::vector<std::string>& aArgv, + base::LaunchOptions& aOptions) { + auto chroot = std::find_if( + aOptions.env_map.begin(), aOptions.env_map.end(), + [](auto& elt) { return elt.first == kSandboxChrootEnvFlag; }); + if (chroot == aOptions.env_map.end()) { + return; + } + bool canChroot = chroot->second.c_str()[0] == '1'; + int flags = atoi(chroot->second.c_str() + 1); + MOZ_ASSERT(flags || canChroot); + + // Find chroot server fd. It is supposed to be map to + // kSandboxChrootServerFd so that we find it out from the mapping. + auto fdmap = std::find_if( + aOptions.fds_to_remap.begin(), aOptions.fds_to_remap.end(), + [](auto& elt) { return elt.second == kSandboxChrootServerFd; }); + MOZ_ASSERT(fdmap != aOptions.fds_to_remap.end(), + "ChrootServerFd is not found with sandbox chroot"); + int chrootserverfd = fdmap->first; + aOptions.fds_to_remap.erase(fdmap); + + // Set only the chroot server fd, not the client fd. Because, the + // client fd is already in |fds_to_remap|, we don't need the forker + // to do it again. And, the forker need only the server fd, that + // chroot server uses it to sync with the client (content). See + // |SandboxFox::StartChrootServer()|. + auto forker = MakeUnique<SandboxFork>(flags, canChroot, chrootserverfd); + aOptions.fork_delegate = std::move(forker); +} +#endif + +SandboxFork::SandboxFork(int aFlags, bool aChroot, int aServerFd, int aClientFd) + : mFlags(aFlags), mChrootServer(aServerFd), mChrootClient(aClientFd) { + if (aChroot && mChrootServer < 0) { + int fds[2]; + int rv = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, fds); + if (rv != 0) { + SANDBOX_LOG_ERRNO("socketpair"); + MOZ_CRASH("socketpair failed"); + } + mChrootClient = fds[0]; + mChrootServer = fds[1]; + } +} + +void SandboxFork::PrepareMapping(base::file_handle_mapping_vector* aMap) { + MOZ_ASSERT(XRE_GetProcessType() != GeckoProcessType_ForkServer); + if (mChrootClient >= 0) { + aMap->push_back({mChrootClient, kSandboxChrootClientFd}); + } +#if defined(MOZ_ENABLE_FORKSERVER) + if (mChrootServer >= 0) { + aMap->push_back({mChrootServer, kSandboxChrootServerFd}); + } +#endif +} + +SandboxFork::~SandboxFork() { + if (mChrootClient >= 0) { + close(mChrootClient); + } + if (mChrootServer >= 0) { + close(mChrootServer); + } +} + +static void BlockAllSignals(sigset_t* aOldSigs) { + sigset_t allSigs; + int rv = sigfillset(&allSigs); + MOZ_RELEASE_ASSERT(rv == 0); + rv = pthread_sigmask(SIG_BLOCK, &allSigs, aOldSigs); + if (rv != 0) { + SANDBOX_LOG_WITH_ERROR(rv, "pthread_sigmask (block all)"); + MOZ_CRASH("pthread_sigmask"); + } +} + +static void RestoreSignals(const sigset_t* aOldSigs) { + // Assuming that pthread_sigmask is a thin layer over rt_sigprocmask + // and doesn't try to touch TLS, which may be in an "interesting" + // state right now: + int rv = pthread_sigmask(SIG_SETMASK, aOldSigs, nullptr); + if (rv != 0) { + SANDBOX_LOG_WITH_ERROR(rv, "pthread_sigmask (restore)"); + MOZ_CRASH("pthread_sigmask"); + } +} + +static bool IsSignalIgnored(int aSig) { + struct sigaction sa {}; + + if (sigaction(aSig, nullptr, &sa) != 0) { + if (errno != EINVAL) { + SANDBOX_LOG_ERRNO("sigaction(%d)", aSig); + } + return false; + } + return sa.sa_handler == SIG_IGN; +} + +static void ResetSignalHandlers() { + for (int signum = 1; signum <= SIGRTMAX; ++signum) { + if (IsSignalIgnored(signum)) { + continue; + } + if (signal(signum, SIG_DFL) == SIG_ERR) { + MOZ_DIAGNOSTIC_ASSERT(errno == EINVAL); + } + } +} + +namespace { + +// The libc clone() routine insists on calling a provided function on +// a new stack, even if the address space isn't shared and it would be +// safe to expose the underlying system call's fork()-like behavior. +// So, we work around this by longjmp()ing back onto the original stack; +// this technique is also used by Chromium. +// +// In theory, the clone syscall could be used directly if we ensure +// that functions like raise() are never used in the child, including +// by inherited signal handlers, but the longjmp approach isn't much +// extra code and avoids a class of potential bugs. +static int CloneCallee(void* aPtr) { + auto ctxPtr = reinterpret_cast<jmp_buf*>(aPtr); + longjmp(*ctxPtr, 1); + MOZ_CRASH("unreachable"); + return 1; +} + +// According to the Chromium developers, builds with FORTIFY_SOURCE +// require that longjump move the stack pointer towards the root +// function of the call stack. Therefore, we must ensure that the +// clone callee stack is leafward of the stack pointer captured in +// setjmp() below by using this no-inline helper function. +// +// ASan apparently also causes problems, by the combination of +// allocating the large stack-allocated buffer outside of the actual +// stack and then assuming that longjmp is used only to unwind a +// stack, not switch stacks. +// +// Valgrind would disapprove of using clone() without CLONE_VM; +// Chromium uses the raw syscall as a workaround in that case, but +// we don't currently support sandboxing under valgrind. +MOZ_NEVER_INLINE MOZ_ASAN_IGNORE static pid_t DoClone(int aFlags, + jmp_buf* aCtx) { + static constexpr size_t kStackAlignment = 16; + uint8_t miniStack[4096] __attribute__((aligned(kStackAlignment))); +#ifdef __hppa__ + void* stackPtr = miniStack; +#else + void* stackPtr = ArrayEnd(miniStack); +#endif + return clone(CloneCallee, stackPtr, aFlags, aCtx); +} + +} // namespace + +// Similar to fork(), but allows passing flags to clone() and does not +// run pthread_atfork hooks. +static pid_t ForkWithFlags(int aFlags) { + // Don't allow flags that would share the address space, or + // require clone() arguments we're not passing: + static const int kBadFlags = CLONE_VM | CLONE_VFORK | CLONE_SETTLS | + CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | + CLONE_CHILD_CLEARTID; + MOZ_RELEASE_ASSERT((aFlags & kBadFlags) == 0); + + // Block signals due to small stack in DoClone. + sigset_t oldSigs; + BlockAllSignals(&oldSigs); + + int ret = 0; + jmp_buf ctx; + if (setjmp(ctx) == 0) { + // In the parent and just called setjmp: + ret = DoClone(aFlags | SIGCHLD, &ctx); + } + RestoreSignals(&oldSigs); + // In the child and have longjmp'ed: + return ret; +} + +static bool WriteStringToFile(const char* aPath, const char* aStr, + const size_t aLen) { + int fd = open(aPath, O_WRONLY); + if (fd < 0) { + return false; + } + ssize_t written = write(fd, aStr, aLen); + if (close(fd) != 0 || written != ssize_t(aLen)) { + return false; + } + return true; +} + +// This function sets up uid/gid mappings that preserve the +// process's previous ids. Mapping the uid/gid to something is +// necessary in order to nest user namespaces (not currently being +// used, but could be useful), and leaving the ids unchanged is +// likely to minimize unexpected side-effects. +static void ConfigureUserNamespace(uid_t uid, gid_t gid) { + using base::strings::SafeSPrintf; + char buf[sizeof("18446744073709551615 18446744073709551615 1")]; + size_t len; + + len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", uid, uid)); + MOZ_RELEASE_ASSERT(len < sizeof(buf)); + if (!WriteStringToFile("/proc/self/uid_map", buf, len)) { + MOZ_CRASH("Failed to write /proc/self/uid_map"); + } + + // In recent kernels (3.19, 3.18.2, 3.17.8), for security reasons, + // establishing gid mappings will fail unless the process first + // revokes its ability to call setgroups() by using a /proc node + // added in the same set of patches. + Unused << WriteStringToFile("/proc/self/setgroups", "deny", 4); + + len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", gid, gid)); + MOZ_RELEASE_ASSERT(len < sizeof(buf)); + if (!WriteStringToFile("/proc/self/gid_map", buf, len)) { + MOZ_CRASH("Failed to write /proc/self/gid_map"); + } +} + +static void DropAllCaps() { + if (!LinuxCapabilities().SetCurrent()) { + SANDBOX_LOG_ERRNO("capset (drop all)"); + } +} + +pid_t SandboxFork::Fork() { + if (mFlags == 0) { + MOZ_ASSERT(mChrootServer < 0); + return fork(); + } + + uid_t uid = getuid(); + gid_t gid = getgid(); + + // Block signals so that the handlers can be safely reset in the + // child process without races, and so that repeated SIGPROF from + // the profiler won't prevent clone() from making progress. (The + // profiler uses pthread_atfork to do that, but ForkWithFlags + // can't run atfork hooks.) + sigset_t oldSigs; + BlockAllSignals(&oldSigs); + pid_t pid = ForkWithFlags(mFlags); + if (pid != 0) { + RestoreSignals(&oldSigs); + return pid; + } + + // WARNING: all code from this point on (and in StartChrootServer) + // must be async signal safe. In particular, it cannot do anything + // that could allocate heap memory or use mutexes. + prctl(PR_SET_NAME, "Sandbox Forked"); + + // Clear signal handlers in the child, under the assumption that any + // actions they would take (running the crash reporter, manipulating + // the Gecko profile, etc.) wouldn't work correctly in the child. + ResetSignalHandlers(); + RestoreSignals(&oldSigs); + ConfigureUserNamespace(uid, gid); + + if (mChrootServer >= 0) { + StartChrootServer(); + } + + // execve() will drop capabilities, but it seems best to also drop + // them here in case they'd do something unexpected in the generic + // post-fork code. + DropAllCaps(); + return 0; +} + +void SandboxFork::StartChrootServer() { + // Run the rest of this function in a separate process that can + // chroot() on behalf of this process after it's sandboxed. + pid_t pid = ForkWithFlags(CLONE_FS); + if (pid < 0) { + MOZ_CRASH("failed to clone chroot helper process"); + } + if (pid > 0) { + return; + } + prctl(PR_SET_NAME, "Chroot Helper"); + + LinuxCapabilities caps; + caps.Effective(CAP_SYS_CHROOT) = true; + if (!caps.SetCurrent()) { + SANDBOX_LOG_ERRNO("capset (chroot helper)"); + MOZ_DIAGNOSTIC_ASSERT(false); + } + + base::CloseSuperfluousFds(this, [](void* aCtx, int aFd) { + return aFd == static_cast<decltype(this)>(aCtx)->mChrootServer; + }); + + char msg; + ssize_t msgLen = HANDLE_EINTR(read(mChrootServer, &msg, 1)); + if (msgLen == 0) { + // Process exited before chrooting (or chose not to chroot?). + _exit(0); + } + MOZ_RELEASE_ASSERT(msgLen == 1); + MOZ_RELEASE_ASSERT(msg == kSandboxChrootRequest); + + // This chroots both processes to this process's procfs fdinfo + // directory, which becomes empty and unlinked when this process + // exits at the end of this function, and which is always + // unwriteable. + int rv = chroot("/proc/self/fdinfo"); + MOZ_RELEASE_ASSERT(rv == 0); + + // Drop CAP_SYS_CHROOT ASAP. This must happen before responding; + // the main child won't be able to waitpid(), so it could start + // handling hostile content before this process finishes exiting. + DropAllCaps(); + + // The working directory still grant access to the real filesystem; + // remove that. (Note: if the process can obtain directory fds, for + // example via SandboxBroker, it must be blocked from using fchdir.) + rv = chdir("/"); + MOZ_RELEASE_ASSERT(rv == 0); + + msg = kSandboxChrootResponse; + msgLen = HANDLE_EINTR(write(mChrootServer, &msg, 1)); + MOZ_RELEASE_ASSERT(msgLen == 1); + _exit(0); +} + +} // namespace mozilla |