1 files changed, 715 insertions, 0 deletions
diff --git a/security/sandbox/linux/launch/SandboxLaunch.cpp b/security/sandbox/linux/launch/SandboxLaunch.cpp
new file mode 100644
index 0000000000..267b71bd13
--- /dev/null
+++ b/security/sandbox/linux/launch/SandboxLaunch.cpp
@@ -0,0 +1,715 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SandboxLaunch.h"
+
+#include <fcntl.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <utility>
+
+#include "LinuxCapabilities.h"
+#include "LinuxSched.h"
+#include "SandboxChrootProto.h"
+#include "SandboxInfo.h"
+#include "SandboxLogging.h"
+#include "base/eintr_wrapper.h"
+#include "base/strings/safe_sprintf.h"
+#include "mozilla/Array.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/SandboxReporter.h"
+#include "mozilla/SandboxSettings.h"
+#include "mozilla/Components.h"
+#include "mozilla/StaticPrefs_media.h"
+#include "mozilla/StaticPrefs_security.h"
+#include "mozilla/Unused.h"
+#include "nsCOMPtr.h"
+#include "nsDebug.h"
+#include "nsIGfxInfo.h"
+#include "nsString.h"
+#include "nsThreadUtils.h"
+#include "prenv.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+
+#ifdef MOZ_X11
+#  ifndef MOZ_WIDGET_GTK
+#    error "Unknown toolkit"
+#  endif
+#  include "mozilla/WidgetUtilsGtk.h"
+#  include <gdk/gdk.h>
+#  include <gdk/gdkx.h>
+#  include "X11UndefineNone.h"
+#  include "gfxPlatform.h"
+#endif
+
+namespace mozilla {
+
+// Returns true if graphics will keep working in a content process
+// that is started in a new network namespace, and false if the
+// namespace must be shared.  Named Unix-domain sockets still work
+// there, but TCP/IP does not, not even to localhost: the child
+// process gets its own private loopback interface.
+//
+// (Longer-term we intend to either proxy or remove X11 access from
+// content processes, at which point this will stop being an issue.)
+static bool IsGraphicsOkWithoutNetwork() {
+  // For X11, check whether the parent's connection is a Unix-domain
+  // socket.  This is done instead of trying to parse the display name
+  // because an empty hostname (e.g., ":0") will fall back to TCP in
+  // case of failure to connect using Unix-domain sockets.
+#ifdef MOZ_X11
+  // First, ensure that the parent process's graphics are initialized.
+  DebugOnly<gfxPlatform*> gfxPlatform = gfxPlatform::GetPlatform();
+
+  const auto display = gdk_display_get_default();
+  if (!display) {
+    // In this case, the browser is headless, but WebGL could still
+    // try to use X11.  However, WebGL isn't supported with remote
+    // X11, and in any case these connections are made after sandbox
+    // startup (lazily when WebGL is used), so they aren't being done
+    // directly by the process anyway.  (For local X11, they're
+    // brokered.)
+    MOZ_ASSERT(gfxPlatform->IsHeadless());
+    return true;
+  }
+  if (mozilla::widget::GdkIsX11Display(display)) {
+    const int xSocketFd = ConnectionNumber(GDK_DISPLAY_XDISPLAY(display));
+    if (NS_WARN_IF(xSocketFd < 0)) {
+      return false;  // no usable fd for the X11 connection
+    }
+
+    int domain;
+    socklen_t optlen = static_cast<socklen_t>(sizeof(domain));
+    int rv = getsockopt(xSocketFd, SOL_SOCKET, SO_DOMAIN, &domain, &optlen);
+    if (NS_WARN_IF(rv != 0)) {
+      return false;  // the socket's domain couldn't be queried
+    }
+    MOZ_RELEASE_ASSERT(static_cast<size_t>(optlen) == sizeof(domain));
+    if (domain != AF_LOCAL) {
+      return false;  // not a Unix-domain socket (e.g., TCP)
+    }
+    // There's one more complication: Xorg listens on named sockets
+    // (actual filesystem nodes) as well as abstract addresses (opaque
+    // octet strings scoped to the network namespace; this is a Linux
+    // extension).
+    //
+    // Inside a container environment (e.g., when running as a Snap
+    // package), it's possible that only the abstract addresses are
+    // accessible.  In that case, the display must be considered
+    // remote.  See also bug 1450740.
+    //
+    // Unfortunately, the Xorg client libraries prefer the abstract
+    // addresses, so this isn't directly detectable by inspecting the
+    // parent process's socket.  Instead, parse the DISPLAY env var
+    // (which was updated if necessary in nsAppRunner.cpp) to get the
+    // display number and construct the socket path, falling back to
+    // testing the directory in case that doesn't work.  (See bug
+    // 1565972 and bug 1559368 for cases where we need to test the
+    // specific socket.)
+    const char* const displayStr = PR_GetEnv("DISPLAY");
+    nsAutoCString socketPath("/tmp/.X11-unix");
+    int accessFlags = X_OK;  // fallback: test the directory itself
+    int displayNum;
+    // sscanf ignores trailing text, so display names with a screen
+    // number (e.g., ":0.2") will parse correctly.
+    if (displayStr && (sscanf(displayStr, ":%d", &displayNum) == 1 ||
+                       sscanf(displayStr, "unix:%d", &displayNum) == 1)) {
+      socketPath.AppendPrintf("/X%d", displayNum);
+      accessFlags = R_OK | W_OK;  // test the specific socket node instead
+    }
+    if (access(socketPath.get(), accessFlags) != 0) {
+      SANDBOX_LOG_ERRNO(
+          "%s is inaccessible; can't isolate network namespace in"
+          " content processes",
+          socketPath.get());
+      return false;
+    }
+  }
+#endif
+
+  // Assume that other backends (e.g., Wayland) will not use the
+  // network namespace.
+  return true;
+}
+
+// Returns true if a graphics adapter reports the proprietary ATI vendor ID.
+bool HasAtiDrivers() {
+  using VendorGetter = nsresult (nsIGfxInfo::*)(nsAString&);
+  static const Array<VendorGetter, 2> kVendorGetters = {
+      &nsIGfxInfo::GetAdapterVendorID,
+      &nsIGfxInfo::GetAdapterVendorID2,
+  };
+  nsCOMPtr<nsIGfxInfo> info = components::GfxInfo::Service();
+  nsAutoString vendor;
+  for (const auto getter : kVendorGetters) {
+    if (NS_FAILED((info->*getter)(vendor))) {
+      continue;
+    }
+    // Based on telemetry data: the proprietary ATI drivers seem to use
+    // this vendor string, even for some newer AMD-branded devices (such
+    // as those using AMDGPU-PRO drivers), while the open-source drivers
+    // integrated into Mesa appear to use the vendor ID "X.Org" instead.
+    if (vendor.EqualsLiteral("ATI Technologies Inc.")) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Decides whether content processes must keep direct access to SysV
+// IPC, which is needed only in a few uncommon configurations.
+static bool ContentNeedsSysVIPC() {
+#ifdef MOZ_ALSA
+  // The ALSA dmix plugin coordinates software mixing through SysV
+  // semaphores and shared memory.
+  if (!StaticPrefs::media_cubeb_sandbox()) {
+    return true;
+  }
+#endif
+
+  // The remaining cases are checked only when content processes are
+  // not forced to be headless.
+  if (StaticPrefs::security_sandbox_content_headless_AtStartup()) {
+    return false;
+  }
+
+  // Bug 1438391: VirtualGL uses SysV shm for images and configuration.
+  if (PR_GetEnv("VGL_ISACTIVE")) {
+    return true;
+  }
+
+  // The fglrx (ATI Catalyst) GPU drivers use SysV IPC.
+  return HasAtiDrivers();
+}
+
+// Arranges for libmozsandbox.so to be preloaded in the child, so that
+// sandbox-related interpositions can be defined there instead of in
+// the executable.  (This could be made conditional on intent to use
+// sandboxing, but it's harmless for non-sandboxed processes.)
+static void PreloadSandboxLib(base::environment_map* aEnv) {
+  // Our library goes first, because people can and do preload
+  // libpthread.  (See bug 1222500.)
+  nsAutoCString newPreload("libmozsandbox.so");
+  const char* const inherited = PR_GetEnv("LD_PRELOAD");
+  if (inherited) {
+    // An empty inherited value is fine; extra separators are ignored.
+    newPreload.Append(' ');
+    newPreload.Append(inherited);
+    (*aEnv)["MOZ_ORIG_LD_PRELOAD"] = inherited;
+  }
+  MOZ_ASSERT(aEnv->count("LD_PRELOAD") == 0);
+  (*aEnv)["LD_PRELOAD"] = newPreload.get();
+}
+
+static void AttachSandboxReporter(base::file_handle_mapping_vector* aFdMap) {
+  int fromFd, toFd;  // out-params filled in by the reporter singleton
+  SandboxReporter::Singleton()->GetClientFileDescriptorMapping(&fromFd, &toFd);
+  aFdMap->push_back({fromFd, toFd});  // queue the fromFd -> toFd remapping
+}
+
+class SandboxFork : public base::LaunchOptions::ForkDelegate {
+ public:
+  explicit SandboxFork(int aFlags, bool aChroot, int aServerFd = -1,
+                       int aClientFd = -1);
+  virtual ~SandboxFork();  // closes any chroot fds still held
+
+  void PrepareMapping(base::file_handle_mapping_vector* aMap);
+  pid_t Fork() override;
+
+ private:
+  int mFlags;         // extra clone() flags passed on by Fork()
+  int mChrootServer;  // fd read by the chroot helper process, or -1
+  int mChrootClient;  // fd remapped to kSandboxChrootClientFd, or -1
+
+  void StartChrootServer();
+  SandboxFork(const SandboxFork&) = delete;
+  SandboxFork& operator=(const SandboxFork&) = delete;
+};
+
+// Maps a process type to the sandbox level it should be launched
+// with, consulting prefs and env vars.  A result of 0 means that the
+// process type is not to be sandboxed.
+static int GetEffectiveSandboxLevel(GeckoProcessType aType) {
+  auto sandboxInfo = SandboxInfo::Get();
+  switch (aType) {
+    case GeckoProcessType_GMPlugin:
+      return sandboxInfo.Test(SandboxInfo::kEnabledForMedia) ? 1 : 0;
+    case GeckoProcessType_Content:
+#ifdef MOZ_ENABLE_FORKSERVER
+      // With this env MOZ_SANDBOXED will be set, and mozsandbox will
+      // be preloaded for the fork server.  The content processes rely
+      // on wrappers defined by mozsandbox to work properly.
+    case GeckoProcessType_ForkServer:
+#endif
+      // GetEffectiveContentSandboxLevel is main-thread-only due to prefs.
+      MOZ_ASSERT(NS_IsMainThread());
+      if (!sandboxInfo.Test(SandboxInfo::kEnabledForContent)) {
+        return 0;
+      }
+      return GetEffectiveContentSandboxLevel();
+    case GeckoProcessType_RDD:
+      return PR_GetEnv("MOZ_DISABLE_RDD_SANDBOX") ? 0 : 1;
+    case GeckoProcessType_Socket:
+      // GetEffectiveSocketProcessSandboxLevel is main-thread-only due to prefs.
+      MOZ_ASSERT(NS_IsMainThread());
+      return GetEffectiveSocketProcessSandboxLevel();
+    case GeckoProcessType_Utility:
+      return PR_GetEnv("MOZ_DISABLE_UTILITY_SANDBOX") ? 0 : 1;
+    default:
+      return 0;
+  }
+}
+
+void SandboxLaunchPrepare(GeckoProcessType aType,
+                          base::LaunchOptions* aOptions) {
+  auto info = SandboxInfo::Get();
+
+  // We won't try any kind of sandboxing without seccomp-bpf.
+  if (!info.Test(SandboxInfo::kHasSeccompBPF)) {
+    return;
+  }
+
+  // Check prefs (and env vars) controlling sandbox use.
+  int level = GetEffectiveSandboxLevel(aType);
+  if (level == 0) {
+    return;
+  }
+
+  // At this point, we know we'll be using sandboxing; generic
+  // sandboxing support goes here.  The MOZ_SANDBOXED env var tells
+  // the child process whether this is the case.
+  aOptions->env_map["MOZ_SANDBOXED"] = "1";
+  PreloadSandboxLib(&aOptions->env_map);
+  AttachSandboxReporter(&aOptions->fds_to_remap);
+
+  bool canChroot = false;  // whether the forker should set up chroot
+  int flags = 0;           // extra clone() flags for the forker
+
+  if (aType == GeckoProcessType_Content && level >= 1) {
+    static const bool needSysV = ContentNeedsSysVIPC();
+    if (needSysV) {
+      // Tell the child process so it can adjust its seccomp-bpf
+      // policy.
+      aOptions->env_map["MOZ_SANDBOX_ALLOW_SYSV"] = "1";
+    } else {
+      flags |= CLONE_NEWIPC;
+    }
+
+    if (StaticPrefs::security_sandbox_content_headless_AtStartup()) {
+      aOptions->env_map["MOZ_HEADLESS"] = "1";
+    }
+  }
+
+  // Anything below this requires unprivileged user namespaces.
+  if (!info.Test(SandboxInfo::kHasUserNamespaces)) {
+    return;
+  }
+
+  switch (aType) {
+    case GeckoProcessType_Socket:
+      if (level >= 1) {
+        canChroot = true;
+        flags |= CLONE_NEWIPC;
+      }
+      break;
+    case GeckoProcessType_GMPlugin:
+    case GeckoProcessType_RDD:
+      if (level >= 1) {
+        canChroot = true;
+        // Can't use CLONE_NEWIPC because of intel-media-driver.
+        flags |= CLONE_NEWNET;
+      }
+      break;
+    case GeckoProcessType_Content:
+      if (level >= 4) {
+        canChroot = true;
+
+        // Unshare network namespace if allowed by graphics; see
+        // function definition above for details.  (The display
+        // local-ness is cached because it won't change.)
+        static const bool canCloneNet =
+            StaticPrefs::security_sandbox_content_headless_AtStartup() ||
+            (IsGraphicsOkWithoutNetwork() &&
+             !PR_GetEnv("RENDERDOC_CAPTUREOPTS"));
+
+        if (canCloneNet) {
+          flags |= CLONE_NEWNET;
+        }
+      }
+      // Hidden pref to allow testing user namespaces separately, even
+      // if there's nothing that would require them.
+      if (Preferences::GetBool("security.sandbox.content.force-namespace",
+                               false)) {
+        flags |= CLONE_NEWUSER;
+      }
+      break;
+    default:
+      // Nothing yet.
+      break;
+  }
+
+  if (canChroot || flags != 0) {
+    flags |= CLONE_NEWUSER;
+    auto forker = MakeUnique<SandboxFork>(flags, canChroot);
+    forker->PrepareMapping(&aOptions->fds_to_remap);
+    aOptions->fork_delegate = std::move(forker);
+    // For |SandboxLaunchForkServerPrepare()|: chroot digit, then the flags.
+    aOptions->env_map[kSandboxChrootEnvFlag] =
+        std::to_string(canChroot ? 1 : 0) + std::to_string(flags);
+  }
+}
+
+#if defined(MOZ_ENABLE_FORKSERVER)
+/**
+ * Called by the fork server to install a fork delegate.
+ *
+ * The parent passes the |SandboxFork| settings in an env variable ('0' or
+ * '1' for chroot, then the decimal flags) so it can be recreated here.
+ */
+void SandboxLaunchForkServerPrepare(const std::vector<std::string>& aArgv,
+                                    base::LaunchOptions& aOptions) {
+  auto chroot = std::find_if(
+      aOptions.env_map.begin(), aOptions.env_map.end(),
+      [](auto& elt) { return elt.first == kSandboxChrootEnvFlag; });
+  if (chroot == aOptions.env_map.end()) {
+    return;
+  }
+  bool canChroot = chroot->second.c_str()[0] == '1';
+  int flags = atoi(chroot->second.c_str() + 1);
+  MOZ_ASSERT(flags || canChroot);
+
+  // Find the chroot server fd by its mapping to kSandboxChrootServerFd.
+  // It is mapped only when chroot is in use; with namespace flags alone
+  // there is no entry, and |end()| must not be dereferenced or erased.
+  auto fdmap = std::find_if(
+      aOptions.fds_to_remap.begin(), aOptions.fds_to_remap.end(),
+      [](auto& elt) { return elt.second == kSandboxChrootServerFd; });
+  const bool found = fdmap != aOptions.fds_to_remap.end();
+  MOZ_RELEASE_ASSERT(found || !canChroot, "ChrootServerFd is not found");
+  int chrootserverfd = found ? fdmap->first : -1;
+  if (found) {
+    aOptions.fds_to_remap.erase(fdmap);
+  }
+
+  // Pass only the server fd: the client fd is already in |fds_to_remap|,
+  // and the forker needs the server fd just so that the chroot server can
+  // sync with the client (content); see |SandboxFork::StartChrootServer()|.
+  aOptions.fork_delegate =
+      MakeUnique<SandboxFork>(flags, canChroot, chrootserverfd);
+}
+#endif
+
+SandboxFork::SandboxFork(int aFlags, bool aChroot, int aServerFd, int aClientFd)
+    : mFlags(aFlags), mChrootServer(aServerFd), mChrootClient(aClientFd) {
+  // Chroot needs a socket pair; make one unless a server end was given.
+  if (aChroot && mChrootServer < 0) {
+    int pair[2];
+    if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, pair) != 0) {
+      SANDBOX_LOG_ERRNO("socketpair");
+      MOZ_CRASH("socketpair failed");
+    }
+    mChrootClient = pair[0];
+    mChrootServer = pair[1];
+  }
+}
+
+void SandboxFork::PrepareMapping(base::file_handle_mapping_vector* aMap) {
+  MOZ_ASSERT(XRE_GetProcessType() != GeckoProcessType_ForkServer);
+  if (mChrootClient >= 0) {  // only set when a chroot socket pair exists
+    aMap->push_back({mChrootClient, kSandboxChrootClientFd});
+  }
+#if defined(MOZ_ENABLE_FORKSERVER)
+  if (mChrootServer >= 0) {  // lets the fork server locate the server end
+    aMap->push_back({mChrootServer, kSandboxChrootServerFd});
+  }
+#endif
+}
+
+SandboxFork::~SandboxFork() {
+  // Release whichever ends of the chroot socket pair are still held.
+  for (const int fd : {mChrootClient, mChrootServer}) {
+    if (fd >= 0) {
+      close(fd);
+    }
+  }
+}
+
+// Blocks every signal on this thread, saving the old mask in aOldSigs.
+static void BlockAllSignals(sigset_t* aOldSigs) {
+  sigset_t fullSet;
+  const int fillRv = sigfillset(&fullSet);
+  MOZ_RELEASE_ASSERT(fillRv == 0);
+  if (const int err = pthread_sigmask(SIG_BLOCK, &fullSet, aOldSigs)) {
+    SANDBOX_LOG_WITH_ERROR(err, "pthread_sigmask (block all)");
+    MOZ_CRASH("pthread_sigmask");
+  }
+}
+
+// Reinstates a signal mask previously saved by BlockAllSignals.
+static void RestoreSignals(const sigset_t* aOldSigs) {
+  // This relies on pthread_sigmask being a thin layer over
+  // rt_sigprocmask that doesn't try to touch TLS, which may be in an
+  // "interesting" state right now.
+  if (const int err = pthread_sigmask(SIG_SETMASK, aOldSigs, nullptr)) {
+    SANDBOX_LOG_WITH_ERROR(err, "pthread_sigmask (restore)");
+    MOZ_CRASH("pthread_sigmask");
+  }
+}
+
+// Returns true if aSig is set to SIG_IGN (false if it can't be queried).
+static bool IsSignalIgnored(int aSig) {
+  struct sigaction current {};
+  if (sigaction(aSig, nullptr, &current) == 0) {
+    return current.sa_handler == SIG_IGN;
+  }
+  if (errno != EINVAL) {
+    SANDBOX_LOG_ERRNO("sigaction(%d)", aSig);
+  }
+  return false;
+}
+
+// Resets every signal up to SIGRTMAX to its default action.  Signals
+// that are currently ignored are left alone.
+static void ResetSignalHandlers() {
+  for (int sig = 1; sig <= SIGRTMAX; ++sig) {
+    if (!IsSignalIgnored(sig) && signal(sig, SIG_DFL) == SIG_ERR) {
+      // Not every signal number can be changed; only EINVAL is expected.
+      MOZ_DIAGNOSTIC_ASSERT(errno == EINVAL);
+    }
+  }
+}
+
+namespace {
+
+// The libc clone() routine insists on calling a provided function on
+// a new stack, even if the address space isn't shared and it would be
+// safe to expose the underlying system call's fork()-like behavior.
+// So, we work around this by longjmp()ing back onto the original stack;
+// this technique is also used by Chromium.
+//
+// In theory, the clone syscall could be used directly if we ensure
+// that functions like raise() are never used in the child, including
+// by inherited signal handlers, but the longjmp approach isn't much
+// extra code and avoids a class of potential bugs.
+static int CloneCallee(void* aPtr) {
+  auto ctxPtr = reinterpret_cast<jmp_buf*>(aPtr);  // the arg given to clone()
+  longjmp(*ctxPtr, 1);  // makes the matching setjmp() return 1
+  MOZ_CRASH("unreachable");
+  return 1;
+}
+
+// According to the Chromium developers, builds with FORTIFY_SOURCE
+// require that longjmp move the stack pointer towards the root
+// function of the call stack.  Therefore, we must ensure that the
+// clone callee stack is leafward of the stack pointer captured in
+// setjmp() below by using this no-inline helper function.
+//
+// ASan apparently also causes problems, by the combination of
+// allocating the large stack-allocated buffer outside of the actual
+// stack and then assuming that longjmp is used only to unwind a
+// stack, not switch stacks.
+//
+// Valgrind would disapprove of using clone() without CLONE_VM;
+// Chromium uses the raw syscall as a workaround in that case, but
+// we don't currently support sandboxing under valgrind.
+MOZ_NEVER_INLINE MOZ_ASAN_BLACKLIST static pid_t DoClone(int aFlags,
+                                                         jmp_buf* aCtx) {
+  static constexpr size_t kStackAlignment = 16;
+  uint8_t miniStack[4096] __attribute__((aligned(kStackAlignment)));
+#ifdef __hppa__
+  void* stackPtr = miniStack;  // hppa: pass the low end of the buffer
+#else
+  void* stackPtr = ArrayEnd(miniStack);  // elsewhere: pass the high end
+#endif
+  return clone(CloneCallee, stackPtr, aFlags, aCtx);
+}
+
+}  // namespace
+
+// Similar to fork(), but allows passing flags to clone() and does not
+// run pthread_atfork hooks.
+static pid_t ForkWithFlags(int aFlags) {
+  // Don't allow flags that would share the address space, or
+  // require clone() arguments we're not passing:
+  static const int kBadFlags = CLONE_VM | CLONE_VFORK | CLONE_SETTLS |
+                               CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
+                               CLONE_CHILD_CLEARTID;
+  MOZ_RELEASE_ASSERT((aFlags & kBadFlags) == 0);
+
+  // Block signals due to small stack in DoClone.
+  sigset_t oldSigs;
+  BlockAllSignals(&oldSigs);
+
+  int ret = 0;
+  jmp_buf ctx;
+  if (setjmp(ctx) == 0) {
+    // In the parent and just called setjmp:
+    ret = DoClone(aFlags | SIGCHLD, &ctx);
+  }
+  RestoreSignals(&oldSigs);
+  // Parent: ret is DoClone()'s result.  Child (after longjmp): ret is 0.
+  return ret;
+}
+
+// Writes aLen bytes of aStr to the existing file at aPath; succeeds
+// only if one write() takes the whole buffer and close() also works.
+static bool WriteStringToFile(const char* aPath, const char* aStr,
+                              const size_t aLen) {
+  const int fd = open(aPath, O_WRONLY);
+  if (fd < 0) {
+    return false;
+  }
+  const ssize_t numWritten = write(fd, aStr, aLen);
+  const bool closed = close(fd) == 0;
+  return closed && numWritten == static_cast<ssize_t>(aLen);
+}
+
+// This function sets up uid/gid mappings that preserve the
+// process's previous ids.  Mapping the uid/gid to something is
+// necessary in order to nest user namespaces (not currently being
+// used, but could be useful), and leaving the ids unchanged is
+// likely to minimize unexpected side-effects.
+static void ConfigureUserNamespace(uid_t uid, gid_t gid) {
+  using base::strings::SafeSPrintf;
+  char buf[sizeof("18446744073709551615 18446744073709551615 1")];
+  size_t len;
+
+  len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", uid, uid));
+  MOZ_RELEASE_ASSERT(len < sizeof(buf));  // output must fit within buf
+  if (!WriteStringToFile("/proc/self/uid_map", buf, len)) {
+    MOZ_CRASH("Failed to write /proc/self/uid_map");
+  }
+
+  // In recent kernels (3.19, 3.18.2, 3.17.8), for security reasons,
+  // establishing gid mappings will fail unless the process first
+  // revokes its ability to call setgroups() by using a /proc node
+  // added in the same set of patches.  A failed write here is ignored.
+  Unused << WriteStringToFile("/proc/self/setgroups", "deny", 4);
+
+  len = static_cast<size_t>(SafeSPrintf(buf, "%d %d 1", gid, gid));
+  MOZ_RELEASE_ASSERT(len < sizeof(buf));  // output must fit within buf
+  if (!WriteStringToFile("/proc/self/gid_map", buf, len)) {
+    MOZ_CRASH("Failed to write /proc/self/gid_map");
+  }
+}
+
+static void DropAllCaps() {
+  if (!LinuxCapabilities().SetCurrent()) {  // default-constructed set
+    SANDBOX_LOG_ERRNO("capset (drop all)");  // failure is only logged
+  }
+}
+
+pid_t SandboxFork::Fork() {
+  if (mFlags == 0) {
+    MOZ_ASSERT(mChrootServer < 0);
+    return fork();  // no extra clone flags requested: plain fork()
+  }
+
+  uid_t uid = getuid();  // ids are read before cloning and are used
+  gid_t gid = getgid();  // by ConfigureUserNamespace() in the child
+
+  // Block signals so that the handlers can be safely reset in the
+  // child process without races, and so that repeated SIGPROF from
+  // the profiler won't prevent clone() from making progress.  (The
+  // profiler uses pthread_atfork to do that, but ForkWithFlags
+  // can't run atfork hooks.)
+  sigset_t oldSigs;
+  BlockAllSignals(&oldSigs);
+  pid_t pid = ForkWithFlags(mFlags);
+  if (pid != 0) {  // in the parent, or the clone failed (pid < 0)
+    RestoreSignals(&oldSigs);
+    return pid;
+  }
+
+  // WARNING: all code from this point on (and in StartChrootServer)
+  // must be async signal safe.  In particular, it cannot do anything
+  // that could allocate heap memory or use mutexes.
+  prctl(PR_SET_NAME, "Sandbox Forked");
+
+  // Clear signal handlers in the child, under the assumption that any
+  // actions they would take (running the crash reporter, manipulating
+  // the Gecko profile, etc.) wouldn't work correctly in the child.
+  ResetSignalHandlers();
+  RestoreSignals(&oldSigs);
+  ConfigureUserNamespace(uid, gid);
+
+  if (mChrootServer >= 0) {
+    StartChrootServer();
+  }
+
+  // execve() will drop capabilities, but it seems best to also drop
+  // them here in case they'd do something unexpected in the generic
+  // post-fork code.
+  DropAllCaps();
+  return 0;
+}
+
+void SandboxFork::StartChrootServer() {
+  // Run the rest of this function in a separate process that can
+  // chroot() on behalf of this process after it's sandboxed.
+  pid_t pid = ForkWithFlags(CLONE_FS);
+  if (pid < 0) {
+    MOZ_CRASH("failed to clone chroot helper process");
+  }
+  if (pid > 0) {
+    return;  // the caller continues; only the helper runs the code below
+  }
+  prctl(PR_SET_NAME, "Chroot Helper");
+
+  LinuxCapabilities caps;
+  caps.Effective(CAP_SYS_CHROOT) = true;
+  if (!caps.SetCurrent()) {
+    SANDBOX_LOG_ERRNO("capset (chroot helper)");
+    MOZ_DIAGNOSTIC_ASSERT(false);
+  }
+
+  base::CloseSuperfluousFds(this, [](void* aCtx, int aFd) {
+    return aFd == static_cast<decltype(this)>(aCtx)->mChrootServer;
+  });
+
+  char msg;
+  ssize_t msgLen = HANDLE_EINTR(read(mChrootServer, &msg, 1));
+  if (msgLen == 0) {
+    // Process exited before chrooting (or chose not to chroot?).
+    _exit(0);
+  }
+  MOZ_RELEASE_ASSERT(msgLen == 1);
+  MOZ_RELEASE_ASSERT(msg == kSandboxChrootRequest);
+
+  // This chroots both processes to this process's procfs fdinfo
+  // directory, which becomes empty and unlinked when this process
+  // exits at the end of this function, and which is always
+  // unwriteable.
+  int rv = chroot("/proc/self/fdinfo");
+  MOZ_RELEASE_ASSERT(rv == 0);
+
+  // Drop CAP_SYS_CHROOT ASAP.  This must happen before responding;
+  // the main child won't be able to waitpid(), so it could start
+  // handling hostile content before this process finishes exiting.
+  DropAllCaps();
+
+  // The working directory still grants access to the real filesystem;
+  // remove that.  (Note: if the process can obtain directory fds, for
+  // example via SandboxBroker, it must be blocked from using fchdir.)
+  rv = chdir("/");
+  MOZ_RELEASE_ASSERT(rv == 0);
+
+  msg = kSandboxChrootResponse;
+  msgLen = HANDLE_EINTR(write(mChrootServer, &msg, 1));
+  MOZ_RELEASE_ASSERT(msgLen == 1);
+  _exit(0);
+}
+
+}  // namespace mozilla