244 files changed, 74959 insertions, 0 deletions
diff --git a/mozglue/android/APKOpen.cpp b/mozglue/android/APKOpen.cpp
new file mode 100644
index 0000000000..04d3577029
--- /dev/null
+++ b/mozglue/android/APKOpen.cpp
@@ -0,0 +1,546 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This custom library loading code is only meant to be called
+ * during initialization. As a result, it takes no special
+ * precautions to be threadsafe. Any of the library loading functions
+ * like mozload should not be available to other code.
+ */
+
+#include <jni.h>
+#include <android/log.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/limits.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <zlib.h>
+#include "dlfcn.h"
+#include "APKOpen.h"
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include "sqlite3.h"
+#include "Linker.h"
+#include "BaseProfiler.h"
+#include "application.ini.h"
+
+#include "mozilla/arm.h"
+#include "mozilla/Bootstrap.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "XREChildData.h"
+
+/* Android headers don't define RUSAGE_THREAD */
+#ifndef RUSAGE_THREAD
+#  define RUSAGE_THREAD 1
+#endif
+
+#ifndef RELEASE_OR_BETA
+/* Official builds have the debuggable flag set to false, which disables
+ * the backtrace dumper from bionic. However, as it is useful for native
+ * crashes happening before the crash reporter is registered, re-enable
+ * it on non release builds (i.e. nightly and aurora).
+ * Using a constructor so that it is re-enabled as soon as libmozglue.so
+ * is loaded.
+ */
+// Load-time constructor: flags the process as dumpable through prctl().
+__attribute__((constructor)) void make_dumpable() {
+  prctl(PR_SET_DUMPABLE, 1);
+}
+#endif
+
+typedef int mozglueresult;
+
+// Startup timeline event ids. The enumerators are produced by expanding
+// mozilla_StartupTimeline_Event over the entries listed in StartupTimeline.h;
+// MAX_STARTUP_EVENT_ID comes after the last generated entry.
+enum StartupEvent {
+#define mozilla_StartupTimeline_Event(ev, z) ev,
+#include "StartupTimeline.h"
+#undef mozilla_StartupTimeline_Event
+  MAX_STARTUP_EVENT_ID
+};
+
+using namespace mozilla;
+
+/**
+ * Raise a Java exception of class `classname` carrying `msg` on `jenv`.
+ *
+ * The process exits with FAILURE when the class cannot be looked up or when
+ * ThrowNew reports an error.
+ */
+void JNI_Throw(JNIEnv* jenv, const char* classname, const char* msg) {
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Throw\n");
+
+  jclass exceptionClass = jenv->FindClass(classname);
+  if (!exceptionClass) {
+    __android_log_print(
+        ANDROID_LOG_ERROR, "GeckoLibLoad",
+        "Couldn't find exception class (or exception pending) %s\n", classname);
+    exit(FAILURE);
+  }
+
+  if (jenv->ThrowNew(exceptionClass, msg) < 0) {
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                        "Error throwing exception %s\n", msg);
+    exit(FAILURE);
+  }
+
+  // The class reference is no longer needed once the exception is raised.
+  jenv->DeleteLocalRef(exceptionClass);
+}
+
+namespace {
+// JavaVM pointer filled in by loadGeckoLibsNative through JNIEnv::GetJavaVM
+// and read by abortThroughJava to attach the calling thread.
+JavaVM* sJavaVM;
+}
+
+/**
+ * Best-effort abort through GeckoLoader.abort(String) on the Java side.
+ *
+ * Returns without calling into Java when the SIGSEGV disposition cannot be
+ * queried, when the installed SIGSEGV handler lives in libxul.so (i.e. the
+ * crash reporter is in place), or when no JNIEnv can be obtained for the
+ * current thread.
+ *
+ * @param msg Message handed to GeckoLoader.abort as a Java string.
+ */
+void abortThroughJava(const char* msg) {
+  struct sigaction sigact = {};
+  if (__wrap_sigaction(SIGSEGV, nullptr, &sigact)) {
+    return;  // sigaction call failed.
+  }
+
+  Dl_info info = {};
+  if ((sigact.sa_flags & SA_SIGINFO) &&
+      __wrap_dladdr(reinterpret_cast<void*>(sigact.sa_sigaction), &info) &&
+      info.dli_fname && strstr(info.dli_fname, "libxul.so")) {
+    return;  // Existing signal handler is in libxul (i.e. we have crash
+             // reporter).
+  }
+
+  JNIEnv* env = nullptr;
+  if (!sJavaVM ||
+      sJavaVM->AttachCurrentThreadAsDaemon(&env, nullptr) != JNI_OK) {
+    return;
+  }
+
+  // Room for two local references: the loader class and the message string.
+  if (!env || env->PushLocalFrame(2) != JNI_OK) {
+    return;
+  }
+
+  // Once the frame is pushed, every path has to reach PopLocalFrame below,
+  // so failures are handled by nesting instead of returning early.
+  jclass loader = env->FindClass("org/mozilla/gecko/mozglue/GeckoLoader");
+  if (loader) {
+    jmethodID method =
+        env->GetStaticMethodID(loader, "abort", "(Ljava/lang/String;)V");
+    // Only create the string when the method lookup succeeded; a failed
+    // lookup leaves an exception pending, and NewStringUTF must not be
+    // called in that state.
+    jstring str = method ? env->NewStringUTF(msg) : nullptr;
+    if (str) {
+      env->CallStaticVoidMethod(loader, method, str);
+    }
+  }
+
+  env->PopLocalFrame(nullptr);
+}
+
+Bootstrap::UniquePtr gBootstrap;
+#ifndef MOZ_FOLD_LIBS
+static void* sqlite_handle = nullptr;
+static void* nspr_handle = nullptr;
+static void* plc_handle = nullptr;
+#else
+#  define sqlite_handle nss_handle
+#  define nspr_handle nss_handle
+#  define plc_handle nss_handle
+#endif
+static void* nss_handle = nullptr;
+
+/**
+ * Build the path "<MOZ_ANDROID_LIBDIR>/<libraryName>" in a newly allocated
+ * buffer.
+ *
+ * MOZ_ANDROID_LIBDIR is read from the environment on the first call and the
+ * result is reused afterwards. When the variable is not set, a copy of the
+ * bare library name is returned instead of dereferencing a null pointer.
+ *
+ * @param libraryName File name of the library, e.g. "libxul.so".
+ * @return Owning pointer to the NUL-terminated path.
+ */
+static UniquePtr<char[]> getUnpackedLibraryName(const char* libraryName) {
+  static const char* libdir = getenv("MOZ_ANDROID_LIBDIR");
+
+  if (!libdir) {
+    // No directory to prefix: return the name unchanged rather than calling
+    // strlen() on nullptr.
+    size_t nameLen = strlen(libraryName) + 1; /* null terminator */
+    auto bare = MakeUnique<char[]>(nameLen);
+    memcpy(bare.get(), libraryName, nameLen);
+    return bare;
+  }
+
+  size_t len = strlen(libdir) + 1 /* path separator */ + strlen(libraryName) +
+               1; /* null terminator */
+  auto file = MakeUnique<char[]>(len);
+  snprintf(file.get(), len, "%s/%s", libdir, libraryName);
+  return file;
+}
+
+// Open `libraryName` by its unpacked path with RTLD_GLOBAL | RTLD_LAZY and
+// return whatever handle __wrap_dlopen yields.
+static void* dlopenLibrary(const char* libraryName) {
+  UniquePtr<char[]> fullPath = getUnpackedLibraryName(libraryName);
+  const int flags = RTLD_GLOBAL | RTLD_LAZY;
+  return __wrap_dlopen(fullPath.get(), flags);
+}
+
+// Start the base profiler the first time any entry point in this file runs.
+//
+// Android has no single entry point into C++ code: GeckoThread and
+// GeckoLibLoader call several functions to load libraries one at a time. To
+// capture all of that loading in the profiler, each of those functions calls
+// this helper first. All of them are currently assumed to run on the same
+// thread, so a plain static flag is used.
+static void EnsureBaseProfilerInitialized() {
+  static bool sAlreadyRun = false;
+  if (!sAlreadyRun) {
+#ifdef MOZ_GECKO_PROFILER
+    // The observed stack depth depends on whichever caller gets here first,
+    // so some root-most frames may be missed; that hopefully doesn't ruin
+    // profiling.
+    int stackBase = 5;
+    mozilla::baseprofiler::profiler_init(&stackBase);
+#endif
+    sAlreadyRun = true;
+  }
+}
+
+/**
+ * Load libxul through GetBootstrap and store the result in gBootstrap.
+ *
+ * The path comes from MOZ_ANDROID_LIBDIR_OVERRIDE when that variable is set,
+ * otherwise from getUnpackedLibraryName("libxul.so"). Wall-clock time and
+ * rusage deltas around the load are logged, and the two timestamps are
+ * recorded on the startup timeline.
+ *
+ * @return SUCCESS, or FAILURE when no bootstrap object could be obtained.
+ */
+static mozglueresult loadGeckoLibs() {
+  // Snapshot time and resource usage before loading.
+  TimeStamp loadStart = TimeStamp::Now();
+  struct rusage threadBefore, processBefore;
+  getrusage(RUSAGE_THREAD, &threadBefore);
+  getrusage(RUSAGE_SELF, &processBefore);
+
+  static const char* libxulOverride = getenv("MOZ_ANDROID_LIBDIR_OVERRIDE");
+  gBootstrap =
+      libxulOverride
+          ? GetBootstrap(libxulOverride, LibLoadingStrategy::ReadAhead)
+          : GetBootstrap(getUnpackedLibraryName("libxul.so").get(),
+                         LibLoadingStrategy::ReadAhead);
+  if (!gBootstrap) {
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                        "Couldn't get a handle to libxul!");
+    return FAILURE;
+  }
+
+  // Second snapshot, taken once the library is loaded.
+  TimeStamp loadEnd = TimeStamp::Now();
+  struct rusage threadAfter, processAfter;
+  getrusage(RUSAGE_THREAD, &threadAfter);
+  getrusage(RUSAGE_SELF, &processAfter);
+
+// Difference of a struct timeval rusage field, in milliseconds.
+#define RUSAGE_TIMEDIFF(u1, u2, field)                    \
+  ((u2.ru_##field.tv_sec - u1.ru_##field.tv_sec) * 1000 + \
+   (u2.ru_##field.tv_usec - u1.ru_##field.tv_usec) / 1000)
+
+  // Thread figures come first, process-wide figures are in parentheses.
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                      "Loaded libs in %fms total, %ldms(%ldms) user, "
+                      "%ldms(%ldms) system, %ld(%ld) faults",
+                      (loadEnd - loadStart).ToMilliseconds(),
+                      RUSAGE_TIMEDIFF(threadBefore, threadAfter, utime),
+                      RUSAGE_TIMEDIFF(processBefore, processAfter, utime),
+                      RUSAGE_TIMEDIFF(threadBefore, threadAfter, stime),
+                      RUSAGE_TIMEDIFF(processBefore, processAfter, stime),
+                      threadAfter.ru_majflt - threadBefore.ru_majflt,
+                      processAfter.ru_majflt - processBefore.ru_majflt);
+
+  gBootstrap->XRE_StartupTimelineRecord(LINKER_INITIALIZED, loadStart);
+  gBootstrap->XRE_StartupTimelineRecord(LIBRARIES_LOADED, loadEnd);
+  return SUCCESS;
+}
+
+static mozglueresult loadNSSLibs();
+
+// Make sure the SQLite library is loaded. With MOZ_FOLD_LIBS this defers to
+// loadNSSLibs(); otherwise libmozsqlite3.so is opened directly.
+// Returns SUCCESS or FAILURE.
+static mozglueresult loadSQLiteLibs() {
+  if (sqlite_handle) {
+    return SUCCESS;
+  }
+
+#ifdef MOZ_FOLD_LIBS
+  return loadNSSLibs() == SUCCESS ? SUCCESS : FAILURE;
+#else
+  sqlite_handle = dlopenLibrary("libmozsqlite3.so");
+  if (sqlite_handle) {
+    return SUCCESS;
+  }
+
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                      "Couldn't get a handle to libmozsqlite3!");
+  return FAILURE;
+#endif
+}
+
+// Make sure libnss3.so and, unless MOZ_FOLD_LIBS is defined, libnspr4.so and
+// libplc4.so are loaded. Returns SUCCESS when every handle is available and
+// FAILURE (after logging which library is missing) otherwise.
+static mozglueresult loadNSSLibs() {
+  const bool alreadyLoaded = nss_handle && nspr_handle && plc_handle;
+  if (alreadyLoaded) {
+    return SUCCESS;
+  }
+
+  // Every dlopen is attempted before any of the results is inspected.
+  nss_handle = dlopenLibrary("libnss3.so");
+#ifndef MOZ_FOLD_LIBS
+  nspr_handle = dlopenLibrary("libnspr4.so");
+  plc_handle = dlopenLibrary("libplc4.so");
+#endif
+
+  if (nss_handle == nullptr) {
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                        "Couldn't get a handle to libnss3!");
+    return FAILURE;
+  }
+
+#ifndef MOZ_FOLD_LIBS
+  if (nspr_handle == nullptr) {
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                        "Couldn't get a handle to libnspr4!");
+    return FAILURE;
+  }
+
+  if (plc_handle == nullptr) {
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                        "Couldn't get a handle to libplc4!");
+    return FAILURE;
+  }
+#endif
+
+  return SUCCESS;
+}
+
+// JNI entry point: records the JavaVM, loads the Gecko libraries and raises
+// java.lang.Exception when loading fails.
+extern "C" APKOPEN_EXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_loadGeckoLibsNative(
+    JNIEnv* jenv, jclass jGeckoAppShellClass) {
+  EnsureBaseProfilerInitialized();
+
+  // Keep the VM around so abortThroughJava can attach threads later.
+  jenv->GetJavaVM(&sJavaVM);
+
+  if (loadGeckoLibs() == SUCCESS) {
+    return;
+  }
+  JNI_Throw(jenv, "java/lang/Exception", "Error loading gecko libraries");
+}
+
+// JNI entry point: loads the SQLite library, logging before and after, and
+// raises java.lang.Exception when loading fails.
+extern "C" APKOPEN_EXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_loadSQLiteLibsNative(
+    JNIEnv* jenv, jclass jGeckoAppShellClass) {
+  EnsureBaseProfilerInitialized();
+
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Load sqlite start\n");
+  const mozglueresult status = loadSQLiteLibs();
+  if (status != SUCCESS) {
+    JNI_Throw(jenv, "java/lang/Exception", "Error loading sqlite libraries");
+  }
+  // NOTE(review): this line is logged on the failure path as well.
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Load sqlite done\n");
+}
+
+// JNI entry point: loads the NSS libraries, logging before and after, and
+// raises java.lang.Exception when loading fails.
+extern "C" APKOPEN_EXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_loadNSSLibsNative(
+    JNIEnv* jenv, jclass jGeckoAppShellClass) {
+  EnsureBaseProfilerInitialized();
+
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Load nss start\n");
+  const mozglueresult status = loadNSSLibs();
+  if (status != SUCCESS) {
+    JNI_Throw(jenv, "java/lang/Exception", "Error loading nss libraries");
+  }
+  // NOTE(review): this line is logged on the failure path as well.
+  __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad", "Load nss done\n");
+}
+
+/**
+ * Copy a Java String[] into a C-style, nullptr-terminated argv.
+ *
+ * @param jenv   JNI environment of the calling thread.
+ * @param jargs  Java array of strings.
+ * @param length Optional out-parameter that receives the element count.
+ * @return Array allocated with new[] whose entries come from strndup, or
+ *         nullptr when the Java array is empty. Release it with FreeArgv.
+ */
+static char** CreateArgvFromObjectArray(JNIEnv* jenv, jobjectArray jargs,
+                                        int* length) {
+  const size_t count = jenv->GetArrayLength(jargs);
+  if (length != nullptr) {
+    *length = count;
+  }
+  if (count == 0) {
+    return nullptr;
+  }
+
+  // One extra slot holds the terminating nullptr.
+  char** result = new char*[count + 1];
+  for (size_t i = 0; i < count; ++i) {
+    jstring jarg = static_cast<jstring>(jenv->GetObjectArrayElement(jargs, i));
+    // NOTE(review): the GetStringUTFChars result is not checked for nullptr.
+    const char* utf = jenv->GetStringUTFChars(jarg, nullptr);
+    result[i] = strndup(utf, jenv->GetStringUTFLength(jarg));
+    jenv->ReleaseStringUTFChars(jarg, utf);
+    jenv->DeleteLocalRef(jarg);
+  }
+  result[count] = nullptr;
+
+  return result;
+}
+
+// Release an argv array of `argc` entries: each entry with free(), the
+// pointer table with delete[].
+static void FreeArgv(char** argv, int argc) {
+  // Entries were allocated with strndup, so they go back through free().
+  for (int i = argc - 1; i >= 0; --i) {
+    free(argv[i]);
+  }
+  delete[] argv;
+}
+
+/**
+ * JNI entry point that runs Gecko in the current process.
+ *
+ * With ipcFd < 0, GeckoStart is invoked with the converted arguments.
+ * Otherwise the file descriptors are handed to XRE_SetAndroidChildFds, the
+ * last argument is used as the process type and XRE_InitChildProcess is
+ * called with the remaining arguments.
+ *
+ * Returns early, after freeing the argument copy, when gBootstrap has not
+ * been set up or when the child path receives no arguments.
+ */
+extern "C" APKOPEN_EXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_nativeRun(JNIEnv* jenv, jclass jc,
+                                                     jobjectArray jargs,
+                                                     int prefsFd, int prefMapFd,
+                                                     int ipcFd, int crashFd,
+                                                     int crashAnnotationFd) {
+  EnsureBaseProfilerInitialized();
+
+  int argc = 0;
+  char** argv = CreateArgvFromObjectArray(jenv, jargs, &argc);
+
+  // Both branches below dereference gBootstrap, so check it once up front.
+  if (gBootstrap == nullptr) {
+    FreeArgv(argv, argc);
+    return;
+  }
+
+  if (ipcFd < 0) {
+#ifdef MOZ_LINKER
+    ElfLoader::Singleton.ExpectShutdown(false);
+#endif
+    gBootstrap->GeckoStart(jenv, argv, argc, sAppData);
+#ifdef MOZ_LINKER
+    ElfLoader::Singleton.ExpectShutdown(true);
+#endif
+  } else {
+    // The last argument names the process type. With an empty argument list
+    // argv is nullptr and argv[argc - 1] would read out of bounds.
+    if (argc < 1) {
+      __android_log_print(ANDROID_LOG_ERROR, "GeckoLibLoad",
+                          "Child process started without arguments");
+      FreeArgv(argv, argc);
+      return;
+    }
+
+    gBootstrap->XRE_SetAndroidChildFds(
+        jenv, {prefsFd, prefMapFd, ipcFd, crashFd, crashAnnotationFd});
+    gBootstrap->XRE_SetProcessType(argv[argc - 1]);
+
+    XREChildData childData;
+    gBootstrap->XRE_InitChildProcess(argc - 1, argv, &childData);
+  }
+
+#ifdef MOZ_WIDGET_ANDROID
+#  ifdef MOZ_PROFILE_GENERATE
+  gBootstrap->XRE_WriteLLVMProfData();
+#  endif
+#endif
+  gBootstrap.reset();
+  FreeArgv(argv, argc);
+}
+
+/**
+ * Load the NSS, SQLite and Gecko libraries, then start a child process.
+ *
+ * The last element of argv is taken as the process type and removed from
+ * the argument list passed to XRE_InitChildProcess.
+ *
+ * @return FAILURE when a library fails to load or argv is empty; otherwise
+ *         the NS_FAILED() result of XRE_InitChildProcess (0 on success).
+ */
+extern "C" APKOPEN_EXPORT mozglueresult ChildProcessInit(int argc,
+                                                         char* argv[]) {
+  EnsureBaseProfilerInitialized();
+
+  if (loadNSSLibs() != SUCCESS) {
+    return FAILURE;
+  }
+  if (loadSQLiteLibs() != SUCCESS) {
+    return FAILURE;
+  }
+  if (loadGeckoLibs() != SUCCESS) {
+    return FAILURE;
+  }
+
+  // argv[--argc] below would read argv[-1] for an empty argument list.
+  if (argc < 1 || !argv) {
+    return FAILURE;
+  }
+
+  gBootstrap->XRE_SetProcessType(argv[--argc]);
+
+  XREChildData childData;
+  return NS_FAILED(gBootstrap->XRE_InitChildProcess(argc, argv, &childData));
+}
+
+// JNI entry point: on __ARM_EABI__ builds reports mozilla::supports_neon();
+// every other build reports true.
+extern "C" APKOPEN_EXPORT jboolean MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_neonCompatible(JNIEnv* jenv,
+                                                          jclass jc) {
+#ifdef __ARM_EABI__
+  const jboolean neonSupported = mozilla::supports_neon();
+#else
+  const jboolean neonSupported = true;
+#endif  // __ARM_EABI__
+  return neonSupported;
+}
+
+// Does current process name end with ':media'?
+//
+// Reads the first string of /proc/<pid>/cmdline and compares its tail with
+// ":media". Returns false when the file cannot be opened or read.
+static bool IsMediaProcess() {
+  char path[256];
+  SprintfLiteral(path, "/proc/%d/cmdline", getpid());
+
+  FILE* f = fopen(path, "r");
+  if (!f) {
+    return false;
+  }
+
+  // Use a separate buffer and check the read result, so a failed read can
+  // never leave the /proc path itself to be parsed as the command line.
+  char cmdline[256];
+  const bool gotLine = fgets(cmdline, sizeof(cmdline), f) != nullptr;
+  fclose(f);
+  if (!gotLine) {
+    return false;
+  }
+
+  const size_t cmdlineLen = strlen(cmdline);
+  const char suffix[] = ":media";
+  const size_t suffixLen = sizeof(suffix) - 1;
+  return cmdlineLen >= suffixLen &&
+         !strncmp(cmdline + cmdlineLen - suffixLen, suffix, suffixLen);
+}
+
+#ifndef SYS_rt_tgsigqueueinfo
+#  define SYS_rt_tgsigqueueinfo __NR_rt_tgsigqueueinfo
+#endif
+/* Copy of http://androidxref.com/7.1.1_r6/xref/bionic/linker/debugger.cpp#262,
+ * with debuggerd related code stripped.
+ *
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+// Signal handler installed by suppressCrashDialog. It restores the default
+// disposition for `num` and re-queues the signal to the current thread with
+// rt_tgsigqueueinfo, so the signal is delivered again once this handler
+// returns. `context` is unused.
+static void CatchFatalSignals(int num, siginfo_t* info, void* context) {
+  // It's possible somebody cleared the SA_SIGINFO flag, which would mean
+  // our "info" arg holds an undefined value.
+  struct sigaction action = {};
+  if ((sigaction(num, nullptr, &action) < 0) ||
+      !(action.sa_flags & SA_SIGINFO)) {
+    info = nullptr;
+  }
+
+  // We need to return from the signal handler so that debuggerd can dump the
+  // thread that crashed, but returning here does not guarantee that the signal
+  // will be thrown again, even for SIGSEGV and friends, since the signal could
+  // have been sent manually. Resend the signal with rt_tgsigqueueinfo(2) to
+  // preserve the SA_SIGINFO contents.
+  signal(num, SIG_DFL);
+
+  struct siginfo si;
+  if (!info) {
+    // No usable siginfo: synthesize one describing a user-sent signal from
+    // this process.
+    memset(&si, 0, sizeof(si));
+    si.si_code = SI_USER;
+    si.si_pid = getpid();
+    si.si_uid = getuid();
+    info = &si;
+  } else if (info->si_code >= 0 || info->si_code == SI_TKILL) {
+    // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels
+    // that contain commit 66dd34a (3.9+). The manpage claims to only allow
+    // negative si_code values that are not SI_TKILL, but 66dd34a changed the
+    // check to allow all si_code values in calls coming from inside the house.
+    // (This branch is intentionally empty in this stripped-down copy.)
+  }
+
+  int rc = syscall(SYS_rt_tgsigqueueinfo, getpid(), gettid(), num, info);
+  if (rc != 0) {
+    // Re-queueing failed: log the errno text and exit the process.
+    __android_log_print(ANDROID_LOG_FATAL, "mozglue",
+                        "failed to resend signal during crash: %s",
+                        strerror(errno));
+    _exit(0);
+  }
+}
+
+// JNI entry point: installs CatchFatalSignals for the fatal signals listed
+// below. Asserts that the current process is the media process.
+extern "C" APKOPEN_EXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_suppressCrashDialog(JNIEnv* jenv,
+                                                               jclass jc) {
+  MOZ_RELEASE_ASSERT(IsMediaProcess(),
+                     "Suppress crash dialog only for media process");
+
+  // Restoring to SIG_DFL will crash on x86/Android M devices (see bug 1374556)
+  // so copy Android code
+  // (http://androidxref.com/7.1.1_r6/xref/bionic/linker/debugger.cpp#302). See
+  // comments above CatchFatalSignals() for copyright notice.
+  struct sigaction handler;
+  memset(&handler, 0, sizeof(handler));
+  sigemptyset(&handler.sa_mask);
+  handler.sa_sigaction = &CatchFatalSignals;
+  // SA_ONSTACK: use the alternate signal stack if available so we can catch
+  // stack overflows.
+  handler.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+
+  const int kFatalSignals[] = {
+      SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV,
+#if defined(SIGSTKFLT)
+      SIGSTKFLT,
+#endif
+      SIGTRAP,
+  };
+  for (int signum : kFatalSignals) {
+    sigaction(signum, &handler, nullptr);
+  }
+}
diff --git a/mozglue/android/APKOpen.h b/mozglue/android/APKOpen.h
new file mode 100644
index 0000000000..f3666bf028
--- /dev/null
+++ b/mozglue/android/APKOpen.h
@@ -0,0 +1,29 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef APKOpen_h
+#define APKOpen_h
+
+#include <jni.h>
+
+#ifndef APKOPEN_EXPORT
+#  define APKOPEN_EXPORT __attribute__((visibility("default")))
+#endif
+
+// Calls GeckoLoader.abort(msg) on the Java side; defined in APKOpen.cpp.
+APKOPEN_EXPORT void abortThroughJava(const char* msg);
+
+// Result codes returned by the library loading functions in APKOpen.cpp.
+static const int SUCCESS = 0;
+static const int FAILURE = 1;
+// Throws a Java exception of class `classname` with message `msg`.
+void JNI_Throw(JNIEnv* jenv, const char* classname, const char* msg);
+
+// Bug 1207642 - Work around Dalvik bug by realigning stack on JNI entry
+#ifndef MOZ_JNICALL
+#  ifdef __i386__
+#    define MOZ_JNICALL JNICALL __attribute__((force_align_arg_pointer))
+#  else
+#    define MOZ_JNICALL JNICALL
+#  endif
+#endif
+
+#endif /* APKOpen_h */
diff --git a/mozglue/android/Ashmem.cpp b/mozglue/android/Ashmem.cpp
new file mode 100644
index 0000000000..9c93fe2b10
--- /dev/null
+++ b/mozglue/android/Ashmem.cpp
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstring>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <linux/ashmem.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "Ashmem.h"
+
+namespace mozilla {
+namespace android {
+
+// Returns the handle obtained by opening libandroid.so on first use; the
+// same handle (possibly null) is returned on every later call.
+static void* libhandle() {
+  static void* sLibAndroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+  return sLibAndroid;
+}
+
+// Create a shared memory region of `size` bytes, optionally named `name`.
+// Uses ASharedMemory_create when libandroid.so exports it; otherwise opens
+// the ashmem device and configures it with ioctl. Returns a file descriptor,
+// or a negative value on failure.
+int ashmem_create(const char* name, size_t size) {
+  using CreateFn = int (*)(const char*, size_t);
+  static auto sCreate =
+      reinterpret_cast<CreateFn>(dlsym(libhandle(), "ASharedMemory_create"));
+  if (sCreate) {
+    return sCreate(name, size);
+  }
+
+  const int fd = open("/" ASHMEM_NAME_DEF, O_RDWR, 0600);
+  if (fd < 0) {
+    return fd;
+  }
+
+  if (name) {
+    // Naming is best effort: the ioctl result is not checked.
+    char truncatedName[ASHMEM_NAME_LEN];
+    strlcpy(truncatedName, name, sizeof(truncatedName));
+    ioctl(fd, ASHMEM_SET_NAME, truncatedName);
+  }
+
+  if (ioctl(fd, ASHMEM_SET_SIZE, size) == 0) {
+    return fd;
+  }
+
+  close(fd);
+  return -1;
+}
+
+// Return the size in bytes of the shared memory region behind `fd`.
+// Uses ASharedMemory_getSize when libandroid.so exports it; otherwise asks
+// the ashmem driver through ioctl. The fallback reports 0 when the ioctl
+// fails, instead of converting the -1 error value into a huge size_t.
+size_t ashmem_getSize(int fd) {
+  static auto fGetSize =
+      (size_t(*)(int))dlsym(libhandle(), "ASharedMemory_getSize");
+  if (fGetSize) {
+    return fGetSize(fd);
+  }
+
+  const int size = ioctl(fd, ASHMEM_GET_SIZE, nullptr);
+  return size < 0 ? 0 : static_cast<size_t>(size);
+}
+
+// Restrict the protection flags allowed on the region behind `fd`.
+// Uses ASharedMemory_setProt when libandroid.so exports it; otherwise issues
+// the ASHMEM_SET_PROT_MASK ioctl. Returns the result of whichever call ran.
+int ashmem_setProt(int fd, int prot) {
+  using SetProtFn = int (*)(int, int);
+  static auto sSetProt =
+      reinterpret_cast<SetProtFn>(dlsym(libhandle(), "ASharedMemory_setProt"));
+
+  return sSetProt ? sSetProt(fd, prot)
+                  : ioctl(fd, ASHMEM_SET_PROT_MASK, prot);
+}
+
+}  // namespace android
+}  // namespace mozilla
diff --git a/mozglue/android/Ashmem.h b/mozglue/android/Ashmem.h
new file mode 100644
index 0000000000..9779f71ba0
--- /dev/null
+++ b/mozglue/android/Ashmem.h
@@ -0,0 +1,22 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Ashmem_h__
+#define Ashmem_h__
+
+#include <linux/ashmem.h>
+
+namespace mozilla {
+namespace android {
+
+// Wrappers for the ASharedMemory functions in the NDK
+// https://developer.android.com/ndk/reference/group/memory
+MFBT_API int ashmem_create(const char* name, size_t size);
+MFBT_API size_t ashmem_getSize(int fd);
+MFBT_API int ashmem_setProt(int fd, int prot);
+
+}  // namespace android
+}  // namespace mozilla
+
+#endif  // Ashmem_h__
diff --git a/mozglue/android/NativeCrypto.cpp b/mozglue/android/NativeCrypto.cpp
new file mode 100644
index 0000000000..f24f6eb55c
--- /dev/null
+++ b/mozglue/android/NativeCrypto.cpp
@@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "NativeCrypto.h"
+#include "APKOpen.h"
+
+#include <jni.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mozilla/SHA1.h"
+#include "pbkdf2_sha256.h"
+
+/**
+ * Helper function to invoke native PBKDF2 function with JNI
+ * arguments.
+ *
+ * Throws IllegalArgumentException when `dkLen` or `c` is negative and
+ * OutOfMemoryError when the output buffer cannot be allocated. Returns
+ * nullptr whenever a Java exception is pending.
+ *
+ * @param jpassword Password bytes.
+ * @param jsalt     Salt bytes.
+ * @param c         Iteration count.
+ * @param dkLen     Length of the derived key in bytes.
+ * @return New byte array of `dkLen` bytes holding the derived key.
+ */
+extern "C" JNIEXPORT jbyteArray MOZ_JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_pbkdf2SHA256(
+    JNIEnv* env, jclass jc, jbyteArray jpassword, jbyteArray jsalt, jint c,
+    jint dkLen) {
+  if (dkLen < 0) {
+    env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"),
+                  "dkLen should not be less than 0");
+    return nullptr;
+  }
+  // A negative count would turn into an enormous value once cast to uint64_t.
+  if (c < 0) {
+    env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"),
+                  "c should not be less than 0");
+    return nullptr;
+  }
+
+  jbyte* password = env->GetByteArrayElements(jpassword, nullptr);
+  if (password == nullptr) {
+    return nullptr;
+  }
+  jbyte* salt = env->GetByteArrayElements(jsalt, nullptr);
+  if (salt == nullptr) {
+    env->ReleaseByteArrayElements(jpassword, password, JNI_ABORT);
+    return nullptr;
+  }
+  size_t passwordLen = env->GetArrayLength(jpassword);
+  size_t saltLen = env->GetArrayLength(jsalt);
+
+  // Heap allocation instead of a variable-length array: dkLen is chosen by
+  // the caller and must not be able to exhaust the stack. At least one byte
+  // is requested so that dkLen == 0 still yields a valid pointer.
+  uint8_t* hashResult =
+      static_cast<uint8_t*>(malloc(dkLen > 0 ? (size_t)dkLen : 1));
+  if (hashResult == nullptr) {
+    env->ReleaseByteArrayElements(jpassword, password, JNI_ABORT);
+    env->ReleaseByteArrayElements(jsalt, salt, JNI_ABORT);
+    env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"),
+                  "Unable to allocate the derived key buffer");
+    return nullptr;
+  }
+
+  PBKDF2_SHA256((uint8_t*)password, passwordLen, (uint8_t*)salt, saltLen,
+                (uint64_t)c, hashResult, (size_t)dkLen);
+
+  // The inputs were only read, so nothing needs to be copied back.
+  env->ReleaseByteArrayElements(jpassword, password, JNI_ABORT);
+  env->ReleaseByteArrayElements(jsalt, salt, JNI_ABORT);
+
+  jbyteArray out = env->NewByteArray(dkLen);
+  if (out != nullptr) {
+    env->SetByteArrayRegion(out, 0, dkLen, (jbyte*)hashResult);
+  }
+
+  free(hashResult);
+  return out;
+}
+
+using namespace mozilla;
+
+/**
+ * Helper function to invoke native SHA-1 function with JNI arguments.
+ *
+ * Returns a new byte array of SHA1Sum::kHashSize bytes holding the digest of
+ * `jstr`, or nullptr when the result array cannot be created.
+ */
+extern "C" JNIEXPORT jbyteArray MOZ_JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha1(
+    JNIEnv* env, jclass jc, jbyteArray jstr) {
+  jbyte* input = env->GetByteArrayElements(jstr, nullptr);
+  const size_t inputLen = env->GetArrayLength(jstr);
+
+  SHA1Sum hasher;
+  SHA1Sum::Hash digest;
+  hasher.update(input, static_cast<uint32_t>(inputLen));
+  hasher.finish(digest);
+
+  // The input was only read, so nothing needs to be copied back.
+  env->ReleaseByteArrayElements(jstr, input, JNI_ABORT);
+
+  jbyteArray result = env->NewByteArray(SHA1Sum::kHashSize);
+  if (result) {
+    env->SetByteArrayRegion(result, 0, SHA1Sum::kHashSize,
+                            reinterpret_cast<jbyte*>(digest));
+  }
+  return result;
+}
+
+/**
+ * Helper function to invoke native SHA-256 init with JNI arguments.
+ *
+ * Returns a new byte array of sizeof(SHA256_CTX) bytes holding an
+ * initialized context, or nullptr when the array cannot be created.
+ */
+extern "C" JNIEXPORT jbyteArray MOZ_JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256init(
+    JNIEnv* env, jclass jc) {
+  jbyteArray ctxArray = env->NewByteArray(sizeof(SHA256_CTX));
+  if (!ctxArray) {
+    return nullptr;
+  }
+
+  jbyte* raw = env->GetByteArrayElements(ctxArray, nullptr);
+  SHA256_Init(reinterpret_cast<SHA256_CTX*>(raw));
+
+  // Mode 0 writes the initialized context back into the Java array.
+  env->ReleaseByteArrayElements(ctxArray, raw, 0);
+  return ctxArray;
+}
+
+/**
+ * Helper function to invoke native SHA-256 update with JNI arguments.
+ *
+ * Feeds the first `len` bytes of `jstr` into the context stored in `jctx`.
+ * Throws IllegalArgumentException when `len` is negative or larger than the
+ * length of `jstr`, since hashing would otherwise read past the array.
+ */
+extern "C" JNIEXPORT void MOZ_JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256update(
+    JNIEnv* env, jclass jc, jbyteArray jctx, jbyteArray jstr, jint len) {
+  if (len < 0 || len > env->GetArrayLength(jstr)) {
+    env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"),
+                  "len should be between 0 and the input array length");
+    return;
+  }
+
+  jbyte* str = env->GetByteArrayElements(jstr, nullptr);
+
+  SHA256_CTX* shaContext =
+      (SHA256_CTX*)env->GetByteArrayElements(jctx, nullptr);
+
+  SHA256_Update(shaContext, (void*)str, (size_t)len);
+
+  // The input was only read; the updated context is copied back (mode 0).
+  env->ReleaseByteArrayElements(jstr, str, JNI_ABORT);
+  env->ReleaseByteArrayElements(jctx, (jbyte*)shaContext, 0);
+}
+
+/**
+ * Helper function to invoke native SHA-256 finalize with JNI arguments.
+ *
+ * Returns a new 32-byte array holding the digest computed from the context
+ * in `jctx`, or nullptr when the result array cannot be created.
+ */
+extern "C" JNIEXPORT jbyteArray MOZ_JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256finalize(
+    JNIEnv* env, jclass jc, jbyteArray jctx) {
+  // Number of digest bytes written by SHA256_Final.
+  const jsize kDigestLength = 32;
+
+  SHA256_CTX* shaContext =
+      (SHA256_CTX*)env->GetByteArrayElements(jctx, nullptr);
+
+  // The digest has a fixed size, so it lives on the stack; no heap
+  // allocation or manual delete[] is needed.
+  unsigned char digest[kDigestLength];
+  SHA256_Final(digest, shaContext);
+
+  // JNI_ABORT: changes made to the context copy are not written back.
+  env->ReleaseByteArrayElements(jctx, (jbyte*)shaContext, JNI_ABORT);
+
+  jbyteArray out = env->NewByteArray(kDigestLength);
+  if (nullptr != out) {
+    env->SetByteArrayRegion(out, 0, kDigestLength, (jbyte*)digest);
+  }
+
+  return out;
+}
diff --git a/mozglue/android/NativeCrypto.h b/mozglue/android/NativeCrypto.h
new file mode 100644
index 0000000000..e06145ef6b
--- /dev/null
+++ b/mozglue/android/NativeCrypto.h
@@ -0,0 +1,58 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_mozilla_gecko_background_nativecode_NativeCrypto */
+
+#ifndef _Included_org_mozilla_gecko_background_nativecode_NativeCrypto
+#  define _Included_org_mozilla_gecko_background_nativecode_NativeCrypto
+#  ifdef __cplusplus
+extern "C" {
+#  endif
+/*
+ * Class:     org_mozilla_gecko_background_nativecode_NativeCrypto
+ * Method:    pbkdf2SHA256
+ * Signature: ([B[BII)[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_pbkdf2SHA256(
+    JNIEnv*, jclass, jbyteArray, jbyteArray, jint, jint);
+
+/*
+ * Class:     org_mozilla_gecko_background_nativecode_NativeCrypto
+ * Method:    sha1
+ * Signature: ([B)[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha1(JNIEnv*, jclass,
+                                                               jbyteArray);
+
+/*
+ * Class:     org_mozilla_gecko_background_nativecode_NativeCrypto
+ * Method:    sha256init
+ * Signature: ()[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256init(JNIEnv*,
+                                                                     jclass);
+
+/*
+ * Class:     org_mozilla_gecko_background_nativecode_NativeCrypto
+ * Method:    sha256update
+ * Signature: ([B[BI)V
+ */
+JNIEXPORT void JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256update(
+    JNIEnv*, jclass, jbyteArray, jbyteArray, jint);
+
+/*
+ * Class:     org_mozilla_gecko_background_nativecode_NativeCrypto
+ * Method:    sha256finalize
+ * Signature: ([B)[B
+ */
+JNIEXPORT jbyteArray JNICALL
+Java_org_mozilla_gecko_background_nativecode_NativeCrypto_sha256finalize(
+    JNIEnv*, jclass, jbyteArray);
+
+#  ifdef __cplusplus
+}
+#  endif
+#endif
diff --git a/mozglue/android/SharedMemNatives.cpp b/mozglue/android/SharedMemNatives.cpp
new file mode 100644
index 0000000000..f38a2274b3
--- /dev/null
+++ b/mozglue/android/SharedMemNatives.cpp
@@ -0,0 +1,80 @@
+/* -*- Mode: c++; c-basic-offset: 2; tab-width: 20; indent-tabs-mode: nil; -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <errno.h>
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "mozilla/Sprintf.h"
+
+extern "C" {
+
+// Copy `size` bytes, starting `offset` bytes into the direct buffer `src`,
+// to the native address `dest`.
+//
+// Throws NullPointerException when either side has no address and
+// IllegalArgumentException when `offset`/`size` are negative or reach past
+// the capacity reported for `src`.
+JNIEXPORT
+void JNICALL
+Java_org_mozilla_gecko_media_SampleBuffer_nativeReadFromDirectBuffer(
+    JNIEnv* jenv, jclass, jobject src, jlong dest, jint offset, jint size) {
+  uint8_t* from = static_cast<uint8_t*>(jenv->GetDirectBufferAddress(src));
+  if (from == nullptr) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/NullPointerException"),
+                   "Null direct buffer");
+    return;
+  }
+
+  void* to = reinterpret_cast<void*>(dest);
+  if (to == nullptr) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/NullPointerException"),
+                   "Null shared memory buffer");
+    return;
+  }
+
+  // A negative size would become a huge size_t in memcpy, and a negative
+  // offset would read before the start of the buffer.
+  if (offset < 0 || size < 0) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/IllegalArgumentException"),
+                   "Negative offset or size");
+    return;
+  }
+
+  // GetDirectBufferCapacity returns -1 when the capacity is unknown; only
+  // enforce the upper bound when a capacity is available.
+  const jlong capacity = jenv->GetDirectBufferCapacity(src);
+  if (capacity >= 0 && static_cast<jlong>(offset) + size > capacity) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/IllegalArgumentException"),
+                   "Read exceeds direct buffer capacity");
+    return;
+  }
+
+  memcpy(to, from + offset, size);
+}
+
+// Copy `size` bytes, starting `offset` bytes into the native memory at
+// `src`, to the start of the direct buffer `dest`.
+//
+// Throws NullPointerException when either side has no address and
+// IllegalArgumentException when `offset`/`size` are negative or `size` is
+// larger than the capacity reported for `dest`.
+JNIEXPORT
+void JNICALL
+Java_org_mozilla_gecko_media_SampleBuffer_nativeWriteToDirectBuffer(
+    JNIEnv* jenv, jclass, jlong src, jobject dest, jint offset, jint size) {
+  uint8_t* from = reinterpret_cast<uint8_t*>(src);
+  if (from == nullptr) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/NullPointerException"),
+                   "Null shared memory buffer");
+    return;
+  }
+
+  void* to = jenv->GetDirectBufferAddress(dest);
+  if (to == nullptr) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/NullPointerException"),
+                   "Null direct buffer");
+    return;
+  }
+
+  // A negative size would become a huge size_t in memcpy, and a negative
+  // offset would read before the start of the source memory.
+  if (offset < 0 || size < 0) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/IllegalArgumentException"),
+                   "Negative offset or size");
+    return;
+  }
+
+  // GetDirectBufferCapacity returns -1 when the capacity is unknown; only
+  // enforce the upper bound when a capacity is available.
+  const jlong capacity = jenv->GetDirectBufferCapacity(dest);
+  if (capacity >= 0 && static_cast<jlong>(size) > capacity) {
+    jenv->ThrowNew(jenv->FindClass("java/lang/IllegalArgumentException"),
+                   "Write exceeds direct buffer capacity");
+    return;
+  }
+
+  memcpy(to, from + offset, size);
+}
+
+// Map `length` bytes of `fd` as shared, readable and writable memory.
+// Returns the mapped address as a jlong; on failure throws
+// NullPointerException carrying the errno value and returns 0.
+JNIEXPORT
+jlong JNICALL Java_org_mozilla_gecko_mozglue_SharedMemory_map(JNIEnv* env,
+                                                              jobject jobj,
+                                                              jint fd,
+                                                              jint length) {
+  void* mapped =
+      mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (mapped != MAP_FAILED) {
+    return reinterpret_cast<jlong>(mapped);
+  }
+
+  char msg[128];
+  SprintfLiteral(msg, "mmap failed. errno=%d", errno);
+  env->ThrowNew(env->FindClass("java/lang/NullPointerException"), msg);
+  return 0;
+}
+
+// Unmap `size` bytes starting at `address`. The munmap result is ignored.
+JNIEXPORT
+void JNICALL Java_org_mozilla_gecko_mozglue_SharedMemory_unmap(JNIEnv* env,
+                                                               jobject jobj,
+                                                               jlong address,
+                                                               jint size) {
+  void* mapped = reinterpret_cast<void*>(address);
+  const size_t mappedLength = static_cast<size_t>(size);
+  munmap(mapped, mappedLength);
+}
+}
diff --git a/mozglue/android/moz.build b/mozglue/android/moz.build
new file mode 100644
index 0000000000..5d72c135c0
--- /dev/null
+++ b/mozglue/android/moz.build
@@ -0,0 +1,71 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Headers this directory exports (top-level and under the mozilla/ prefix).
+EXPORTS += [
+    "APKOpen.h",
+]
+
+EXPORTS.mozilla += ["Ashmem.h"]
+
+# Translation units compiled from this directory.
+SOURCES += [
+    "APKOpen.cpp",
+    "Ashmem.cpp",
+    "NativeCrypto.cpp",
+    "nsGeckoUtils.cpp",
+    "pbkdf2_sha256.c",
+    "SharedMemNatives.cpp",
+]
+
+# The minidump analyzer is only linked in (and its headers made visible)
+# when the crash reporter is enabled.
+if CONFIG["MOZ_CRASHREPORTER"]:
+    USE_LIBS += [
+        "minidump-analyzer",
+    ]
+
+    LOCAL_INCLUDES += [
+        "/toolkit/crashreporter/minidump-analyzer",
+    ]
+
+# Everything built here is folded into the mozglue library.
+FINAL_LIBRARY = "mozglue"
+
+# Pass these configure values to the compiler as quoted string macros.
+for var in ("ANDROID_PACKAGE_NAME", "ANDROID_CPU_ARCH"):
+    DEFINES[var] = '"%s"' % CONFIG[var]
+
+# Mirror the MOZ_FOLD_LIBS configure option as a preprocessor define.
+if CONFIG["MOZ_FOLD_LIBS"]:
+    DEFINES["MOZ_FOLD_LIBS"] = True
+
+# Extra include directories for the sources above.
+LOCAL_INCLUDES += [
+    "!/build",
+    "../linker",
+    "/ipc/chromium/src",
+    "/nsprpub/lib/ds",
+    "/nsprpub/lib/libc/include",
+    "/nsprpub/pr/include",
+    "/security/nss/lib/base",
+    "/security/nss/lib/certdb",
+    "/security/nss/lib/cryptohi",
+    "/security/nss/lib/dev",
+    "/security/nss/lib/freebl",
+    "/security/nss/lib/nss",
+    "/security/nss/lib/pk11wrap",
+    "/security/nss/lib/pkcs7",
+    "/security/nss/lib/pki",
+    "/security/nss/lib/smime",
+    "/security/nss/lib/softoken",
+    "/security/nss/lib/ssl",
+    "/security/nss/lib/util",
+    "/third_party/sqlite3/src",
+    "/toolkit/components/startup",
+    "/xpcom/build",
+]
+
+# With clang or gcc, keep -Wshadow diagnostics as warnings rather than errors.
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += ["-Wno-error=shadow"]
+
+DEFINES["XPCOM_GLUE"] = True
+
+USE_LIBS += [
+    "xpcomglue",
+]
diff --git a/mozglue/android/nsGeckoUtils.cpp b/mozglue/android/nsGeckoUtils.cpp
new file mode 100644
index 0000000000..d6fbf5c0d7
--- /dev/null
+++ b/mozglue/android/nsGeckoUtils.cpp
@@ -0,0 +1,152 @@
+/* -*- Mode: c++; c-basic-offset: 2; tab-width: 20; indent-tabs-mode: nil; -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <jni.h>
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include "APKOpen.h"
+#include "Zip.h"
+#include "mozilla/RefPtr.h"
+
+#ifdef MOZ_CRASHREPORTER
+#  include "minidump-analyzer.h"
+#endif
+
+/**
+ * JNI entry point: installs the string held in `map` into the process
+ * environment via putenv().
+ *
+ * putenv() keeps the pointer it is given rather than copying the text, so
+ * the string is duplicated on the heap and that copy is intentionally never
+ * freed once it has been installed. Does nothing if the string cannot be
+ * read or duplicated.
+ */
+extern "C" __attribute__((visibility("default"))) void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_putenv(JNIEnv* jenv, jclass,
+                                                  jstring map) {
+  // XXX: java doesn't give us true UTF8, we should figure out something
+  // better to do here
+  const char* str = jenv->GetStringUTFChars(map, nullptr);
+  if (str == nullptr) return;
+
+  char* entry = strdup(str);
+  jenv->ReleaseStringUTFChars(map, str);
+  // strdup() returns null when out of memory; putenv(nullptr) must be avoided.
+  if (entry == nullptr) return;
+
+  // On failure the environment does not reference `entry`, so reclaim it.
+  if (putenv(entry) != 0) free(entry);
+}
+
+/**
+ * JNI entry point: opens the archive named by `jApkName` and reports whether
+ * its CRCs verify.
+ *
+ * Returns false when the name cannot be read, when the archive cannot be
+ * opened, or when Zip::VerifyCRCs() fails.
+ */
+extern "C" APKOPEN_EXPORT jboolean MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_GeckoLoader_verifyCRCs(JNIEnv* jenv, jclass,
+                                                      jstring jApkName) {
+  const char* str;
+  // XXX: java doesn't give us true UTF8, we should figure out something
+  // better to do here
+  str = jenv->GetStringUTFChars(jApkName, nullptr);
+  if (str == nullptr) {
+    return false;
+  }
+
+  RefPtr<Zip> zip = Zip::Create(str);
+  // Zip::Create() can yield null (other callers in this file check for it);
+  // treat an archive that cannot be opened as failing verification.
+  const bool valid = zip && zip->VerifyCRCs();
+  jenv->ReleaseStringUTFChars(jApkName, str);
+  return jboolean(valid);
+}
+
+/**
+ * JNI entry point: allocates `size` bytes with malloc() and wraps them in a
+ * direct ByteBuffer.
+ *
+ * Returns nullptr when `size` is negative or does not fit in size_t, when
+ * the allocation fails, or when the ByteBuffer cannot be created (the memory
+ * is released again in that case).
+ */
+extern "C" __attribute__((visibility("default"))) jobject MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_DirectBufferAllocator_nativeAllocateDirectBuffer(
+    JNIEnv* jenv, jclass, jlong size) {
+  // jlong is a signed 64-bit value while size_t is unsigned and may be
+  // narrower: reject sizes that would change when converted, so the buffer
+  // never advertises more bytes than were actually allocated.
+  const size_t bytes = static_cast<size_t>(size);
+  if (size < 0 || static_cast<jlong>(bytes) != size) {
+    return nullptr;
+  }
+
+  void* mem = malloc(bytes);
+  if (!mem) {
+    return nullptr;
+  }
+
+  jobject buffer = jenv->NewDirectByteBuffer(mem, size);
+  if (!buffer) free(mem);
+  return buffer;
+}
+
+/**
+ * JNI entry point: passes the native address backing the direct buffer
+ * `buf` to free().
+ */
+extern "C" __attribute__((visibility("default"))) void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_DirectBufferAllocator_nativeFreeDirectBuffer(
+    JNIEnv* jenv, jclass, jobject buf) {
+  void* const backing = jenv->GetDirectBufferAddress(buf);
+  free(backing);
+}
+
+/**
+ * JNI entry point: obtains the Zip for `path` from ZipCollection and returns
+ * it to Java as an opaque jlong handle that owns one reference.
+ *
+ * Raises IllegalArgumentException and returns 0 when the path is unreadable
+ * or empty, or when no Zip could be obtained for it.
+ */
+extern "C" __attribute__((visibility("default"))) jlong MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_NativeZip_getZip(JNIEnv* jenv, jclass,
+                                                jstring path) {
+  const char* utfPath = jenv->GetStringUTFChars(path, nullptr);
+  if (utfPath == nullptr || utfPath[0] == '\0') {
+    if (utfPath != nullptr) {
+      jenv->ReleaseStringUTFChars(path, utfPath);
+    }
+    JNI_Throw(jenv, "java/lang/IllegalArgumentException", "Invalid path");
+    return 0;
+  }
+
+  RefPtr<Zip> archive = ZipCollection::GetZip(utfPath);
+  jenv->ReleaseStringUTFChars(path, utfPath);
+  if (archive) {
+    // Hand the reference over to the Java side.
+    return reinterpret_cast<jlong>(archive.forget().take());
+  }
+
+  JNI_Throw(jenv, "java/lang/IllegalArgumentException",
+            "Invalid path or invalid zip");
+  return 0;
+}
+
+/**
+ * JNI entry point: creates a Zip over the memory backing the direct buffer
+ * `buffer` and returns it as an opaque jlong handle that owns one reference.
+ *
+ * Raises IllegalArgumentException and returns 0 when `buffer` exposes no
+ * direct memory or the memory is not a valid zip.
+ */
+extern "C" __attribute__((visibility("default"))) jlong MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_NativeZip_getZipFromByteBuffer(JNIEnv* jenv,
+                                                              jclass,
+                                                              jobject buffer) {
+  void* buf = jenv->GetDirectBufferAddress(buffer);
+  const jlong capacity = jenv->GetDirectBufferCapacity(buffer);
+  // JNI reports a null address / -1 capacity for objects that are not direct
+  // buffers; converting -1 to size_t would claim a gigantic mapping.
+  if (!buf || capacity < 0) {
+    JNI_Throw(jenv, "java/lang/IllegalArgumentException", "Invalid zip");
+    return 0;
+  }
+
+  RefPtr<Zip> zip = Zip::Create(buf, static_cast<size_t>(capacity));
+  if (!zip) {
+    JNI_Throw(jenv, "java/lang/IllegalArgumentException", "Invalid zip");
+    return 0;
+  }
+  return reinterpret_cast<jlong>(zip.forget().take());
+}
+
+/**
+ * JNI entry point: drops the reference held by the Java-side handle `obj`,
+ * which is the Zip pointer previously returned as a jlong.
+ */
+extern "C" __attribute__((visibility("default"))) void MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_NativeZip__1release(JNIEnv* jenv, jclass,
+                                                   jlong obj) {
+  reinterpret_cast<Zip*>(obj)->Release();
+}
+
+/**
+ * JNI entry point: looks up the entry `path` in the Zip behind the native
+ * handle `obj` and wraps its data in an InputStream by calling the Java
+ * method createInputStream(ByteBuffer, int) on `jzip`.
+ *
+ * Returns nullptr when the path string cannot be read, when GetStream()
+ * fails for the path, or when a JNI call fails.
+ */
+extern "C" __attribute__((visibility("default"))) jobject MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_NativeZip__1getInputStream(JNIEnv* jenv,
+                                                          jobject jzip,
+                                                          jlong obj,
+                                                          jstring path) {
+  Zip* zip = (Zip*)obj;
+  const char* str = jenv->GetStringUTFChars(path, nullptr);
+  if (!str) {
+    // The string could not be fetched; do not hand a null name to the Zip
+    // lookup or to ReleaseStringUTFChars().
+    return nullptr;
+  }
+
+  Zip::Stream stream;
+  bool res = zip->GetStream(str, &stream);
+  jenv->ReleaseStringUTFChars(path, str);
+  if (!res) {
+    return nullptr;
+  }
+  jobject buf = jenv->NewDirectByteBuffer(const_cast<void*>(stream.GetBuffer()),
+                                          stream.GetSize());
+  if (!buf) {
+    JNI_Throw(jenv, "java/lang/RuntimeException",
+              "Failed to create ByteBuffer");
+    return nullptr;
+  }
+  jclass nativeZip = jenv->GetObjectClass(jzip);
+  jmethodID method =
+      jenv->GetMethodID(nativeZip, "createInputStream",
+                        "(Ljava/nio/ByteBuffer;I)Ljava/io/InputStream;");
+  if (!method) {
+    // Lookup failed; calling through a null method ID is not allowed, so
+    // return and let Java see whatever exception GetMethodID raised.
+    return nullptr;
+  }
+  // Since this function is only expected to be called from Java, it is safe
+  // to skip exception checking for the method call below, as long as no
+  // other Native -> Java call happens before returning to Java.
+  return jenv->CallObjectMethod(jzip, method, buf, (jint)stream.GetType());
+}
+
+#ifdef MOZ_CRASHREPORTER
+
+/**
+ * JNI entry point: runs CrashReporter::GenerateStacks() on the minidump at
+ * `minidumpPath` and returns its result.
+ *
+ * Returns false without calling the analyzer when the path string cannot be
+ * read.
+ */
+extern "C" __attribute__((visibility("default"))) jboolean MOZ_JNICALL
+Java_org_mozilla_gecko_mozglue_MinidumpAnalyzer_GenerateStacks(
+    JNIEnv* jenv, jclass, jstring minidumpPath, jboolean fullStacks) {
+  const char* str = jenv->GetStringUTFChars(minidumpPath, nullptr);
+  if (str == nullptr) {
+    // Same convention as the other string-taking entry points in this file.
+    return false;
+  }
+
+  bool res = CrashReporter::GenerateStacks(str, fullStacks);
+
+  jenv->ReleaseStringUTFChars(minidumpPath, str);
+  return res;
+}
+
+#endif  // MOZ_CRASHREPORTER
diff --git a/mozglue/android/pbkdf2_sha256.c b/mozglue/android/pbkdf2_sha256.c
new file mode 100644
index 0000000000..119d245476
--- /dev/null
+++ b/mozglue/android/pbkdf2_sha256.c
@@ -0,0 +1,389 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/types.h>
+
+#include <stdint.h>
+#include <string.h>
+
+#include <sys/endian.h>
+
+#include "pbkdf2_sha256.h"
+
+/* Read four bytes at pp as one big-endian (most significant first) value. */
+static inline uint32_t be32dec(const void* pp) {
+  const uint8_t* bytes = (const uint8_t*)pp;
+  uint32_t value = 0;
+  int idx;
+
+  for (idx = 0; idx < 4; idx++) value = (value << 8) | bytes[idx];
+  return value;
+}
+
+/* Store x at pp as four bytes in big-endian (most significant first) order. */
+static inline void be32enc(void* pp, uint32_t x) {
+  uint8_t* out = (uint8_t*)pp;
+  int idx;
+
+  /* Fill from the last byte backwards, peeling off the low byte each time. */
+  for (idx = 3; idx >= 0; idx--) {
+    out[idx] = (uint8_t)(x & 0xff);
+    x >>= 8;
+  }
+}
+
+/*
+ * Write the len/4 words of src to dst as big-endian bytes (len bytes in
+ * total).  len is expected to be a multiple of 4.
+ */
+static void be32enc_vect(unsigned char* dst, const uint32_t* src, size_t len) {
+  const uint32_t* const end = src + len / 4;
+
+  for (; src != end; src++, dst += 4) be32enc(dst, *src);
+}
+
+/*
+ * Read len big-endian bytes from src into the len/4 words of dst.  len is
+ * expected to be a multiple of 4.
+ */
+static void be32dec_vect(uint32_t* dst, const unsigned char* src, size_t len) {
+  const unsigned char* const end = src + (len / 4) * 4;
+
+  for (; src != end; src += 4) *dst++ = be32dec(src);
+}
+
+/*
+ * Elementary functions used by SHA256.
+ * Note that the macro arguments are not parenthesized in the expansions, so
+ * they should only be given simple operands (as the uses in this file do).
+ */
+#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
+#define Maj(x, y, z) ((x & (y | z)) | (y & z))
+#define SHR(x, n) (x >> n)
+#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
+#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
+#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
+#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
+
+/*
+ * SHA256 round function.  Expands to several statements and relies on
+ * variables named t0 and t1 being declared in the enclosing scope.
+ */
+#define RND(a, b, c, d, e, f, g, h, k) \
+  t0 = h + S1(e) + Ch(e, f, g) + k;    \
+  t1 = S0(a) + Maj(a, b, c);           \
+  d += t0;                             \
+  h = t0 + t1;
+
+/*
+ * Adjusted round function for rotating state: rather than moving the eight
+ * working values each round, round i addresses them through indices that
+ * shift by one (mod 8) per round.
+ */
+#define RNDr(S, W, i, k)                                                  \
+  RND(S[(64 - i) % 8], S[(65 - i) % 8], S[(66 - i) % 8], S[(67 - i) % 8], \
+      S[(68 - i) % 8], S[(69 - i) % 8], S[(70 - i) % 8], S[(71 - i) % 8], \
+      W[i] + k)
+
+/*
+ * SHA256 block compression function.  The 256-bit state is transformed via
+ * the 512-bit input block to produce a new state.
+ *
+ * state: eight 32-bit words, updated in place.
+ * block: the 64 input bytes, read as sixteen big-endian words.
+ */
+static void SHA256_Transform(uint32_t* state, const unsigned char block[64]) {
+  uint32_t W[64];
+  uint32_t S[8];
+  uint32_t t0, t1; /* scratch values used by the RND macro */
+  int i;
+
+  /* 1. Prepare message schedule W. */
+  /* The first 16 words come straight from the block; the rest are derived. */
+  be32dec_vect(W, block, 64);
+  for (i = 16; i < 64; i++)
+    W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
+
+  /* 2. Initialize working variables. */
+  memcpy(S, state, 32);
+
+  /* 3. Mix. */
+  /* 64 unrolled rounds; the literal is the round constant for round i. */
+  RNDr(S, W, 0, 0x428a2f98);
+  RNDr(S, W, 1, 0x71374491);
+  RNDr(S, W, 2, 0xb5c0fbcf);
+  RNDr(S, W, 3, 0xe9b5dba5);
+  RNDr(S, W, 4, 0x3956c25b);
+  RNDr(S, W, 5, 0x59f111f1);
+  RNDr(S, W, 6, 0x923f82a4);
+  RNDr(S, W, 7, 0xab1c5ed5);
+  RNDr(S, W, 8, 0xd807aa98);
+  RNDr(S, W, 9, 0x12835b01);
+  RNDr(S, W, 10, 0x243185be);
+  RNDr(S, W, 11, 0x550c7dc3);
+  RNDr(S, W, 12, 0x72be5d74);
+  RNDr(S, W, 13, 0x80deb1fe);
+  RNDr(S, W, 14, 0x9bdc06a7);
+  RNDr(S, W, 15, 0xc19bf174);
+  RNDr(S, W, 16, 0xe49b69c1);
+  RNDr(S, W, 17, 0xefbe4786);
+  RNDr(S, W, 18, 0x0fc19dc6);
+  RNDr(S, W, 19, 0x240ca1cc);
+  RNDr(S, W, 20, 0x2de92c6f);
+  RNDr(S, W, 21, 0x4a7484aa);
+  RNDr(S, W, 22, 0x5cb0a9dc);
+  RNDr(S, W, 23, 0x76f988da);
+  RNDr(S, W, 24, 0x983e5152);
+  RNDr(S, W, 25, 0xa831c66d);
+  RNDr(S, W, 26, 0xb00327c8);
+  RNDr(S, W, 27, 0xbf597fc7);
+  RNDr(S, W, 28, 0xc6e00bf3);
+  RNDr(S, W, 29, 0xd5a79147);
+  RNDr(S, W, 30, 0x06ca6351);
+  RNDr(S, W, 31, 0x14292967);
+  RNDr(S, W, 32, 0x27b70a85);
+  RNDr(S, W, 33, 0x2e1b2138);
+  RNDr(S, W, 34, 0x4d2c6dfc);
+  RNDr(S, W, 35, 0x53380d13);
+  RNDr(S, W, 36, 0x650a7354);
+  RNDr(S, W, 37, 0x766a0abb);
+  RNDr(S, W, 38, 0x81c2c92e);
+  RNDr(S, W, 39, 0x92722c85);
+  RNDr(S, W, 40, 0xa2bfe8a1);
+  RNDr(S, W, 41, 0xa81a664b);
+  RNDr(S, W, 42, 0xc24b8b70);
+  RNDr(S, W, 43, 0xc76c51a3);
+  RNDr(S, W, 44, 0xd192e819);
+  RNDr(S, W, 45, 0xd6990624);
+  RNDr(S, W, 46, 0xf40e3585);
+  RNDr(S, W, 47, 0x106aa070);
+  RNDr(S, W, 48, 0x19a4c116);
+  RNDr(S, W, 49, 0x1e376c08);
+  RNDr(S, W, 50, 0x2748774c);
+  RNDr(S, W, 51, 0x34b0bcb5);
+  RNDr(S, W, 52, 0x391c0cb3);
+  RNDr(S, W, 53, 0x4ed8aa4a);
+  RNDr(S, W, 54, 0x5b9cca4f);
+  RNDr(S, W, 55, 0x682e6ff3);
+  RNDr(S, W, 56, 0x748f82ee);
+  RNDr(S, W, 57, 0x78a5636f);
+  RNDr(S, W, 58, 0x84c87814);
+  RNDr(S, W, 59, 0x8cc70208);
+  RNDr(S, W, 60, 0x90befffa);
+  RNDr(S, W, 61, 0xa4506ceb);
+  RNDr(S, W, 62, 0xbef9a3f7);
+  RNDr(S, W, 63, 0xc67178f2);
+
+  /* 4. Mix local working variables into global state. */
+  for (i = 0; i < 8; i++) state[i] += S[i];
+
+  /* Clean the stack. */
+  /* 256 and 32 are the byte sizes of W (64 words) and S (8 words). */
+  memset(W, 0, 256);
+  memset(S, 0, 32);
+  t0 = t1 = 0;
+}
+
+/*
+ * SHA-256 padding source: a single 0x80 byte followed by zeros.  The table
+ * is only ever read (it is fed to SHA256_Update), so it is declared const.
+ */
+static const unsigned char PAD[64] = {
+    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * Add padding and terminating bit-count.
+ * After this call the total amount of data fed to ctx is a whole number of
+ * 64-byte blocks, so nothing is left pending in ctx->buf.
+ */
+static void SHA256_Pad(SHA256_CTX* ctx) {
+  unsigned char len[8];
+  uint32_t r, plen;
+
+  /*
+   * Convert length to a vector of bytes -- we do this now rather
+   * than later because the length will change after we pad.
+   */
+  be32enc_vect(len, ctx->count, 8);
+
+  /* Add 1--64 bytes so that the resulting length is 56 mod 64. */
+  /* r is the number of bytes currently buffered (bit count / 8, mod 64). */
+  r = (ctx->count[1] >> 3) & 0x3f;
+  plen = (r < 56) ? (56 - r) : (120 - r);
+  SHA256_Update(ctx, PAD, (size_t)plen);
+
+  /* Add the terminating bit-count. */
+  SHA256_Update(ctx, len, 8);
+}
+
+/*
+ * Begin a SHA-256 operation: load the initial hash words into ctx->state
+ * and reset the processed-bit counter.  ctx->buf is left untouched.
+ */
+void SHA256_Init(SHA256_CTX* ctx) {
+  /* Magic initialization constants. */
+  static const uint32_t initial_state[8] = {
+      0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+      0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19};
+
+  memcpy(ctx->state, initial_state, sizeof(initial_state));
+
+  /* Zero bits processed so far. */
+  ctx->count[0] = 0;
+  ctx->count[1] = 0;
+}
+
+/*
+ * Add bytes into the hash.
+ *
+ * ctx: hash state; count[] holds the 64-bit bit count (count[0] is the high
+ *      word, count[1] the low word) and buf holds a not-yet-complete block.
+ * in:  data to add; may be NULL when len is 0.
+ * len: number of bytes at in.
+ */
+void SHA256_Update(SHA256_CTX* ctx, const void* in, size_t len) {
+  uint32_t bitlen[2];
+  uint32_t r;
+  const unsigned char* src = in;
+
+  /*
+   * Nothing to add.  Returning here leaves ctx exactly as the code below
+   * would, but avoids passing a possibly-NULL pointer to memcpy(), which is
+   * undefined even for a zero length.
+   */
+  if (len == 0) return;
+
+  /* Number of bytes left in the buffer from previous updates. */
+  r = (ctx->count[1] >> 3) & 0x3f;
+
+  /* Convert the length into a number of bits. */
+  bitlen[1] = ((uint32_t)len) << 3;
+  bitlen[0] = (uint32_t)(len >> 29);
+
+  /* Update number of bits. */
+  /* Unsigned wrap-around of the low word signals a carry into the high. */
+  if ((ctx->count[1] += bitlen[1]) < bitlen[1]) ctx->count[0]++;
+  ctx->count[0] += bitlen[0];
+
+  /* Handle the case where we don't need to perform any transforms. */
+  if (len < 64 - r) {
+    memcpy(&ctx->buf[r], src, len);
+    return;
+  }
+
+  /* Finish the current block. */
+  memcpy(&ctx->buf[r], src, 64 - r);
+  SHA256_Transform(ctx->state, ctx->buf);
+  src += 64 - r;
+  len -= 64 - r;
+
+  /* Perform complete blocks. */
+  while (len >= 64) {
+    SHA256_Transform(ctx->state, src);
+    src += 64;
+    len -= 64;
+  }
+
+  /* Copy left over data into buffer. */
+  memcpy(ctx->buf, src, len);
+}
+
+/*
+ * Finish a SHA-256 operation: pad the message, write the 32-byte big-endian
+ * hash to digest, then zero the whole context.
+ */
+void SHA256_Final(unsigned char digest[32], SHA256_CTX* ctx) {
+  /* Pad first; this folds any buffered bytes into the state. */
+  SHA256_Pad(ctx);
+
+  /* Export the eight state words as the digest. */
+  be32enc_vect(digest, ctx->state, 32);
+
+  /* Leave no hash state behind in the context. */
+  memset(ctx, 0, sizeof(SHA256_CTX));
+}
+
+/*
+ * Initialize an HMAC-SHA256 operation with the given key.
+ *
+ * ctx:  receives the inner and outer SHA-256 states, each already fed with
+ *       its key-derived 64-byte pad block.
+ * _K:   key bytes.
+ * Klen: key length; keys longer than 64 bytes are replaced by their
+ *       SHA-256 hash.
+ */
+void HMAC_SHA256_Init(HMAC_SHA256_CTX* ctx, const void* _K, size_t Klen) {
+  unsigned char pad[64];
+  unsigned char khash[32];
+  const unsigned char* K = _K;
+  size_t i;
+
+  /* If Klen > 64, the key is really SHA256(K). */
+  if (Klen > 64) {
+    SHA256_Init(&ctx->ictx);
+    SHA256_Update(&ctx->ictx, K, Klen);
+    SHA256_Final(khash, &ctx->ictx);
+    K = khash;
+    Klen = 32;
+  }
+
+  /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */
+  SHA256_Init(&ctx->ictx);
+  memset(pad, 0x36, 64);
+  for (i = 0; i < Klen; i++) pad[i] ^= K[i];
+  SHA256_Update(&ctx->ictx, pad, 64);
+
+  /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */
+  SHA256_Init(&ctx->octx);
+  memset(pad, 0x5c, 64);
+  for (i = 0; i < Klen; i++) pad[i] ^= K[i];
+  SHA256_Update(&ctx->octx, pad, 64);
+
+  /*
+   * Clean the stack.  pad still holds the key xor 0x5c at this point, so it
+   * is wiped together with the hashed key.
+   */
+  memset(khash, 0, 32);
+  memset(pad, 0, 64);
+}
+
+/*
+ * Append len bytes at in to the message being authenticated.  Only the
+ * inner hash consumes message data; the outer hash is untouched here.
+ */
+void HMAC_SHA256_Update(HMAC_SHA256_CTX* ctx, const void* in, size_t len) {
+  SHA256_CTX* const inner = &ctx->ictx;
+
+  SHA256_Update(inner, in, len);
+}
+
+/*
+ * Finish an HMAC-SHA256 operation and write the 32-byte MAC to digest.
+ * Both SHA-256 contexts inside ctx are finalized in the process.
+ */
+void HMAC_SHA256_Final(unsigned char digest[32], HMAC_SHA256_CTX* ctx) {
+  unsigned char inner[32];
+
+  /* Close the inner hash over (inner pad || message). */
+  SHA256_Final(inner, &ctx->ictx);
+
+  /* The outer hash covers (outer pad || inner hash). */
+  SHA256_Update(&ctx->octx, inner, sizeof(inner));
+  SHA256_Final(digest, &ctx->octx);
+
+  /* Do not leave the intermediate hash on the stack. */
+  memset(inner, 0, sizeof(inner));
+}
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ *
+ * An iteration count c of 0 behaves like c == 1: the inner loop starts at 2,
+ * so only U_1 contributes to each block.
+ */
+void PBKDF2_SHA256(const uint8_t* passwd, size_t passwdlen, const uint8_t* salt,
+                   size_t saltlen, uint64_t c, uint8_t* buf, size_t dkLen) {
+  HMAC_SHA256_CTX PShctx, hctx;
+  size_t i;
+  uint8_t ivec[4];
+  uint8_t U[32];
+  uint8_t T[32];
+  uint64_t j;
+  int k;
+  size_t clen;
+
+  /* Compute HMAC state after processing P and S. */
+  HMAC_SHA256_Init(&PShctx, passwd, passwdlen);
+  HMAC_SHA256_Update(&PShctx, salt, saltlen);
+
+  /* Iterate through the blocks. */
+  for (i = 0; i * 32 < dkLen; i++) {
+    /* Generate INT(i + 1). */
+    be32enc(ivec, (uint32_t)(i + 1));
+
+    /* Compute U_1 = PRF(P, S || INT(i)). */
+    /* Start from a copy so the shared P||S prefix is hashed only once. */
+    memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX));
+    HMAC_SHA256_Update(&hctx, ivec, 4);
+    HMAC_SHA256_Final(U, &hctx);
+
+    /* T_i = U_1 ... */
+    memcpy(T, U, 32);
+
+    for (j = 2; j <= c; j++) {
+      /* Compute U_j. */
+      HMAC_SHA256_Init(&hctx, passwd, passwdlen);
+      HMAC_SHA256_Update(&hctx, U, 32);
+      HMAC_SHA256_Final(U, &hctx);
+
+      /* ... xor U_j ... */
+      for (k = 0; k < 32; k++) T[k] ^= U[k];
+    }
+
+    /* Copy as many bytes as necessary into buf. */
+    clen = dkLen - i * 32;
+    if (clen > 32) clen = 32;
+    memcpy(&buf[i * 32], T, clen);
+  }
+
+  /* Clean PShctx, since we never called _Final on it. */
+  memset(&PShctx, 0, sizeof(HMAC_SHA256_CTX));
+
+  /* Clean the stack: U and T hold PRF output and derived-key blocks. */
+  memset(U, 0, 32);
+  memset(T, 0, 32);
+}
diff --git a/mozglue/android/pbkdf2_sha256.h b/mozglue/android/pbkdf2_sha256.h
new file mode 100644
index 0000000000..80223ffcd5
--- /dev/null
+++ b/mozglue/android/pbkdf2_sha256.h
@@ -0,0 +1,70 @@
+/*-
+ * Copyright 2005,2007,2009 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/lib/libmd/sha256.h,v 1.2 2006/01/17 15:35:56 phk Exp $
+ */
+
+#ifndef _SHA256_H_
+#define _SHA256_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#include <stdint.h>
+
+/* State of one in-progress SHA-256 computation. */
+typedef struct SHA256Context {
+  uint32_t state[8];     /* current hash words */
+  uint32_t count[2];     /* bits hashed so far: [0] high word, [1] low word */
+  unsigned char buf[64]; /* bytes of a not-yet-complete 64-byte block */
+} SHA256_CTX;
+
+/* State of one in-progress HMAC-SHA256 computation. */
+typedef struct HMAC_SHA256Context {
+  SHA256_CTX ictx; /* inner hash: key pad followed by the message */
+  SHA256_CTX octx; /* outer hash: key pad followed by the inner digest */
+} HMAC_SHA256_CTX;
+
+/* Plain SHA-256: Init, any number of Updates, then Final (32-byte digest). */
+void SHA256_Init(SHA256_CTX*);
+void SHA256_Update(SHA256_CTX*, const void*, size_t);
+void SHA256_Final(unsigned char[32], SHA256_CTX*);
+/* HMAC-SHA256: Init takes the key and its length; Final yields the MAC. */
+void HMAC_SHA256_Init(HMAC_SHA256_CTX*, const void*, size_t);
+void HMAC_SHA256_Update(HMAC_SHA256_CTX*, const void*, size_t);
+void HMAC_SHA256_Final(unsigned char[32], HMAC_SHA256_CTX*);
+
+/**
+ * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen):
+ * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and
+ * write the output to buf.  The value dkLen must be at most 32 * (2^32 - 1).
+ */
+void PBKDF2_SHA256(const uint8_t*, size_t, const uint8_t*, size_t, uint64_t,
+                   uint8_t*, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_SHA256_H_ */
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.cpp b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
new file mode 100644
index 0000000000..0c2c855c9b
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.cpp
@@ -0,0 +1,592 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI, as described in:
+ *   http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
+ *
+ * This handles only the ARM-defined "personality routines" (chapter
+ * 9), and doesn't track the values of FP registers, because profiling
+ * needs only the chain of PC/SP values.
+ *
+ * Because the exception handling info may not be accurate for all
+ * possible places where an async signal could occur (e.g., in a
+ * prologue or epilogue), this bounds-checks all stack accesses.
+ *
+ * This file uses "struct" for structures in the exception tables and
+ * "class" otherwise.  We should avoid violating the C++11
+ * standard-layout rules in the former.
+ */
+
+#include "BaseProfiler.h"
+
+#include "EHABIStackWalk.h"
+
+#include "BaseProfilerSharedLibraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+#  define PT_ARM_EXIDX 0x70000001
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// One word of an exception table: either a 31-bit offset relative to the
+// word's own address, or (when the top bit is set) inline data. Instances
+// are never constructed by this code; they are only viewed in place, which
+// is why copying and default construction are disabled.
+struct PRel31 {
+  uint32_t mBits;
+  // True when bit 31 is set.
+  bool topBit() const { return mBits & 0x80000000; }
+  // The low 31 bits, without sign extension.
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // The low 31 bits as a signed quantity (bit 30 is the sign bit). This is
+  // computed with unsigned arithmetic because left-shifting a negative
+  // signed value is undefined behaviour before C++20.
+  int32_t offset() const {
+    const uint32_t bits = value();
+    if (bits & 0x40000000) {
+      // bits - 2^31, done in two steps so every intermediate fits in int32_t.
+      return static_cast<int32_t>(bits - 0x40000000) - 0x40000000;
+    }
+    return static_cast<int32_t>(bits);
+  }
+  // Address obtained by applying offset() to the location of this word.
+  const void* compute() const {
+    return reinterpret_cast<const char*>(this) + offset();
+  }
+
+ private:
+  PRel31(const PRel31& copied) = delete;
+  PRel31() = delete;
+};
+
+// One entry of the exception index table: two consecutive PRel31 words.
+// Like PRel31, entries are only viewed in place and never constructed here.
+struct EHEntry {
+  PRel31 startPC;  // start of the code range this entry describes
+  PRel31 exidx;    // 1 (cannot unwind), inline unwind data, or an offset to it
+
+ private:
+  EHEntry(const EHEntry& copied) = delete;
+  EHEntry() = delete;
+};
+
+// Register state carried from frame to frame while unwinding.
+class EHState {
+  // Note that any core register can be used as a "frame pointer" to
+  // influence the unwinding process, so this must track all of them.
+  uint32_t mRegs[16];
+
+ public:
+  // Applies the unwind program of aEntry to this state; returns whether the
+  // step succeeded. stackBase bounds the stack accesses made while doing so.
+  bool unwind(const EHEntry* aEntry, const void* stackBase);
+  // Access to register i (see R_SP / R_LR / R_PC for the special indices).
+  uint32_t& operator[](int i) { return mRegs[i]; }
+  const uint32_t& operator[](int i) const { return mRegs[i]; }
+  // NOTE(review): presumably seeds mRegs from the machine context; the
+  // definition is not in this part of the file -- confirm.
+  explicit EHState(const mcontext_t&);
+};
+
+// Register indices for the stack pointer, link register and program counter.
+enum { R_SP = 13, R_LR = 14, R_PC = 15 };
+
+// The exception index table of one ELF image, together with the PC range
+// and name associated with it.
+class EHTable {
+  uint32_t mStartPC;
+  uint32_t mEndPC;
+  uint32_t mBaseAddress;
+  const EHEntry* mEntriesBegin;
+  const EHEntry* mEntriesEnd;
+  std::string mName;
+
+ public:
+  // Builds the table from the ELF image at aELF (definition not shown here).
+  EHTable(const void* aELF, size_t aSize, const std::string& aName);
+  // Finds the entry for aPC (definition not shown here).
+  const EHEntry* lookup(uint32_t aPC) const;
+  // A table is usable only if it has at least one entry.
+  bool isValid() const { return mEntriesEnd != mEntriesBegin; }
+  const std::string& name() const { return mName; }
+  uint32_t startPC() const { return mStartPC; }
+  uint32_t endPC() const { return mEndPC; }
+  uint32_t baseAddress() const { return mBaseAddress; }
+};
+
+// A collection of EHTables that can be searched by PC, plus a process-wide
+// "current" instance reachable through Update() / Get().
+class EHAddrSpace {
+  // NOTE(review): mStarts presumably holds the tables' start addresses for
+  // searching; the code that fills it is not in this part of the file.
+  std::vector<uint32_t> mStarts;
+  std::vector<EHTable> mTables;
+  static Atomic<const EHAddrSpace*> sCurrent;
+
+ public:
+  explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+  // Finds the table responsible for aPC (definition not shown here).
+  const EHTable* lookup(uint32_t aPC) const;
+  static void Update();
+  // May return null; EHABIStackWalk stops after one frame in that case.
+  static const EHAddrSpace* Get();
+};
+
+// Initialization hook for the EHABI stack walker: refreshes EHAddrSpace.
+void EHABIStackWalkInit() {
+  EHAddrSpace::Update();
+}
+
+// Walks the stack starting from aContext, recording one PC/SP pair per frame
+// into aPCs/aSPs (at most aNumFrames of them), and returns how many frames
+// were recorded. The walk ends early when there is no address space, no
+// table or entry covers the current PC, or an unwind step fails.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, const size_t aNumFrames) {
+  const EHAddrSpace* const addrSpace = EHAddrSpace::Get();
+  EHState regs(aContext);
+
+  size_t frames = 0;
+  while (frames < aNumFrames) {
+    // Record the current frame before trying to step past it.
+    const uint32_t framePC = regs[R_PC];
+    const uint32_t frameSP = regs[R_SP];
+    aPCs[frames] = reinterpret_cast<void*>(framePC);
+    aSPs[frames] = reinterpret_cast<void*>(frameSP);
+    ++frames;
+
+    if (addrSpace == nullptr) {
+      break;
+    }
+    // TODO: cache these lookups.  Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    const EHTable* const table = addrSpace->lookup(framePC);
+    const EHEntry* const entry = table ? table->lookup(framePC) : nullptr;
+    if (entry == nullptr || !regs.unwind(entry, stackBase)) {
+      break;
+    }
+  }
+
+  return frames;
+}
+
+// Interpreter for the unwind instructions of a single EHEntry. It decodes
+// the entry's instruction bytes one at a time and applies them to an
+// EHState, setting mFailed as soon as anything looks wrong.
+class EHInterp {
+ public:
+  // Note that stackLimit is exclusive and stackBase is inclusive
+  // (i.e, stackLimit < SP <= stackBase), following the convention
+  // set by the AAPCS spec.
+  EHInterp(EHState& aState, const EHEntry* aEntry, uint32_t aStackLimit,
+           uint32_t aStackBase)
+      : mState(aState),
+        mStackLimit(aStackLimit),
+        mStackBase(aStackBase),
+        mNextWord(0),
+        mWordsLeft(0),
+        mFailed(false) {
+    const PRel31& exidx = aEntry->exidx;
+    uint32_t firstWord;
+
+    if (exidx.mBits == 1) {  // EXIDX_CANTUNWIND
+      mFailed = true;
+      return;
+    }
+    // With the top bit set the word itself is the unwind data; otherwise it
+    // is an offset to where the data lives.
+    if (exidx.topBit()) {
+      firstWord = exidx.mBits;
+    } else {
+      mNextWord = reinterpret_cast<const uint32_t*>(exidx.compute());
+      firstWord = *mNextWord++;
+    }
+
+    // The top byte of the first word selects the format; the instruction
+    // bytes that remain in it are shifted to the top of mWord for next().
+    switch (firstWord >> 24) {
+      case 0x80:  // short
+        mWord = firstWord << 8;
+        mBytesLeft = 3;
+        break;
+      case 0x81:
+      case 0x82:  // long; catch descriptor size ignored
+        mWord = firstWord << 16;
+        mBytesLeft = 2;
+        mWordsLeft = (firstWord >> 16) & 0xff;
+        break;
+      default:
+        // unknown personality
+        mFailed = true;
+    }
+  }
+
+  // Runs the unwind program; defined out of line below the class.
+  bool unwind();
+
+ private:
+  // TODO: GCC has been observed not CSEing repeated reads of
+  // mState[R_SP] with writes to mFailed between them, suggesting that
+  // it hasn't determined that they can't alias and is thus missing
+  // optimization opportunities.  So, we may want to flatten EHState
+  // into this class; this may also make the code simpler.
+  EHState& mState;
+  uint32_t mStackLimit;
+  uint32_t mStackBase;
+  const uint32_t* mNextWord;  // next word of instructions to load, if any
+  uint32_t mWord;             // current word; next byte is in the top 8 bits
+  uint8_t mWordsLeft;         // whole words still to be read via mNextWord
+  uint8_t mBytesLeft;         // instruction bytes still unread in mWord
+  bool mFailed;
+
+  // Instruction encodings (I_*) and the masks (M_*) selecting the bits that
+  // identify them.
+  enum {
+    I_ADDSP = 0x00,  // 0sxxxxxx (subtract if s)
+    M_ADDSP = 0x80,
+    I_POPMASK = 0x80,  // 1000iiii iiiiiiii (if any i set)
+    M_POPMASK = 0xf0,
+    I_MOVSP = 0x90,  // 1001nnnn
+    M_MOVSP = 0xf0,
+    I_POPN = 0xa0,  // 1010lnnn
+    M_POPN = 0xf0,
+    I_FINISH = 0xb0,    // 10110000
+    I_POPLO = 0xb1,     // 10110001 0000iiii (if any i set)
+    I_ADDSPBIG = 0xb2,  // 10110010 uleb128
+    I_POPFDX = 0xb3,    // 10110011 sssscccc
+    I_POPFDX8 = 0xb8,   // 10111nnn
+    M_POPFDX8 = 0xf8,
+    // "Intel Wireless MMX" extensions omitted.
+    I_POPFDD = 0xc8,  // 1100100h sssscccc
+    M_POPFDD = 0xfe,
+    I_POPFDD8 = 0xd0,  // 11010nnn
+    M_POPFDD8 = 0xf8
+  };
+
+  // Returns the next instruction byte, loading another word when the
+  // current one is used up. Running out of input yields I_FINISH.
+  uint8_t next() {
+    if (mBytesLeft == 0) {
+      if (mWordsLeft == 0) {
+        return I_FINISH;
+      }
+      mWordsLeft--;
+      mWord = *mNextWord++;
+      mBytesLeft = 4;
+    }
+    mBytesLeft--;
+    mWord = (mWord << 8) | (mWord >> 24);  // rotate
+    return mWord;  // truncation to uint8_t keeps the byte just rotated in
+  }
+
+  // The virtual stack pointer, both as a register value and as a pointer.
+  uint32_t& vSP() { return mState[R_SP]; }
+  uint32_t* ptrSP() { return reinterpret_cast<uint32_t*>(vSP()); }
+
+  // Each check sets mFailed when the virtual SP violates its constraint.
+  void checkStackBase() {
+    if (vSP() > mStackBase) mFailed = true;
+  }
+  void checkStackLimit() {
+    if (vSP() <= mStackLimit) mFailed = true;
+  }
+  void checkStackAlign() {
+    if ((vSP() & 3) != 0) mFailed = true;
+  }
+  void checkStack() {
+    checkStackBase();
+    checkStackLimit();
+    checkStackAlign();
+  }
+
+  // Pops registers first..last from the stack, where bit 0 of mask
+  // corresponds to `first`. A popped SP value is held back and only applied
+  // once all the other registers have been popped.
+  void popRange(uint8_t first, uint8_t last, uint16_t mask) {
+    bool hasSP = false;
+    uint32_t tmpSP;
+    if (mask == 0) mFailed = true;
+    for (uint8_t r = first; r <= last; ++r) {
+      if (mask & 1) {
+        if (r == R_SP) {
+          hasSP = true;
+          tmpSP = *ptrSP();
+        } else
+          mState[r] = *ptrSP();
+        vSP() += 4;
+        checkStackBase();
+        if (mFailed) return;
+      }
+      mask >>= 1;
+    }
+    if (hasSP) {
+      vSP() = tmpSP;
+      checkStack();
+    }
+  }
+};
+
+// Performs one unwind step on this register state using aEntry. Stack
+// accesses are confined to (initial SP - 4, stackBasePtr].
+bool EHState::unwind(const EHEntry* aEntry, const void* stackBasePtr) {
+  // The unwinding program cannot set SP to less than the initial value.
+  const uint32_t lowerBound = mRegs[R_SP] - 4;
+  const uint32_t upperBound = reinterpret_cast<uint32_t>(stackBasePtr);
+  return EHInterp(*this, aEntry, lowerBound, upperBound).unwind();
+}
+
+// Interprets the EHABI unwinding instructions of the current entry, updating
+// the register state in place.
+//
+// Returns true when the Finish instruction is reached; at that point PC has
+// been loaded from LR if no instruction set it explicitly. Returns false when
+// the program is rejected: a stack pointer check failed, a uleb128 operand
+// was too long, or an unsupported instruction was encountered.
+bool EHInterp::unwind() {
+  mState[R_PC] = 0;
+  checkStack();
+  while (!mFailed) {
+    uint8_t insn = next();
+#if DEBUG_EHABI_UNWIND
+    LOG("unwind insn = %02x", (unsigned)insn);
+#endif
+    // Try to put the common cases first.
+
+    // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+    // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+    if ((insn & M_ADDSP) == I_ADDSP) {
+      uint32_t offset = ((insn & 0x3f) << 2) + 4;
+      if (insn & 0x40) {
+        vSP() -= offset;
+        checkStackLimit();
+      } else {
+        vSP() += offset;
+        checkStackBase();
+      }
+      continue;
+    }
+
+    // 10100nnn: Pop r4-r[4+nnn]
+    // 10101nnn: Pop r4-r[4+nnn], r14
+    if ((insn & M_POPN) == I_POPN) {
+      uint8_t n = (insn & 0x07) + 1;
+      bool lr = insn & 0x08;
+      uint32_t* ptr = ptrSP();
+      vSP() += (n + (lr ? 1 : 0)) * 4;
+      checkStackBase();
+      // If the popped range extends past the stack base, give up before
+      // reading it: those words are outside the stack we were told about.
+      if (mFailed) break;
+      for (uint8_t r = 4; r < 4 + n; ++r) mState[r] = *ptr++;
+      if (lr) mState[R_LR] = *ptr++;
+      continue;
+    }
+
+    // 10110000: Finish
+    if (insn == I_FINISH) {
+      if (mState[R_PC] == 0) {
+        mState[R_PC] = mState[R_LR];
+        // Non-standard change (bug 916106): Prevent the caller from
+        // re-using LR.  Since the caller is by definition not a leaf
+        // routine, it will have to restore LR from somewhere to
+        // return to its own caller, so we can safely zero it here.
+        // This makes a difference only if an error in unwinding
+        // (e.g., caused by starting from within a prologue/epilogue)
+        // causes us to load a pointer to a leaf routine as LR; if we
+        // don't do something, we'll go into an infinite loop of
+        // "returning" to that same function.
+        mState[R_LR] = 0;
+      }
+      return true;
+    }
+
+    // 1001nnnn: Set vsp = r[nnnn]
+    if ((insn & M_MOVSP) == I_MOVSP) {
+      vSP() = mState[insn & 0x0f];
+      checkStack();
+      continue;
+    }
+
+    // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+    // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+    if ((insn & M_POPFDD) == I_POPFDD) {
+      uint8_t n = (next() & 0x0f) + 1;
+      // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+      // As the space is currently unused, we don't try to check.
+      // The VFP register values are not tracked; only vsp is adjusted.
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+    if ((insn & M_POPFDD8) == I_POPFDD8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+    if (insn == I_ADDSPBIG) {
+      uint32_t acc = 0;
+      uint8_t shift = 0;
+      uint8_t byte;
+      do {
+        // Reject operands that would not fit in 32 bits.
+        if (shift >= 32) return false;
+        byte = next();
+        acc |= (byte & 0x7f) << shift;
+        shift += 7;
+      } while (byte & 0x80);
+      uint32_t offset = 0x204 + (acc << 2);
+      // The calculations above could have overflowed.
+      // But the one we care about is this:
+      if (vSP() + offset < vSP()) mFailed = true;
+      vSP() += offset;
+      // ...so that this is the only other check needed:
+      checkStackBase();
+      continue;
+    }
+
+    // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+    if ((insn & M_POPMASK) == I_POPMASK) {
+      popRange(4, 15, ((insn & 0x0f) << 8) | next());
+      continue;
+    }
+
+    // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+    if (insn == I_POPLO) {
+      popRange(0, 3, next() & 0x0f);
+      continue;
+    }
+
+    // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+    if (insn == I_POPFDX) {
+      uint8_t n = (next() & 0x0f) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+    if ((insn & M_POPFDX8) == I_POPFDX8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+    LOG("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+    mFailed = true;
+  }
+  return false;
+}
+
+// Orders tables by the address at which their code range starts.
+bool operator<(const EHTable& lhs, const EHTable& rhs) {
+  return rhs.startPC() > lhs.startPC();
+}
+
+// Async signal unsafe.
+//
+// Copies |aTables|, sorts the copy by start address and records every start
+// address in mStarts so that lookup() can binary-search them. Debug builds
+// additionally assert that consecutive tables do not overlap.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+    : mTables(aTables) {
+  std::sort(mTables.begin(), mTables.end());
+  DebugOnly<uint32_t> previousEnd = 0;
+  for (const EHTable& table : mTables) {
+    MOZ_ASSERT(table.startPC() >= previousEnd);
+    mStarts.push_back(table.startPC());
+    previousEnd = table.endPC();
+  }
+}
+
+// Finds the table whose [startPC, endPC) range contains |aPC|, or returns
+// null when no table covers that address.
+const EHTable* EHAddrSpace::lookup(uint32_t aPC) const {
+  // First start address strictly greater than aPC; the candidate table is the
+  // one just before it, if there is one.
+  const auto firstAfter = std::upper_bound(mStarts.begin(), mStarts.end(), aPC);
+  if (firstAfter == mStarts.begin()) {
+    return nullptr;
+  }
+
+  const ptrdiff_t index = (firstAfter - mStarts.begin()) - 1;
+  if (aPC >= mTables[index].endPC()) {
+    return nullptr;
+  }
+  return &mTables[index];
+}
+
+// Binary-searches this table's entries for the last one whose start address
+// is not greater than |aPC|. Returns null when |aPC| is at or beyond the
+// table's end, or before the first entry.
+const EHEntry* EHTable::lookup(uint32_t aPC) const {
+  MOZ_ASSERT(aPC >= mStartPC);
+  if (aPC >= mEndPC) {
+    return nullptr;
+  }
+
+  const EHEntry* lo = mEntriesBegin;
+  const EHEntry* hi = mEntriesEnd;
+  MOZ_ASSERT(lo < hi);
+  if (aPC < reinterpret_cast<uint32_t>(lo->startPC.compute())) {
+    return nullptr;
+  }
+
+  // Invariant: lo's start address is <= aPC, and every entry from hi onwards
+  // starts after aPC. Narrow until a single candidate remains.
+  while (hi - lo > 1) {
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+    if ((hi - 1)->startPC.compute() < lo->startPC.compute()) {
+      MOZ_CRASH("unsorted exidx");
+    }
+#endif
+    const EHEntry* pivot = lo + (hi - lo) / 2;
+    const uint32_t pivotStart =
+        reinterpret_cast<uint32_t>(pivot->startPC.compute());
+    if (aPC >= pivotStart) {
+      lo = pivot;
+    } else {
+      hi = pivot;
+    }
+  }
+  return lo;
+}
+
+// The ELF EI_DATA value matching the byte order this code is compiled for.
+// The EHTable constructor compares it against the e_ident[EI_DATA] field of
+// each candidate ELF header.
+#if MOZ_LITTLE_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2LSB;
+#elif MOZ_BIG_ENDIAN()
+static const unsigned char hostEndian = ELFDATA2MSB;
+#else
+#  error "No endian?"
+#endif
+
+// Async signal unsafe: std::vector::reserve, std::string copy ctor.
+//
+// Builds the unwind table description for the in-memory ELF image whose file
+// header is expected at |aELF|, with |aSize| bytes available; |aName| is
+// stored as the table's name.
+//
+// If the header is not a 32-bit ARM ELF header of the host's endianness, or
+// the image has no PT_ARM_EXIDX segment or no PT_LOAD segment at file offset
+// 0, the constructor returns early and mEntriesBegin/mEntriesEnd stay null.
+EHTable::EHTable(const void* aELF, size_t aSize, const std::string& aName)
+    : mStartPC(~0),  // largest uint32_t
+      mEndPC(0),
+      mEntriesBegin(nullptr),
+      mEntriesEnd(nullptr),
+      mName(aName) {
+  const uint32_t fileHeaderAddr = reinterpret_cast<uint32_t>(aELF);
+
+  // Too small to even hold an ELF file header.
+  if (aSize < sizeof(Elf32_Ehdr)) return;
+
+  const Elf32_Ehdr& file = *(reinterpret_cast<Elf32_Ehdr*>(fileHeaderAddr));
+  // Validate the magic number, class, byte order, version and machine type.
+  if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+      file.e_ident[EI_CLASS] != ELFCLASS32 ||
+      file.e_ident[EI_DATA] != hostEndian ||
+      file.e_ident[EI_VERSION] != EV_CURRENT || file.e_machine != EM_ARM ||
+      file.e_version != EV_CURRENT)
+    // e_flags?
+    return;
+
+  // NOTE(review): the program header table bounds are only checked in debug
+  // builds; release builds trust e_phoff/e_phnum/e_phentsize.
+  MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize);
+  const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0;
+  // Walk the program headers, remembering the exception index segment and the
+  // loadable segment that maps file offset 0, and accumulating the address
+  // range spanned by all executable loadable segments.
+  for (unsigned i = 0; i < file.e_phnum; ++i) {
+    const Elf32_Phdr& phdr = *(reinterpret_cast<Elf32_Phdr*>(
+        fileHeaderAddr + file.e_phoff + i * file.e_phentsize));
+    if (phdr.p_type == PT_ARM_EXIDX) {
+      exidxHdr = &phdr;
+    } else if (phdr.p_type == PT_LOAD) {
+      if (phdr.p_offset == 0) {
+        zeroHdr = &phdr;
+      }
+      if (phdr.p_flags & PF_X) {
+        mStartPC = std::min(mStartPC, phdr.p_vaddr);
+        mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+      }
+    }
+  }
+  if (!exidxHdr) return;
+  if (!zeroHdr) return;
+  // The file header sits at the start of the segment with file offset 0, so
+  // subtracting that segment's virtual address gives the load bias that turns
+  // the ELF's virtual addresses into addresses in this process.
+  mBaseAddress = fileHeaderAddr - zeroHdr->p_vaddr;
+  mStartPC += mBaseAddress;
+  mEndPC += mBaseAddress;
+  mEntriesBegin =
+      reinterpret_cast<const EHEntry*>(mBaseAddress + exidxHdr->p_vaddr);
+  mEntriesEnd = reinterpret_cast<const EHEntry*>(
+      mBaseAddress + exidxHdr->p_vaddr + exidxHdr->p_memsz);
+}
+
+// The shared address space instance. Starts out null; Update() installs a
+// value with a compare-and-exchange against null.
+Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
+
+// Async signal safe. Returns the address space installed by Update(), or null
+// if Update() has not installed one yet.
+const EHAddrSpace* EHAddrSpace::Get() {
+  const EHAddrSpace* current = sCurrent;
+  return current;
+}
+
+// Collect unwinding information from loaded objects.  Calls after the
+// first have no effect.  Async signal unsafe.
+void EHAddrSpace::Update() {
+  const EHAddrSpace* space = sCurrent;
+  if (space != nullptr) {
+    // An address space has already been installed.
+    return;
+  }
+
+  std::vector<EHTable> validTables;
+  SharedLibraryInfo libraries = SharedLibraryInfo::GetInfoForSelf();
+  const size_t libraryCount = libraries.GetSize();
+  for (size_t index = 0; index < libraryCount; ++index) {
+    const SharedLibrary& library = libraries.GetEntry(index);
+    // FIXME: This isn't correct if the start address isn't p_offset 0, because
+    // the start address will not point at the file header. But this is worked
+    // around by magic number checks in the EHTable constructor.
+    EHTable table(reinterpret_cast<const void*>(library.GetStart()),
+                  library.GetEnd() - library.GetStart(),
+                  library.GetDebugPath());
+    if (!table.isValid()) {
+      continue;
+    }
+    validTables.push_back(table);
+  }
+  space = new EHAddrSpace(validTables);
+
+  // Install the new address space only if nothing was installed in the
+  // meantime; otherwise discard ours.
+  const bool installed = sCurrent.compareExchange(nullptr, space);
+  if (!installed) {
+    delete space;
+    space = sCurrent;
+  }
+}
+
+// Initializes the sixteen register slots r0-r15 from the ARM machine context
+// |context|: fp, ip, sp, lr and pc fill slots 11 through 15 respectively.
+EHState::EHState(const mcontext_t& context) {
+// Test the reserved-namespace macro __linux__ rather than the legacy `linux`
+// macro, which compilers do not define in strict ISO modes.
+#if defined(__linux__)
+  mRegs[0] = context.arm_r0;
+  mRegs[1] = context.arm_r1;
+  mRegs[2] = context.arm_r2;
+  mRegs[3] = context.arm_r3;
+  mRegs[4] = context.arm_r4;
+  mRegs[5] = context.arm_r5;
+  mRegs[6] = context.arm_r6;
+  mRegs[7] = context.arm_r7;
+  mRegs[8] = context.arm_r8;
+  mRegs[9] = context.arm_r9;
+  mRegs[10] = context.arm_r10;
+  mRegs[11] = context.arm_fp;
+  mRegs[12] = context.arm_ip;
+  mRegs[13] = context.arm_sp;
+  mRegs[14] = context.arm_lr;
+  mRegs[15] = context.arm_pc;
+#else
+#  error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/EHABIStackWalk.h b/mozglue/baseprofiler/core/EHABIStackWalk.h
new file mode 100644
index 0000000000..d5f4edc0d7
--- /dev/null
+++ b/mozglue/baseprofiler/core/EHABIStackWalk.h
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Initialization entry point for the EHABI stack walker.
+// NOTE(review): presumably this collects the unwind tables of the loaded
+// libraries and must run before EHABIStackWalk(); confirm against the
+// definition in the .cpp file.
+void EHABIStackWalkInit();
+
+// Walks the stack starting from the register state in |aContext|.
+// NOTE(review): the following is inferred from the parameter names only and
+// should be confirmed against the definition: |stackBase| bounds the walk,
+// |aSPs| and |aPCs| are caller-provided arrays with room for |aNumFrames|
+// entries each, and the return value is the number of frames recorded.
+size_t EHABIStackWalk(const mcontext_t& aContext, void* stackBase, void** aSPs,
+                      void** aPCs, size_t aNumFrames);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/PageInformation.cpp b/mozglue/baseprofiler/core/PageInformation.cpp
new file mode 100644
index 0000000000..7ce47e86d7
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.cpp
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PageInformation.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Stores the given identifiers and a copy of |aUrl|. The reference count
+// starts at zero, so the object is not owned by anyone until AddRef() is
+// called on it.
+PageInformation::PageInformation(uint64_t aBrowsingContextID,
+                                 uint64_t aInnerWindowID,
+                                 const std::string& aUrl,
+                                 uint64_t aEmbedderInnerWindowID)
+    : mBrowsingContextID(aBrowsingContextID),
+      mInnerWindowID(aInnerWindowID),
+      mUrl(aUrl),
+      mEmbedderInnerWindowID(aEmbedderInnerWindowID),
+      mRefCnt(0) {}
+
+// Two page records describe the same page when their inner window IDs match.
+// |aOtherPageInfo| is dereferenced unconditionally and must not be null.
+bool PageInformation::Equals(PageInformation* aOtherPageInfo) const {
+  // Inner window IDs are unique per page, which makes comparing the browsing
+  // context ID or the URL unnecessary.
+  const uint64_t ownId = InnerWindowID();
+  const uint64_t otherId = aOtherPageInfo->InnerWindowID();
+  return ownId == otherId;
+}
+
+// Writes this page as one JSON object element to |aWriter|, with the
+// properties "browsingContextID", "innerWindowID", "url" and
+// "embedderInnerWindowID", in that order.
+void PageInformation::StreamJSON(SpliceableJSONWriter& aWriter) const {
+  aWriter.StartObjectElement();
+  // Here, we are converting uint64_t to double. Both Browsing Context and Inner
+  // Window IDs are created using `nsContentUtils::GenerateProcessSpecificId`,
+  // which is specifically designed to only use 53 of the 64 bits to be lossless
+  // when passed into and out of JS as a double.
+  aWriter.DoubleProperty("browsingContextID", BrowsingContextID());
+  aWriter.DoubleProperty("innerWindowID", InnerWindowID());
+  aWriter.StringProperty("url", Url());
+  aWriter.DoubleProperty("embedderInnerWindowID", EmbedderInnerWindowID());
+  aWriter.EndObject();
+}
+
+// Reports the size of this object's own allocation as measured by
+// |aMallocSizeOf|. Nothing the object points to is measured separately.
+size_t PageInformation::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  const size_t ownAllocation = aMallocSizeOf(this);
+  return ownAllocation;
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/PageInformation.h b/mozglue/baseprofiler/core/PageInformation.h
new file mode 100644
index 0000000000..158b172bdf
--- /dev/null
+++ b/mozglue/baseprofiler/core/PageInformation.h
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PageInformation_h
+#define PageInformation_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// This class contains information that's relevant to a single page only
+// while the page information is important and registered with the profiler,
+// but regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+// When the page gets unregistered, we keep the profiler buffer position
+// to determine if we are still using this page. If not, we unregister
+// it in the next page registration.
+class PageInformation final {
+ public:
+  // Records the identifiers and the URL describing one page.
+  PageInformation(uint64_t aBrowsingContextID, uint64_t aInnerWindowID,
+                  const std::string& aUrl, uint64_t aEmbedderInnerWindowID);
+
+  // Using hand-rolled ref-counting, because RefCounted.h macros don't produce
+  // the same code between mozglue and libxul, see bug 1536656.
+
+  // Takes one additional reference.
+  MFBT_API void AddRef() const { ++mRefCnt; }
+
+  // Drops one reference. The object destroys itself when, and only when, the
+  // last reference is released, i.e. when the count reaches zero.
+  MFBT_API void Release() const {
+    MOZ_ASSERT(int32_t(mRefCnt) > 0);
+    if (--mRefCnt == 0) {
+      delete this;
+    }
+  }
+
+  // Size of this object's allocation as measured by |aMallocSizeOf|.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+  // Whether |aOtherPageInfo| has the same inner window ID as this page.
+  bool Equals(PageInformation* aOtherPageInfo) const;
+  // Writes this page as a JSON object element to |aWriter|.
+  void StreamJSON(SpliceableJSONWriter& aWriter) const;
+
+  // Read-only accessors for the values given at construction.
+  uint64_t InnerWindowID() const { return mInnerWindowID; }
+  uint64_t BrowsingContextID() const { return mBrowsingContextID; }
+  const std::string& Url() const { return mUrl; }
+  uint64_t EmbedderInnerWindowID() const { return mEmbedderInnerWindowID; }
+
+  // The buffer position recorded by NotifyUnregistered(), or Nothing if
+  // NotifyUnregistered() has not been called.
+  Maybe<uint64_t> BufferPositionWhenUnregistered() const {
+    return mBufferPositionWhenUnregistered;
+  }
+
+  // Remembers |aBufferPosition| as the buffer position at which this page was
+  // unregistered.
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mBufferPositionWhenUnregistered = Some(aBufferPosition);
+  }
+
+ private:
+  const uint64_t mBrowsingContextID;
+  const uint64_t mInnerWindowID;
+  const std::string mUrl;
+  const uint64_t mEmbedderInnerWindowID;
+
+  // Holds the buffer position when page is unregistered.
+  // It's used to determine if we still use this page in the profiler or
+  // not.
+  Maybe<uint64_t> mBufferPositionWhenUnregistered;
+
+  // Number of outstanding references; mutable so that AddRef()/Release() can
+  // be called through pointers to const.
+  mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // PageInformation_h
diff --git a/mozglue/baseprofiler/core/PlatformMacros.h b/mozglue/baseprofiler/core/PlatformMacros.h
new file mode 100644
index 0000000000..c72e94c128
--- /dev/null
+++ b/mozglue/baseprofiler/core/PlatformMacros.h
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PLATFORM_MACROS_H
+#define PLATFORM_MACROS_H
+
+// Define platform selection macros in a consistent way. Don't add anything
+// else to this file, so it can remain freestanding. The primary factorisation
+// is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
+// too, since they are sometimes convenient.
+//
+// Note: "GP" is short for "Gecko Profiler".
+
+// Start from a clean slate: undefine every macro this file may define below,
+// so that exactly one GP_PLAT_*, one GP_ARCH_* and one GP_OS_* macro is
+// defined once the selection chain has run.
+#undef GP_PLAT_x86_android
+#undef GP_PLAT_amd64_android
+#undef GP_PLAT_arm_android
+#undef GP_PLAT_arm64_android
+#undef GP_PLAT_x86_linux
+#undef GP_PLAT_amd64_linux
+#undef GP_PLAT_arm_linux
+#undef GP_PLAT_arm64_linux
+#undef GP_PLAT_mips64_linux
+#undef GP_PLAT_amd64_darwin
+#undef GP_PLAT_arm64_darwin
+#undef GP_PLAT_amd64_freebsd
+#undef GP_PLAT_arm64_freebsd
+#undef GP_PLAT_x86_windows
+#undef GP_PLAT_amd64_windows
+#undef GP_PLAT_arm64_windows
+
+#undef GP_ARCH_x86
+#undef GP_ARCH_amd64
+#undef GP_ARCH_arm
+#undef GP_ARCH_arm64
+#undef GP_ARCH_mips64
+
+#undef GP_OS_android
+#undef GP_OS_linux
+#undef GP_OS_darwin
+#undef GP_OS_freebsd
+#undef GP_OS_windows
+
+// Platform selection: the first matching branch defines one GP_PLAT_* macro
+// together with the corresponding GP_ARCH_* and GP_OS_* macros; compilation
+// fails if no branch matches.
+//
+// We test __ANDROID__ before __linux__ because __linux__ is defined on both
+// Android and Linux, whereas GP_OS_android is not defined on vanilla Linux.
+
+// Android
+#if defined(__ANDROID__) && defined(__i386__)
+#  define GP_PLAT_x86_android 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_android 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+#  define GP_PLAT_arm_android 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_android 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_android 1
+
+// Linux (non-Android)
+#elif defined(__linux__) && defined(__i386__)
+#  define GP_PLAT_x86_linux 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_linux 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__arm__)
+#  define GP_PLAT_arm_linux 1
+#  define GP_ARCH_arm 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_linux 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_linux 1
+
+#elif defined(__linux__) && defined(__mips64)
+#  define GP_PLAT_mips64_linux 1
+#  define GP_ARCH_mips64 1
+#  define GP_OS_linux 1
+
+// macOS
+#elif defined(__APPLE__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_darwin 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_darwin 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_darwin 1
+
+// FreeBSD
+#elif defined(__FreeBSD__) && defined(__x86_64__)
+#  define GP_PLAT_amd64_freebsd 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_freebsd 1
+
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+#  define GP_PLAT_arm64_freebsd 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_freebsd 1
+
+// Windows (MSVC or MinGW)
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_IX86) || defined(__i386__))
+#  define GP_PLAT_x86_windows 1
+#  define GP_ARCH_x86 1
+#  define GP_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_X64) || defined(__x86_64__))
+#  define GP_PLAT_amd64_windows 1
+#  define GP_ARCH_amd64 1
+#  define GP_OS_windows 1
+
+// NOTE(review): unlike the two Windows branches above, this one accepts only
+// _MSC_VER/_M_ARM64 and not __MINGW32__/__aarch64__ -- confirm that this is
+// intentional.
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+#  define GP_PLAT_arm64_windows 1
+#  define GP_ARCH_arm64 1
+#  define GP_OS_windows 1
+
+#else
+#  error "Unsupported platform"
+#endif
+
+#endif /* ndef PLATFORM_MACROS_H */
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.cpp b/mozglue/baseprofiler/core/ProfileBuffer.cpp
new file mode 100644
index 0000000000..f39244ee91
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.cpp
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBuffer.h"
+
+#include "mozilla/MathAlgorithms.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Binds this ProfileBuffer to |aBuffer|, which is kept by reference and must
+// therefore outlive this object.
+ProfileBuffer::ProfileBuffer(ProfileChunkedBuffer& aBuffer)
+    : mEntries(aBuffer) {
+  // Assume the given buffer is in-session.
+  MOZ_ASSERT(mEntries.IsInSession());
+}
+
+/* static */
+// Appends |aEntry| to |aProfileChunkedBuffer| and returns the block index
+// reported by the buffer. The number of bytes copied depends on the entry's
+// kind. For a kind that is not in the list (which should not happen, and
+// asserts in debug builds) nothing is written and a default-constructed
+// index is returned.
+ProfileBufferBlockIndex ProfileBuffer::AddEntry(
+    ProfileChunkedBuffer& aProfileChunkedBuffer,
+    const ProfileBufferEntry& aEntry) {
+  switch (aEntry.GetKind()) {
+    // One case per entry kind, each copying 1 + SIZE bytes starting at
+    // |aEntry| (presumably the kind tag followed by the kind's payload).
+#define SWITCH_KIND(KIND, TYPE, SIZE)                          \
+  case ProfileBufferEntry::Kind::KIND: {                       \
+    return aProfileChunkedBuffer.PutFrom(&aEntry, 1 + (SIZE)); \
+  }
+
+    FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(SWITCH_KIND)
+
+#undef SWITCH_KIND
+    default:
+      MOZ_ASSERT(false, "Unhandled baseprofiler::ProfilerBuffer entry KIND");
+      return ProfileBufferBlockIndex{};
+  }
+}
+
+// Called from signal, call only reentrant functions
+//
+// Appends |aEntry| to this buffer's storage and returns the position of the
+// new entry as a plain buffer index.
+uint64_t ProfileBuffer::AddEntry(const ProfileBufferEntry& aEntry) {
+  ProfileBufferBlockIndex blockIndex = AddEntry(mEntries, aEntry);
+  return blockIndex.ConvertToProfileBufferIndex();
+}
+
+/* static */
+// Builds a ThreadId entry for |aThreadId| and appends it to
+// |aProfileChunkedBuffer|, returning the block index of the new entry.
+ProfileBufferBlockIndex ProfileBuffer::AddThreadIdEntry(
+    ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId) {
+  const auto& threadIdEntry = ProfileBufferEntry::ThreadId(aThreadId);
+  return AddEntry(aProfileChunkedBuffer, threadIdEntry);
+}
+
+// Appends a ThreadId entry for |aThreadId| to this buffer's storage and
+// returns the position of the new entry as a plain buffer index.
+uint64_t ProfileBuffer::AddThreadIdEntry(int aThreadId) {
+  ProfileBufferBlockIndex blockIndex = AddThreadIdEntry(mEntries, aThreadId);
+  return blockIndex.ConvertToProfileBufferIndex();
+}
+
+// Records one code location as a sequence of buffer entries: always a Label
+// and a FrameFlags entry, then, when present, the dynamic string |aStr| split
+// into DynamicStringFragment entries, the inner window ID (if non-zero), the
+// line number, the column number and the category pair.
+//
+// |aStr|, including its null terminator, is limited to kMaxFrameKeyLength
+// bytes; longer strings are cut and terminated with "...".
+void ProfileBuffer::CollectCodeLocation(
+    const char* aLabel, const char* aStr, uint32_t aFrameFlags,
+    uint64_t aInnerWindowID, const Maybe<uint32_t>& aLineNumber,
+    const Maybe<uint32_t>& aColumnNumber,
+    const Maybe<ProfilingCategoryPair>& aCategoryPair) {
+  AddEntry(ProfileBufferEntry::Label(aLabel));
+  AddEntry(ProfileBufferEntry::FrameFlags(uint64_t(aFrameFlags)));
+
+  if (aStr) {
+    // Store the string using one or more DynamicStringFragment entries.
+    size_t strLen = strlen(aStr) + 1;  // +1 for the null terminator
+    // If larger than the prescribed limit, we will cut the string and end it
+    // with an ellipsis.
+    const bool tooBig = strLen > kMaxFrameKeyLength;
+    if (tooBig) {
+      strLen = kMaxFrameKeyLength;
+    }
+    // Scratch space for one fragment; reused for every fragment.
+    char chars[ProfileBufferEntry::kNumChars];
+    for (size_t j = 0;; j += ProfileBufferEntry::kNumChars) {
+      // Store up to kNumChars characters in the entry.
+      size_t len = ProfileBufferEntry::kNumChars;
+      const bool last = j + len >= strLen;
+      if (last) {
+        // Only the last entry may be smaller than kNumChars.
+        len = strLen - j;
+        if (tooBig) {
+          // That last entry is part of a too-big string, replace the end
+          // characters with an ellipsis "...".
+          len = std::max(len, size_t(4));
+          chars[len - 4] = '.';
+          chars[len - 3] = '.';
+          chars[len - 2] = '.';
+          chars[len - 1] = '\0';
+          // Make sure the memcpy will not overwrite our ellipsis!
+          len -= 4;
+        }
+      }
+      // Copy this fragment's characters; for the last fragment of a string
+      // that was not cut, this includes the null terminator.
+      memcpy(chars, &aStr[j], len);
+      AddEntry(ProfileBufferEntry::DynamicStringFragment(chars));
+      if (last) {
+        break;
+      }
+    }
+  }
+
+  // An inner window ID of 0 is not recorded.
+  if (aInnerWindowID) {
+    AddEntry(ProfileBufferEntry::InnerWindowID(aInnerWindowID));
+  }
+
+  if (aLineNumber) {
+    AddEntry(ProfileBufferEntry::LineNumber(*aLineNumber));
+  }
+
+  if (aColumnNumber) {
+    AddEntry(ProfileBufferEntry::ColumnNumber(*aColumnNumber));
+  }
+
+  if (aCategoryPair.isSome()) {
+    AddEntry(ProfileBufferEntry::CategoryPair(int(*aCategoryPair)));
+  }
+}
+
+// Reports the memory attributed to the underlying chunked buffer, not
+// counting this ProfileBuffer object itself.
+size_t ProfileBuffer::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+  // Measurement of the following members may be added later if DMD finds it
+  // is worthwhile:
+  // - memory pointed to by the elements within mEntries
+  const size_t entriesSize = mEntries.SizeOfExcludingThis(aMallocSizeOf);
+  return entriesSize;
+}
+
+// Reports the size of this object's own allocation plus everything counted by
+// SizeOfExcludingThis().
+size_t ProfileBuffer::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  const size_t ownAllocation = aMallocSizeOf(this);
+  const size_t ownedMemory = SizeOfExcludingThis(aMallocSizeOf);
+  return ownAllocation + ownedMemory;
+}
+
+// Folds the timings of one sampling iteration into the running overhead
+// statistics and records them in the buffer. All values are handled in
+// microseconds.
+void ProfileBuffer::CollectOverheadStats(TimeDuration aSamplingTime,
+                                         TimeDuration aLocking,
+                                         TimeDuration aCleaning,
+                                         TimeDuration aCounters,
+                                         TimeDuration aThreads) {
+  const auto toMicroseconds = [](TimeDuration aDuration) {
+    return aDuration.ToMilliseconds() * 1000.0;
+  };
+
+  const double sampleTimeUs = toMicroseconds(aSamplingTime);
+  if (mFirstSamplingTimeUs != 0.0) {
+    // Note that we'll have 1 fewer interval than other numbers (because
+    // we need both ends of an interval to know its duration). The final
+    // difference should be insignificant over the expected many thousands
+    // of iterations.
+    mIntervalsUs.Count(sampleTimeUs - mLastSamplingTimeUs);
+  } else {
+    mFirstSamplingTimeUs = sampleTimeUs;
+  }
+  mLastSamplingTimeUs = sampleTimeUs;
+
+  // Time to take the lock before sampling.
+  const double lockUs = toMicroseconds(aLocking);
+  // Time to discard expired data.
+  const double cleanUs = toMicroseconds(aCleaning);
+  // Time to gather all counters.
+  const double counterUs = toMicroseconds(aCounters);
+  // Time to sample all threads.
+  const double threadUs = toMicroseconds(aThreads);
+
+  // Add to our gathered stats.
+  const double totalUs = lockUs + cleanUs + counterUs + threadUs;
+  mOverheadsUs.Count(totalUs);
+  mLockingsUs.Count(lockUs);
+  mCleaningsUs.Count(cleanUs);
+  mCountersUs.Count(counterUs);
+  mThreadsUs.Count(threadUs);
+
+  // Record details in buffer: the sampling time first, then the four
+  // durations in a fixed order.
+  AddEntry(ProfileBufferEntry::ProfilerOverheadTime(sampleTimeUs));
+  AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(lockUs));
+  AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(cleanUs));
+  AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(counterUs));
+  AddEntry(ProfileBufferEntry::ProfilerOverheadDuration(threadUs));
+}
+
+// Snapshots the buffer's current range, its capacity expressed in entries,
+// and the overhead statistics gathered so far.
+ProfilerBufferInfo ProfileBuffer::GetProfilerBufferInfo() const {
+  const uint64_t rangeStart = BufferRangeStart();
+  const uint64_t rangeEnd = BufferRangeEnd();
+  // 8 bytes per entry.
+  const uint32_t entryCount =
+      static_cast<uint32_t>(*mEntries.BufferLength() / 8);
+  return {rangeStart,   rangeEnd,    entryCount,  mIntervalsUs, mOverheadsUs,
+          mLockingsUs,  mCleaningsUs, mCountersUs, mThreadsUs};
+}
+
+/* ProfileBufferCollector */
+
+// Records |aAddr| in the buffer as a NativeLeafAddr entry.
+void ProfileBufferCollector::CollectNativeLeafAddr(void* aAddr) {
+  const auto& leafEntry = ProfileBufferEntry::NativeLeafAddr(aAddr);
+  mBuf.AddEntry(leafEntry);
+}
+
+// Records a label frame of the profiling stack as a code location. Frames
+// handled here carry no line or column information.
+void ProfileBufferCollector::CollectProfilingStackFrame(
+    const ProfilingStackFrame& aFrame) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(aFrame.isLabelFrame() ||
+             (aFrame.isJsFrame() && !aFrame.isOSRFrame()));
+  MOZ_ASSERT(aFrame.isLabelFrame());
+
+  // Left empty on purpose: neither value is ever set for these frames.
+  Maybe<uint32_t> noLine;
+  Maybe<uint32_t> noColumn;
+
+  mBuf.CollectCodeLocation(aFrame.label(), aFrame.dynamicString(),
+                           aFrame.flags(), aFrame.realmID(), noLine, noColumn,
+                           Some(aFrame.categoryPair()));
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBuffer.h b/mozglue/baseprofiler/core/ProfileBuffer.h
new file mode 100644
index 0000000000..b7a0af5e93
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBuffer.h
@@ -0,0 +1,186 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_BUFFER_H
+#define MOZ_PROFILE_BUFFER_H
+
+#include "ProfileBufferEntry.h"
+
+#include "mozilla/Maybe.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Class storing most profiling data in a ProfileChunkedBuffer.
+//
+// This class is used as a queue of entries which, after construction, never
+// allocates. This makes it safe to use in the profiler's "critical section".
+class ProfileBuffer final {
+ public:
+  // ProfileBuffer constructor
+  // @param aBuffer The in-session ProfileChunkedBuffer to use as buffer
+  // manager.
+  explicit ProfileBuffer(ProfileChunkedBuffer& aBuffer);
+
+  ProfileChunkedBuffer& UnderlyingChunkedBuffer() const { return mEntries; }
+
+  bool IsThreadSafe() const { return mEntries.IsThreadSafe(); }
+
+  // Add |aEntry| to the buffer, ignoring what kind of entry it is.
+  // Returns the position of the entry.
+  uint64_t AddEntry(const ProfileBufferEntry& aEntry);
+
+  // Add to the buffer a sample start (ThreadId) entry for aThreadId.
+  // Returns the position of the entry.
+  uint64_t AddThreadIdEntry(int aThreadId);
+
+  void CollectCodeLocation(const char* aLabel, const char* aStr,
+                           uint32_t aFrameFlags, uint64_t aInnerWindowID,
+                           const Maybe<uint32_t>& aLineNumber,
+                           const Maybe<uint32_t>& aColumnNumber,
+                           const Maybe<ProfilingCategoryPair>& aCategoryPair);
+
+  // Maximum size of a frameKey string that we'll handle.
+  static const size_t kMaxFrameKeyLength = 512;
+
+  // Stream JSON for samples in the buffer to aWriter, using the supplied
+  // UniqueStacks object.
+  // Only streams samples for the given thread ID and which were taken at or
+  // after aSinceTime. If ID is 0, ignore the stored thread ID; this should only
+  // be used when the buffer contains only one sample.
+  // Return the thread ID of the streamed sample(s), or 0.
+  int StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+                          double aSinceTime, UniqueStacks& aUniqueStacks) const;
+
+  void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+                           const TimeStamp& aProcessStartTime,
+                           double aSinceTime,
+                           UniqueStacks& aUniqueStacks) const;
+  void StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+                                double aSinceTime) const;
+  void StreamProfilerOverheadToJSON(SpliceableJSONWriter& aWriter,
+                                    const TimeStamp& aProcessStartTime,
+                                    double aSinceTime) const;
+  void StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                            const TimeStamp& aProcessStartTime,
+                            double aSinceTime) const;
+
+  // Find (via |aLastSample|) the most recent sample for the thread denoted by
+  // |aThreadId| and clone it, patching in the current time as appropriate.
+  // Mutate |aLastSample| to point to the newly inserted sample.
+  // Returns whether duplication was successful.
+  bool DuplicateLastSample(int aThreadId, const TimeStamp& aProcessStartTime,
+                           Maybe<uint64_t>& aLastSample);
+
+  void DiscardSamplesBeforeTime(double aTime);
+
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const;
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+
+  void CollectOverheadStats(TimeDuration aSamplingTime, TimeDuration aLocking,
+                            TimeDuration aCleaning, TimeDuration aCounters,
+                            TimeDuration aThreads);
+
+  ProfilerBufferInfo GetProfilerBufferInfo() const;
+
+ private:
+  // Add |aEntry| to the provided ProfileChunkedBuffer.
+  // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+  // that is not attached to a `ProfileBuffer`.
+  static ProfileBufferBlockIndex AddEntry(
+      ProfileChunkedBuffer& aProfileChunkedBuffer,
+      const ProfileBufferEntry& aEntry);
+
+  // Add a sample start (ThreadId) entry for aThreadId to the provided
+  // ProfileChunkedBuffer. Returns the position of the entry.
+  // `static` because it may be used to add an entry to a `ProfileChunkedBuffer`
+  // that is not attached to a `ProfileBuffer`.
+  static ProfileBufferBlockIndex AddThreadIdEntry(
+      ProfileChunkedBuffer& aProfileChunkedBuffer, int aThreadId);
+
+  // The storage in which this ProfileBuffer stores its entries.
+  ProfileChunkedBuffer& mEntries;
+
+ public:
+  // `BufferRangeStart()` and `BufferRangeEnd()` return `uint64_t` values
+  // corresponding to the first entry and past the last entry stored in
+  // `mEntries`.
+  //
+  // The returned values are not guaranteed to be stable, because other threads
+  // may also be accessing the buffer concurrently. But they will always
+  // increase, and can therefore give an indication of how far these values have
+  // *at least* reached. In particular:
+  // - Entries whose index is strictly less than `BufferRangeStart()` have been
+  //   discarded by now, so any related data may also be safely discarded.
+  // - It is safe to try and read entries at any index strictly less than
+  //   `BufferRangeEnd()` -- but note that these reads may fail by the time you
+  //   request them, as old entries get overwritten by new ones.
+  uint64_t BufferRangeStart() const { return mEntries.GetState().mRangeStart; }
+  uint64_t BufferRangeEnd() const { return mEntries.GetState().mRangeEnd; }
+
+ private:
+  // Single pre-allocated chunk (to avoid spurious mallocs), used when:
+  // - Duplicating sleeping stacks (hence scExpectedMaximumStackSize).
+  // - Adding JIT info.
+  // - Streaming stacks to JSON.
+  // Mutable because it's accessed from non-multithreaded const methods.
+  mutable ProfileBufferChunkManagerSingle mWorkerChunkManager{
+      ProfileBufferChunk::Create(
+          ProfileBufferChunk::SizeofChunkMetadata() +
+          ProfileBufferChunkManager::scExpectedMaximumStackSize)};
+
+  // Time from launch (us) when first sampling was recorded.
+  double mFirstSamplingTimeUs = 0.0;
+  // Time from launch (us) when last sampling was recorded.
+  double mLastSamplingTimeUs = 0.0;
+  // Sampling stats: Interval (us) between successive samplings.
+  ProfilerStats mIntervalsUs;
+  // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+  ProfilerStats mOverheadsUs;
+  // Sampling stats: Time (us) to acquire the lock before sampling.
+  ProfilerStats mLockingsUs;
+  // Sampling stats: Time (us) to discard expired data.
+  ProfilerStats mCleaningsUs;
+  // Sampling stats: Time (us) to collect counter data.
+  ProfilerStats mCountersUs;
+  // Sampling stats: Time (us) to sample thread stacks.
+  ProfilerStats mThreadsUs;
+};
+
+/**
+ * ProfilerStackCollector implementation used as the collector for MergeStacks
+ * by ProfileBuffer. It keeps a reference to the buffer (which must therefore
+ * outlive the collector) and the sample position it was constructed with.
+ * Both values are reported back wrapped in `Some`.
+ */
+class ProfileBufferCollector final : public ProfilerStackCollector {
+ public:
+  ProfileBufferCollector(ProfileBuffer& aBuf, uint64_t aSamplePos)
+      : mBuf(aBuf), mSamplePositionInBuffer(aSamplePos) {}
+
+  // Always Some: the position passed to the constructor.
+  Maybe<uint64_t> SamplePositionInBuffer() override {
+    return Some(mSamplePositionInBuffer);
+  }
+  // Always Some: forwards to ProfileBuffer::BufferRangeStart().
+  Maybe<uint64_t> BufferRangeStart() override {
+    return Some(mBuf.BufferRangeStart());
+  }
+
+  void CollectNativeLeafAddr(void* aAddr) override;
+  void CollectProfilingStackFrame(const ProfilingStackFrame& aFrame) override;
+
+ private:
+  ProfileBuffer& mBuf;
+  uint64_t mSamplePositionInBuffer;
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.cpp b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
new file mode 100644
index 0000000000..3d3f68b655
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.cpp
@@ -0,0 +1,1337 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBufferEntry.h"
+
+#include <ostream>
+#include <type_traits>
+
+#include "mozilla/Logging.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/StackWalk.h"
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "platform.h"
+#include "ProfileBuffer.h"
+#include "ProfilerBacktrace.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ProfileBufferEntry
+
+ProfileBufferEntry::ProfileBufferEntry()
+    : mKind(Kind::INVALID), mStorage{0, 0, 0, 0, 0, 0, 0, 0} {}
+
+// aString must be a static string: only the pointer is stored, not the text.
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, const char* aString)
+    : mKind(aKind) {
+  memcpy(mStorage, &aString, sizeof(aString));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, char aChars[kNumChars])
+    : mKind(aKind) {
+  memcpy(mStorage, aChars, kNumChars);  // Copies the characters themselves.
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, void* aPtr) : mKind(aKind) {
+  memcpy(mStorage, &aPtr, sizeof(aPtr));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, double aDouble)
+    : mKind(aKind) {
+  memcpy(mStorage, &aDouble, sizeof(aDouble));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int aInt) : mKind(aKind) {
+  memcpy(mStorage, &aInt, sizeof(aInt));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, int64_t aInt64)
+    : mKind(aKind) {
+  memcpy(mStorage, &aInt64, sizeof(aInt64));
+}
+
+ProfileBufferEntry::ProfileBufferEntry(Kind aKind, uint64_t aUint64)
+    : mKind(aKind) {
+  memcpy(mStorage, &aUint64, sizeof(aUint64));
+}
+
+const char* ProfileBufferEntry::GetString() const {
+  const char* result;
+  memcpy(&result, mStorage, sizeof(result));  // Reads back a stored pointer.
+  return result;
+}
+
+void* ProfileBufferEntry::GetPtr() const {
+  void* result;
+  memcpy(&result, mStorage, sizeof(result));
+  return result;
+}
+
+double ProfileBufferEntry::GetDouble() const {
+  double result;
+  memcpy(&result, mStorage, sizeof(result));
+  return result;
+}
+
+int ProfileBufferEntry::GetInt() const {
+  int result;
+  memcpy(&result, mStorage, sizeof(result));
+  return result;
+}
+
+int64_t ProfileBufferEntry::GetInt64() const {
+  int64_t result;
+  memcpy(&result, mStorage, sizeof(result));
+  return result;
+}
+
+uint64_t ProfileBufferEntry::GetUint64() const {
+  uint64_t result;
+  memcpy(&result, mStorage, sizeof(result));
+  return result;
+}
+
+void ProfileBufferEntry::CopyCharsInto(char (&aOutArray)[kNumChars]) const {
+  memcpy(aOutArray, mStorage, kNumChars);  // No terminator is appended.
+}
+
+// END ProfileBufferEntry
+////////////////////////////////////////////////////////////////////////
+
+// As mentioned in ProfileBufferEntry.h, the JSON format contains many
+// arrays whose elements are laid out according to various schemas to help
+// de-duplication. This RAII class helps write these arrays by keeping track of
+// the last non-null element written and adding the appropriate number of null
+// elements when writing new non-null elements. It also automatically opens and
+// closes an array element on the given JSON writer.
+//
+// You grant the AutoArraySchemaWriter exclusive access to the JSONWriter for
+// the lifetime of the AutoArraySchemaWriter. Do not access the JSONWriter
+// independently while the AutoArraySchemaWriter is alive.
+// To also write strings through a UniqueJSONStrings table, use the derived
+// AutoArraySchemaWithStringsWriter instead.
+//
+// Example usage:
+//
+//     // Define the schema of elements in this type of array: [FOO, BAR, BAZ]
+//     enum Schema : uint32_t {
+//       FOO = 0,
+//       BAR = 1,
+//       BAZ = 2
+//     };
+//
+//     AutoArraySchemaWriter writer(someJsonWriter);
+//     if (shouldWriteFoo) {
+//       writer.IntElement(FOO, getFoo());
+//     }
+//     ... etc ...
+//
+//     The elements need to be added in-order (FillUpTo() asserts this).
+class MOZ_RAII AutoArraySchemaWriter {
+ public:
+  explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+      : mJSONWriter(aWriter), mNextFreeIndex(0) {
+    mJSONWriter.StartArrayElement(SpliceableJSONWriter::SingleLineStyle);
+  }
+
+  ~AutoArraySchemaWriter() { mJSONWriter.EndArray(); }
+
+  template <typename T>
+  void IntElement(uint32_t aIndex, T aValue) {
+    static_assert(!std::is_same_v<T, uint64_t>,
+                  "Narrowing uint64 -> int64 conversion not allowed");
+    FillUpTo(aIndex);
+    mJSONWriter.IntElement(static_cast<int64_t>(aValue));
+  }
+
+  void DoubleElement(uint32_t aIndex, double aValue) {
+    FillUpTo(aIndex);
+    mJSONWriter.DoubleElement(aValue);
+  }
+
+  void BoolElement(uint32_t aIndex, bool aValue) {
+    FillUpTo(aIndex);
+    mJSONWriter.BoolElement(aValue);
+  }
+
+ protected:
+  SpliceableJSONWriter& Writer() { return mJSONWriter; }
+
+  void FillUpTo(uint32_t aIndex) {
+    MOZ_ASSERT(aIndex >= mNextFreeIndex);
+    mJSONWriter.NullElements(aIndex - mNextFreeIndex);
+    mNextFreeIndex = aIndex + 1;
+  }
+
+ private:
+  SpliceableJSONWriter& mJSONWriter;
+  uint32_t mNextFreeIndex;  // Schema index of the next element to be written.
+};
+
+// Same as AutoArraySchemaWriter, but this can also write strings (output as
+// indexes into the table of unique strings).
+class MOZ_RAII AutoArraySchemaWithStringsWriter : public AutoArraySchemaWriter {
+ public:
+  AutoArraySchemaWithStringsWriter(SpliceableJSONWriter& aWriter,
+                                   UniqueJSONStrings& aStrings)
+      : AutoArraySchemaWriter(aWriter), mStrings(aStrings) {}
+
+  void StringElement(uint32_t aIndex, const Span<const char>& aValue) {
+    FillUpTo(aIndex);
+    mStrings.WriteElement(Writer(), aValue);
+  }
+
+ private:
+  UniqueJSONStrings& mStrings;  // Not owned; must outlive this writer.
+};
+
+UniqueStacks::StackKey UniqueStacks::BeginStack(const FrameKey& aFrame) {
+  return StackKey(GetOrAddFrameIndex(aFrame));  // Registers aFrame if new.
+}
+
+UniqueStacks::StackKey UniqueStacks::AppendFrame(const StackKey& aStack,
+                                                 const FrameKey& aFrame) {
+  return StackKey(aStack, GetOrAddStackIndex(aStack),
+                  GetOrAddFrameIndex(aFrame));  // Registers both if new.
+}
+
+bool UniqueStacks::FrameKey::NormalFrameData::operator==(
+    const NormalFrameData& aOther) const {  // Member-wise comparison.
+  return mLocation == aOther.mLocation &&
+         mRelevantForJS == aOther.mRelevantForJS &&
+         mInnerWindowID == aOther.mInnerWindowID && mLine == aOther.mLine &&
+         mColumn == aOther.mColumn && mCategoryPair == aOther.mCategoryPair;
+}
+
+UniqueStacks::UniqueStacks() : mUniqueStrings(MakeUnique<UniqueJSONStrings>()) {
+  mFrameTableWriter.StartBareList();  // Ended in SpliceFrameTableElements().
+  mStackTableWriter.StartBareList();  // Ended in SpliceStackTableElements().
+}
+
+uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) {
+  // A newly seen stack is assigned the next free index: the current map size.
+  uint32_t nextIndex = mStackToIndexMap.count();
+  auto slot = mStackToIndexMap.lookupForAdd(aStack);
+  if (!slot) {
+    MOZ_RELEASE_ASSERT(mStackToIndexMap.add(slot, aStack, nextIndex));
+    StreamStack(aStack);  // Emit the matching stack table row.
+    return nextIndex;
+  }
+  MOZ_ASSERT(slot->value() < nextIndex);
+  return slot->value();
+}
+
+uint32_t UniqueStacks::GetOrAddFrameIndex(const FrameKey& aFrame) {
+  // A newly seen frame is assigned the next free index: the current map size.
+  uint32_t nextIndex = mFrameToIndexMap.count();
+  auto slot = mFrameToIndexMap.lookupForAdd(aFrame);
+  if (!slot) {
+    MOZ_RELEASE_ASSERT(mFrameToIndexMap.add(slot, aFrame, nextIndex));
+    StreamNonJITFrame(aFrame);  // Emit the matching frame table row.
+    return nextIndex;
+  }
+  MOZ_ASSERT(slot->value() < nextIndex);
+  return slot->value();
+}
+
+void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) {
+  mFrameTableWriter.EndBareList();  // Matches StartBareList() in the ctor.
+  aWriter.TakeAndSplice(mFrameTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) {
+  mStackTableWriter.EndBareList();  // Matches StartBareList() in the ctor.
+  aWriter.TakeAndSplice(mStackTableWriter.TakeChunkedWriteFunc());
+}
+
+void UniqueStacks::StreamStack(const StackKey& aStack) {
+  // Row layout in the stack table: [prefix stack index or null, frame index].
+  enum Schema : uint32_t { PREFIX = 0, FRAME = 1 };
+  AutoArraySchemaWriter row(mStackTableWriter);
+  if (const auto& prefix = aStack.mPrefixStackIndex; prefix.isSome()) {
+    row.IntElement(PREFIX, *prefix);
+  }
+  row.IntElement(FRAME, aStack.mFrameIndex);
+}
+
+void UniqueStacks::StreamNonJITFrame(const FrameKey& aFrame) {
+  // Column positions within one frame table row. IMPLEMENTATION and
+  // OPTIMIZATIONS are never written here, so they come out as nulls whenever
+  // a later column (LINE, COLUMN, CATEGORY, SUBCATEGORY) is present.
+  enum Schema : uint32_t {
+    LOCATION = 0,
+    RELEVANT_FOR_JS = 1,
+    INNER_WINDOW_ID = 2,
+    IMPLEMENTATION = 3,
+    OPTIMIZATIONS = 4,
+    LINE = 5,
+    COLUMN = 6,
+    CATEGORY = 7,
+    SUBCATEGORY = 8
+  };
+
+  AutoArraySchemaWithStringsWriter row(mFrameTableWriter, *mUniqueStrings);
+  const auto& frame = aFrame.mData.as<FrameKey::NormalFrameData>();
+
+  row.StringElement(LOCATION, frame.mLocation);
+  row.BoolElement(RELEVANT_FOR_JS, frame.mRelevantForJS);
+  // It's okay to convert uint64_t to double here because DOM always creates IDs
+  // that are convertible to double.
+  row.DoubleElement(INNER_WINDOW_ID, frame.mInnerWindowID);
+
+  if (const auto& line = frame.mLine; line.isSome()) {
+    row.IntElement(LINE, *line);
+  }
+  if (const auto& column = frame.mColumn; column.isSome()) {
+    row.IntElement(COLUMN, *column);
+  }
+  if (const auto& pair = frame.mCategoryPair; pair.isSome()) {
+    const ProfilingCategoryPairInfo& info =
+        GetProfilingCategoryPairInfo(*pair);
+    row.IntElement(CATEGORY, uint32_t(info.mCategory));
+    row.IntElement(SUBCATEGORY, info.mSubcategoryIndex);
+  }
+}
+
+struct CStringWriteFunc : public JSONWriteFunc {
+  std::string& mBuffer;  // The struct must not outlive this buffer
+  explicit CStringWriteFunc(std::string& aBuffer) : mBuffer(aBuffer) {}
+
+  void Write(const Span<const char>& aStr) override {
+    mBuffer.append(aStr.data(), aStr.size());  // Appends; never clears.
+  }
+};
+
+struct ProfileSample {
+  uint32_t mStack;  // Stack index, from UniqueStacks::GetOrAddStackIndex().
+  double mTime;     // Value of the sample's Time entry.
+  Maybe<double> mResponsiveness;  // Set only if a Responsiveness entry exists.
+};
+
+static void WriteSample(SpliceableJSONWriter& aWriter,
+                        const ProfileSample& aSample) {
+  // One sample is one array row: [STACK, TIME, EVENT_DELAY?]. The trailing
+  // slot is simply left out when no responsiveness value was recorded.
+  enum Schema : uint32_t {
+    STACK = 0,
+    TIME = 1,
+    EVENT_DELAY = 2,
+  };
+
+  AutoArraySchemaWriter row(aWriter);
+  row.IntElement(STACK, aSample.mStack);
+  row.DoubleElement(TIME, aSample.mTime);
+
+  if (const auto& delay = aSample.mResponsiveness; delay.isSome()) {
+    row.DoubleElement(EVENT_DELAY, *delay);
+  }
+}
+
+class EntryGetter {
+ public:
+  explicit EntryGetter(ProfileChunkedBuffer::Reader& aReader,
+                       uint64_t aInitialReadPos = 0)
+      : mBlockIt(
+            aReader.At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                aInitialReadPos))),
+        mBlockItEnd(aReader.end()) {
+    if (!ReadLegacyOrEnd()) {
+      // Find and read the next non-legacy entry.
+      Next();
+    }
+  }
+
+  bool Has() const { return mBlockIt != mBlockItEnd; }
+
+  const ProfileBufferEntry& Get() const {
+    MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Get()`");
+    return mEntry;
+  }
+
+  void Next() {
+    MOZ_ASSERT(Has(), "Caller should have checked `Has()` before `Next()`");
+    for (;;) {
+      ++mBlockIt;
+      if (ReadLegacyOrEnd()) {
+        // Either we're at the end, or we could read a legacy entry -> Done.
+        break;
+      }
+      // Otherwise loop around until we hit the end or a legacy entry.
+    }
+  }
+
+  ProfileBufferBlockIndex CurBlockIndex() const {
+    return mBlockIt.CurrentBlockIndex();
+  }
+
+  uint64_t CurPos() const {
+    return CurBlockIndex().ConvertToProfileBufferIndex();
+  }
+
+ private:
+  // Try to read the entry at the current `mBlockIt` position.
+  // * If we're at the end of the buffer, just return `true`.
+  // * If there is a "legacy" entry (containing a real `ProfileBufferEntry`),
+  //   read it into `mEntry`, and return `true` as well.
+  // * Otherwise the entry contains a "modern" type that cannot be read into
+  // `mEntry`, return `false` (so `EntryGetter` can skip to another entry).
+  bool ReadLegacyOrEnd() {
+    if (!Has()) {
+      return true;
+    }
+    // Read the entry "kind", which is always at the start of all entries.
+    ProfileBufferEntryReader aER = *mBlockIt;
+    auto type = static_cast<ProfileBufferEntry::Kind>(
+        aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+    MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) <
+               static_cast<ProfileBufferEntry::KindUnderlyingType>(
+                   ProfileBufferEntry::Kind::MODERN_LIMIT));
+    if (type >= ProfileBufferEntry::Kind::LEGACY_LIMIT) {
+      aER.SetRemainingBytes(0);
+      return false;
+    }
+    // Here, we have a legacy item, we need to read it from the start.
+    // Because the above `ReadObject` moved the reader, we need to reset it to
+    // the start of the entry before reading the whole entry.
+    aER = *mBlockIt;
+    aER.ReadBytes(&mEntry, aER.RemainingBytes());
+    return true;
+  }
+
+  ProfileBufferEntry mEntry;
+  ProfileChunkedBuffer::BlockIterator mBlockIt;
+  const ProfileChunkedBuffer::BlockIterator mBlockItEnd;
+};
+
+// The following grammar shows legal sequences of profile buffer entries.
+// The sequences beginning with a ThreadId entry are known as "samples".
+//
+// (
+//   ( /* Samples */
+//     ThreadId
+//     Time
+//     ( NativeLeafAddr
+//     | Label FrameFlags? DynamicStringFragment* InnerWindowID?
+//         LineNumber? ColumnNumber? CategoryPair?
+//     | JitReturnAddr )+
+//     Responsiveness?
+//   )
+//   | MarkerData
+//   | ( /* Counters */
+//       CounterId
+//       Time
+//       (
+//         CounterKey
+//         Count
+//         Number?
+//       )*
+//     )
+//   | CollectionStart
+//   | CollectionEnd
+//   | Pause
+//   | Resume
+//   | ( ProfilerOverheadTime /* Sampling start timestamp */
+//       ProfilerOverheadDuration /* Lock acquisition */
+//       ProfilerOverheadDuration /* Expired data cleaning */
+//       ProfilerOverheadDuration /* Counters */
+//       ProfilerOverheadDuration /* Threads */
+//     )
+// )*
+//
+// The most complicated part is the stack entry sequence that begins with
+// Label. Here are some examples.
+//
+// - ProfilingStack frames without a dynamic string:
+//
+//     Label("js::RunScript")
+//     CategoryPair(ProfilingCategoryPair::JS)
+//
+//     Label("XREMain::XRE_main")
+//     LineNumber(4660)
+//     CategoryPair(ProfilingCategoryPair::OTHER)
+//
+//     Label("ElementRestyler::ComputeStyleChangeFor")
+//     LineNumber(3003)
+//     CategoryPair(ProfilingCategoryPair::CSS)
+//
+// - ProfilingStack frames with a dynamic string:
+//
+//     Label("nsObserverService::NotifyObservers")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("domwindo")
+//     DynamicStringFragment("wopened")
+//     LineNumber(291)
+//     CategoryPair(ProfilingCategoryPair::OTHER)
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+//     DynamicStringFragment("closeWin")
+//     DynamicStringFragment("dow (chr")
+//     DynamicStringFragment("ome://gl")
+//     DynamicStringFragment("obal/con")
+//     DynamicStringFragment("tent/glo")
+//     DynamicStringFragment("balOverl")
+//     DynamicStringFragment("ay.js:5)")
+//     DynamicStringFragment("")          # this string holds the closing '\0'
+//     LineNumber(25)
+//     CategoryPair(ProfilingCategoryPair::JS)
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_JS_FRAME))
+//     DynamicStringFragment("bound (s")
+//     DynamicStringFragment("elf-host")
+//     DynamicStringFragment("ed:914)")
+//     LineNumber(945)
+//     CategoryPair(ProfilingCategoryPair::JS)
+//
+// - A profiling stack frame with an overly long dynamic string:
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("(too lon")
+//     DynamicStringFragment("g)")
+//     LineNumber(100)
+//     CategoryPair(ProfilingCategoryPair::NETWORK)
+//
+// - A wasm JIT frame:
+//
+//     Label("")
+//     FrameFlags(uint64_t(0))
+//     DynamicStringFragment("wasm-fun")
+//     DynamicStringFragment("ction[87")
+//     DynamicStringFragment("36] (blo")
+//     DynamicStringFragment("b:http:/")
+//     DynamicStringFragment("/webasse")
+//     DynamicStringFragment("mbly.org")
+//     DynamicStringFragment("/3dc5759")
+//     DynamicStringFragment("4-ce58-4")
+//     DynamicStringFragment("626-975b")
+//     DynamicStringFragment("-08ad116")
+//     DynamicStringFragment("30bc1:38")
+//     DynamicStringFragment("29856)")
+//
+// - A JS frame in a synchronous sample:
+//
+//     Label("")
+//     FrameFlags(uint64_t(ProfilingStackFrame::Flags::IS_LABEL_FRAME))
+//     DynamicStringFragment("u (https")
+//     DynamicStringFragment("://perf-")
+//     DynamicStringFragment("html.io/")
+//     DynamicStringFragment("ac0da204")
+//     DynamicStringFragment("aaa44d75")
+//     DynamicStringFragment("a800.bun")
+//     DynamicStringFragment("dle.js:2")
+//     DynamicStringFragment("5)")
+
+// Because this format is entirely internal to the Profiler, any parsing error
+// indicates a bug in the ProfileBuffer writer or parser (or flaky hardware).
+// Plain braces, not do{}while(0), so `continue` targets the caller's loop.
+#define ERROR_AND_CONTINUE(msg)                              \
+  {                                                          \
+    fprintf(stderr, "ProfileBuffer parse error: %s\n", msg); \
+    MOZ_ASSERT(false, msg);                                  \
+    continue;                                                \
+  }
+
+int ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter,
+                                       int aThreadId, double aSinceTime,
+                                       UniqueStacks& aUniqueStacks) const {
+  UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength);
+
+  return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    int processedThreadId = 0;  // Thread id of the last sample written.
+
+    EntryGetter e(*aReader);
+
+    for (;;) {
+      // This block skips entries until we find the start of the next sample.
+      // This is useful in three situations.
+      //
+      // - The circular buffer overwrites old entries, so when we start parsing
+      //   we might be in the middle of a sample, and we must skip forward to
+      //   the start of the next sample.
+      //
+      // - We skip samples that don't have an appropriate ThreadId or Time.
+      //
+      // - We skip range Pause, Resume, CollectionStart, Counter and
+      //   CollectionEnd entries between samples.
+      while (e.Has()) {
+        if (e.Get().IsThreadId()) {
+          break;
+        }
+        e.Next();
+      }
+
+      if (!e.Has()) {
+        break;
+      }
+
+      // Due to the skipping loop above, if we have an entry here it must be a
+      // ThreadId entry.
+      MOZ_ASSERT(e.Get().IsThreadId());
+
+      int threadId = e.Get().GetInt();
+      e.Next();
+
+      // Ignore samples that are for the wrong thread.
+      if (threadId != aThreadId && aThreadId != 0) {
+        continue;
+      }
+
+      MOZ_ASSERT(aThreadId != 0 || processedThreadId == 0,
+                 "aThreadId==0 should only be used with 1-sample buffer");
+
+      ProfileSample sample;
+
+      if (e.Has() && e.Get().IsTime()) {
+        sample.mTime = e.Get().GetDouble();
+        e.Next();
+
+        // Ignore samples that are too old.
+        if (sample.mTime < aSinceTime) {
+          continue;
+        }
+      } else {
+        ERROR_AND_CONTINUE("expected a Time entry");
+      }
+
+      UniqueStacks::StackKey stack =
+          aUniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
+
+      int numFrames = 0;
+      while (e.Has()) {
+        if (e.Get().IsNativeLeafAddr()) {
+          numFrames++;
+
+          void* pc = e.Get().GetPtr();
+          e.Next();
+
+          static const uint32_t BUF_SIZE = 256;
+          char buf[BUF_SIZE];
+
+          // Bug 753041: We need a double cast here to tell GCC that we don't
+          // want to sign extend 32-bit addresses starting with 0xFXXXXXX.
+          unsigned long long pcULL = (unsigned long long)(uintptr_t)pc;
+          SprintfLiteral(buf, "%#llx", pcULL);
+
+          // If the "MOZ_PROFILER_SYMBOLICATE" env-var is set, we add a local
+          // symbolication description to the PC address. This is off by
+          // default, and mainly intended for local development.
+          static const bool preSymbolicate = []() {
+            const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
+            return symbolicate && symbolicate[0] != '\0';
+          }();
+          if (preSymbolicate) {
+            MozCodeAddressDetails details;
+            if (MozDescribeCodeAddress(pc, &details)) {
+              // Replace \0 terminator with space.
+              const uint32_t pcLen = strlen(buf);
+              buf[pcLen] = ' ';
+              // Add description after space. Note: Using a frame number of 0,
+              // as using `numFrames` wouldn't help here, and would prevent
+              // combining same function calls that happen at different depths.
+              // TODO: Remove unsightly "#00: " if too annoying. :-)
+              MozFormatCodeAddressDetails(
+                  buf + pcLen + 1, BUF_SIZE - (pcLen + 1), 0, pc, &details);
+            }
+          }
+
+          stack = aUniqueStacks.AppendFrame(stack, UniqueStacks::FrameKey(buf));
+
+        } else if (e.Get().IsLabel()) {
+          numFrames++;
+
+          const char* label = e.Get().GetString();
+          e.Next();
+
+          using FrameFlags = ProfilingStackFrame::Flags;
+          uint32_t frameFlags = 0;
+          if (e.Has() && e.Get().IsFrameFlags()) {
+            frameFlags = uint32_t(e.Get().GetUint64());
+            e.Next();
+          }
+
+          bool relevantForJS =
+              frameFlags & uint32_t(FrameFlags::RELEVANT_FOR_JS);
+
+          // Copy potential dynamic string fragments into dynStrBuf, so that
+          // dynStrBuf will then contain the entire dynamic string.
+          size_t i = 0;
+          dynStrBuf[0] = '\0';
+          while (e.Has()) {
+            if (e.Get().IsDynamicStringFragment()) {
+              char chars[ProfileBufferEntry::kNumChars];
+              e.Get().CopyCharsInto(chars);
+              for (char c : chars) {
+                if (i < kMaxFrameKeyLength) {
+                  dynStrBuf[i] = c;
+                  i++;
+                }
+              }
+              e.Next();
+            } else {
+              break;
+            }
+          }
+          dynStrBuf[kMaxFrameKeyLength - 1] = '\0';  // Always terminated.
+          bool hasDynamicString = (i != 0);
+
+          std::string frameLabel;
+          if (label[0] != '\0' && hasDynamicString) {
+            if (frameFlags & uint32_t(FrameFlags::STRING_TEMPLATE_METHOD)) {
+              frameLabel += label;
+              frameLabel += '.';
+              frameLabel += dynStrBuf.get();
+            } else if (frameFlags &
+                       uint32_t(FrameFlags::STRING_TEMPLATE_GETTER)) {
+              frameLabel += "get ";
+              frameLabel += label;
+              frameLabel += '.';
+              frameLabel += dynStrBuf.get();
+            } else if (frameFlags &
+                       uint32_t(FrameFlags::STRING_TEMPLATE_SETTER)) {
+              frameLabel += "set ";
+              frameLabel += label;
+              frameLabel += '.';
+              frameLabel += dynStrBuf.get();
+            } else {
+              frameLabel += label;
+              frameLabel += ' ';
+              frameLabel += dynStrBuf.get();
+            }
+          } else if (hasDynamicString) {
+            frameLabel += dynStrBuf.get();
+          } else {
+            frameLabel += label;
+          }
+
+          uint64_t innerWindowID = 0;
+          if (e.Has() && e.Get().IsInnerWindowID()) {
+            innerWindowID = uint64_t(e.Get().GetUint64());
+            e.Next();
+          }
+
+          Maybe<unsigned> line;
+          if (e.Has() && e.Get().IsLineNumber()) {
+            line = Some(unsigned(e.Get().GetInt()));
+            e.Next();
+          }
+
+          Maybe<unsigned> column;
+          if (e.Has() && e.Get().IsColumnNumber()) {
+            column = Some(unsigned(e.Get().GetInt()));
+            e.Next();
+          }
+
+          Maybe<ProfilingCategoryPair> categoryPair;
+          if (e.Has() && e.Get().IsCategoryPair()) {
+            categoryPair =
+                Some(ProfilingCategoryPair(uint32_t(e.Get().GetInt())));
+            e.Next();
+          }
+
+          stack = aUniqueStacks.AppendFrame(
+              stack, UniqueStacks::FrameKey(std::move(frameLabel),
+                                            relevantForJS, innerWindowID, line,
+                                            column, categoryPair));
+
+        } else {
+          break;
+        }
+      }
+
+      if (numFrames == 0) {
+        // It is possible to have empty stacks if native stackwalking is
+        // disabled. Skip samples with empty stacks. (See Bug 1497985).
+        // Thus, don't use ERROR_AND_CONTINUE, but just continue.
+        continue;
+      }
+
+      sample.mStack = aUniqueStacks.GetOrAddStackIndex(stack);
+
+      if (e.Has() && e.Get().IsResponsiveness()) {
+        sample.mResponsiveness = Some(e.Get().GetDouble());
+        e.Next();
+      }
+
+      WriteSample(aWriter, sample);
+
+      processedThreadId = threadId;
+    }
+
+    return processedThreadId;
+  });
+}
+
+void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter,
+                                        int aThreadId,
+                                        const TimeStamp& aProcessStartTime,
+                                        double aSinceTime,
+                                        UniqueStacks& aUniqueStacks) const {
+  mEntries.ReadEach([&](ProfileBufferEntryReader& aER) {
+    auto type = static_cast<ProfileBufferEntry::Kind>(
+        aER.ReadObject<ProfileBufferEntry::KindUnderlyingType>());
+    MOZ_ASSERT(static_cast<ProfileBufferEntry::KindUnderlyingType>(type) <
+               static_cast<ProfileBufferEntry::KindUnderlyingType>(
+                   ProfileBufferEntry::Kind::MODERN_LIMIT));
+    bool entryWasFullyRead = false;
+
+    if (type == ProfileBufferEntry::Kind::Marker) {
+      entryWasFullyRead = ::mozilla::base_profiler_markers_detail::
+          DeserializeAfterKindAndStream(
+              aER, aWriter, aThreadId,
+              [&](ProfileChunkedBuffer& aChunkedBuffer) {
+                ProfilerBacktrace backtrace("", &aChunkedBuffer);
+                backtrace.StreamJSON(aWriter, TimeStamp::ProcessCreation(),
+                                     aUniqueStacks);
+              });
+    }
+
+    if (!entryWasFullyRead) {
+      // Not a marker, or marker for another thread.
+      // We probably didn't read the whole entry, so we need to skip to the end.
+      aER.SetRemainingBytes(0);
+    }
+  });  // NOTE(review): aProcessStartTime/aSinceTime are unused here.
+}
+
+// Streams the "profilerOverhead" object into aWriter: a "samples" table with
+// one row per sampling-overhead record found in the buffer at or after
+// aSinceTime, followed by a "statistics" object when at least two such records
+// were streamed.
+//
+// aProcessStartTime is not referenced by this function.
+//
+// NOTE(review): ERROR_AND_CONTINUE is defined outside this excerpt; the loop
+// below relies on it ending the current iteration (presumably via `continue`),
+// so that the unexpected entry is re-examined from the top of the loop.
+void ProfileBuffer::StreamProfilerOverheadToJSON(
+    SpliceableJSONWriter& aWriter, const TimeStamp& aProcessStartTime,
+    double aSinceTime) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    // Column indices used for each row of "data"; they follow the order in
+    // which the fields are declared through JSONSchemaWriter below.
+    enum Schema : uint32_t {
+      TIME = 0,
+      LOCKING = 1,
+      MARKER_CLEANING = 2,
+      COUNTERS = 3,
+      THREADS = 4
+    };
+
+    aWriter.StartObjectProperty("profilerOverhead");
+    aWriter.StartObjectProperty("samples");
+    // Stream all sampling overhead data. We skip other entries, because we
+    // process them in StreamSamplesToJSON()/etc.
+    {
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("time");
+      schema.WriteField("locking");
+      schema.WriteField("expiredMarkerCleaning");
+      schema.WriteField("counters");
+      schema.WriteField("threads");
+    }
+
+    aWriter.StartArrayProperty("data");
+    // firstTime == 0.0 doubles as the "no record streamed yet" marker.
+    double firstTime = 0.0;
+    double lastTime = 0.0;
+    ProfilerStats intervals, overheads, lockings, cleanings, counters, threads;
+    while (e.Has()) {
+      // valid sequence: ProfilerOverheadTime, ProfilerOverheadDuration * 4
+      if (e.Get().IsProfilerOverheadTime()) {
+        double time = e.Get().GetDouble();
+        if (time >= aSinceTime) {
+          // The four durations are read in this fixed order: locking, marker
+          // cleaning, counters, threads.
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime");
+          }
+          double locking = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration");
+          }
+          double cleaning = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration*2");
+          }
+          double counter = e.Get().GetDouble();
+          e.Next();
+          if (!e.Has() || !e.Get().IsProfilerOverheadDuration()) {
+            ERROR_AND_CONTINUE(
+                "expected a ProfilerOverheadDuration entry after "
+                "ProfilerOverheadTime,ProfilerOverheadDuration*3");
+          }
+          double thread = e.Get().GetDouble();
+
+          if (firstTime == 0.0) {
+            firstTime = time;
+          } else {
+            // Note that we'll have 1 fewer interval than other numbers (because
+            // we need both ends of an interval to know its duration). The final
+            // difference should be insignificant over the expected many
+            // thousands of iterations.
+            intervals.Count(time - lastTime);
+          }
+          lastTime = time;
+          overheads.Count(locking + cleaning + counter + thread);
+          lockings.Count(locking);
+          cleanings.Count(cleaning);
+          counters.Count(counter);
+          threads.Count(thread);
+
+          // One row of "data"; the row is closed when `writer` goes out of
+          // scope at the end of this block.
+          AutoArraySchemaWriter writer(aWriter);
+          writer.DoubleElement(TIME, time);
+          writer.DoubleElement(LOCKING, locking);
+          writer.DoubleElement(MARKER_CLEANING, cleaning);
+          writer.DoubleElement(COUNTERS, counter);
+          writer.DoubleElement(THREADS, thread);
+        }
+      }
+      // Step past the current entry: the last duration of a streamed record,
+      // a ProfilerOverheadTime older than aSinceTime, or an unrelated entry.
+      e.Next();
+    }
+    aWriter.EndArray();   // data
+    aWriter.EndObject();  // samples
+
+    // Only output statistics if there is at least one full interval (and
+    // therefore at least two samplings.)
+    if (intervals.n > 0) {
+      aWriter.StartObjectProperty("statistics");
+      aWriter.DoubleProperty("profiledDuration", lastTime - firstTime);
+      aWriter.IntProperty("samplingCount", overheads.n);
+      aWriter.DoubleProperty("overheadDurations", overheads.sum);
+      // Written as the plain ratio overheads.sum / profiledDuration (it is not
+      // scaled by 100 here).
+      aWriter.DoubleProperty("overheadPercentage",
+                             overheads.sum / (lastTime - firstTime));
+// Emits the "mean<name>", "min<name>" and "max<name>" properties for `var`.
+#define PROFILER_STATS(name, var)                           \
+  aWriter.DoubleProperty("mean" name, (var).sum / (var).n); \
+  aWriter.DoubleProperty("min" name, (var).min);            \
+  aWriter.DoubleProperty("max" name, (var).max);
+      PROFILER_STATS("Interval", intervals);
+      PROFILER_STATS("Overhead", overheads);
+      PROFILER_STATS("Lockings", lockings);
+      PROFILER_STATS("Cleaning", cleanings);
+      PROFILER_STATS("Counter", counters);
+      PROFILER_STATS("Thread", threads);
+#undef PROFILER_STATS
+      aWriter.EndObject();  // statistics
+    }
+    aWriter.EndObject();  // profilerOverhead
+  });
+}
+
+// One counter sample, as gathered by ProfileBuffer::StreamCountersToJSON.
+struct CounterKeyedSample {
+  // Value of the Time entry that precedes the sample in the buffer.
+  double mTime;
+  // Value of the sample's Number entry, or 0 when that entry is absent.
+  uint64_t mNumber;
+  // Value of the sample's Count entry.
+  int64_t mCount;
+};
+
+// All samples gathered for one counter key, in the order they were read.
+using CounterKeyedSamples = Vector<CounterKeyedSample>;
+
+// The samples of one counter, grouped by counter key.
+using CounterMap = HashMap<uint64_t, CounterKeyedSamples>;
+
+// Returns a reference to the value stored under aKey inside aMap.
+// When aKey is not present yet, a value-initialized entry is inserted first;
+// a failed insertion is fatal (release assertion).
+template <typename HashM, typename Key>
+static auto& LookupOrAdd(HashM& aMap, Key&& aKey) {
+  auto addPtr = aMap.lookupForAdd(aKey);
+  if (addPtr) {
+    // Already present: hand back the existing value.
+    return addPtr->value();
+  }
+
+  // Absent: insert a default value at the slot found by the lookup above.
+  MOZ_RELEASE_ASSERT(aMap.add(addPtr, std::forward<Key>(aKey),
+                              typename HashM::Entry::ValueType{}));
+  MOZ_ASSERT(!!addPtr);
+  return addPtr->value();
+}
+
+// Streams the "counters" JSON array into aWriter: one object per counter seen
+// in the buffer, each holding one sample group per counter key. Only samples
+// whose Time entry is at or after aSinceTime are included. Nothing is written
+// when the buffer contains no CounterId entry at all.
+//
+// aProcessStartTime is not referenced by this function.
+//
+// NOTE(review): ERROR_AND_CONTINUE is defined outside this excerpt; the
+// parsing loops rely on it ending the current loop iteration (presumably via
+// `continue`), so that the unexpected entry is re-examined.
+void ProfileBuffer::StreamCountersToJSON(SpliceableJSONWriter& aWriter,
+                                         const TimeStamp& aProcessStartTime,
+                                         double aSinceTime) const {
+  // Because this is a format entirely internal to the Profiler, any parsing
+  // error indicates a bug in the ProfileBuffer writing or the parser itself,
+  // or possibly flaky hardware.
+
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader);
+
+    // Column indices used for each row of a sample group's "data".
+    enum Schema : uint32_t { TIME = 0, NUMBER = 1, COUNT = 2 };
+
+    // Stream all counters. We skip other entries, because we process them in
+    // StreamSamplesToJSON()/etc.
+    //
+    // Valid sequence in the buffer:
+    // CounterID
+    // Time
+    // ( CounterKey Count Number? )*
+    //
+    // And the JSON (example):
+    // "counters": {
+    //  "name": "malloc",
+    //  "category": "Memory",
+    //  "description": "Amount of allocated memory",
+    //  "sample_groups": {
+    //   "id": 0,
+    //   "samples": {
+    //    "schema": {"time": 0, "number": 1, "count": 2},
+    //    "data": [
+    //     [
+    //      16117.033968000002,
+    //      2446216,
+    //      6801320
+    //     ],
+    //     [
+    //      16118.037638,
+    //      2446216,
+    //      6801320
+    //     ],
+    //    ],
+    //   }
+    //  }
+    // },
+
+    // Build the map of counters and populate it
+    HashMap<void*, CounterMap> counters;
+
+    // Invariant of the parsing loop: each entry is consumed exactly once, and
+    // at the top of every iteration `e` designates the first entry that has
+    // not been examined yet. In particular, the entry that follows a counter
+    // sample (typically the next counter's CounterId) must not be skipped.
+    while (e.Has()) {
+      // skip all non-Counters, including if we start in the middle of a counter
+      if (e.Get().IsCounterId()) {
+        void* id = e.Get().GetPtr();
+        CounterMap& counter = LookupOrAdd(counters, id);
+        e.Next();
+        if (!e.Has() || !e.Get().IsTime()) {
+          ERROR_AND_CONTINUE("expected a Time entry");
+        }
+        double time = e.Get().GetDouble();
+        // Consume the Time entry, whether or not the sample is recent enough.
+        e.Next();
+        if (time >= aSinceTime) {
+          while (e.Has() && e.Get().IsCounterKey()) {
+            uint64_t key = e.Get().GetUint64();
+            CounterKeyedSamples& data = LookupOrAdd(counter, key);
+            e.Next();
+            if (!e.Has() || !e.Get().IsCount()) {
+              ERROR_AND_CONTINUE("expected a Count entry");
+            }
+            // NOTE(review): the signed count is read through GetUint64() and
+            // the unsigned number through GetInt64(); kept as-is, confirm
+            // against the entry kinds' declared payload types.
+            int64_t count = e.Get().GetUint64();
+            e.Next();
+            uint64_t number;
+            if (!e.Has() || !e.Get().IsNumber()) {
+              // The Number entry is optional; `e` already designates the next
+              // unprocessed entry.
+              number = 0;
+            } else {
+              number = e.Get().GetInt64();
+              // Consume the Number entry, so that a following CounterKey is
+              // seen by the loop condition.
+              e.Next();
+            }
+            CounterKeyedSample sample = {time, number, count};
+            MOZ_RELEASE_ASSERT(data.append(sample));
+          }
+        } else {
+          // skip counter sample - the CounterId and Time entries have been
+          // consumed, the loop at the top skips the rest
+        }
+      } else {
+        e.Next();
+      }
+    }
+    // we have a map of a map of counter entries; dump them to JSON
+    if (counters.count() == 0) {
+      return;
+    }
+
+    aWriter.StartArrayProperty("counters");
+    for (auto iter = counters.iter(); !iter.done(); iter.next()) {
+      CounterMap& counter = iter.get().value();
+      // The CounterId payload is the address of the counter object itself.
+      const BaseProfilerCount* base_counter =
+          static_cast<const BaseProfilerCount*>(iter.get().key());
+
+      aWriter.Start();
+      aWriter.StringProperty("name", MakeStringSpan(base_counter->mLabel));
+      aWriter.StringProperty("category",
+                             MakeStringSpan(base_counter->mCategory));
+      aWriter.StringProperty("description",
+                             MakeStringSpan(base_counter->mDescription));
+
+      aWriter.StartArrayProperty("sample_groups");
+      for (auto counter_iter = counter.iter(); !counter_iter.done();
+           counter_iter.next()) {
+        CounterKeyedSamples& samples = counter_iter.get().value();
+        uint64_t key = counter_iter.get().key();
+
+        // Keys without any retained sample produce no sample group.
+        size_t size = samples.length();
+        if (size == 0) {
+          continue;
+        }
+
+        aWriter.StartObjectElement();
+        {
+          aWriter.IntProperty("id", static_cast<int64_t>(key));
+          aWriter.StartObjectProperty("samples");
+          {
+            // XXX Can we assume a missing count means 0?
+            JSONSchemaWriter schema(aWriter);
+            schema.WriteField("time");
+            schema.WriteField("number");
+            schema.WriteField("count");
+          }
+
+          aWriter.StartArrayProperty("data");
+          uint64_t previousNumber = 0;
+          int64_t previousCount = 0;
+          for (size_t i = 0; i < size; i++) {
+            // Encode as deltas, and only encode if different than the last
+            // sample
+            if (i == 0 || samples[i].mNumber != previousNumber ||
+                samples[i].mCount != previousCount) {
+              MOZ_ASSERT(i == 0 || samples[i].mTime >= samples[i - 1].mTime);
+              MOZ_ASSERT(samples[i].mNumber >= previousNumber);
+              MOZ_ASSERT(samples[i].mNumber - previousNumber <=
+                         uint64_t(std::numeric_limits<int64_t>::max()));
+
+              AutoArraySchemaWriter writer(aWriter);
+              writer.DoubleElement(TIME, samples[i].mTime);
+              writer.IntElement(
+                  NUMBER,
+                  static_cast<int64_t>(samples[i].mNumber - previousNumber));
+              writer.IntElement(COUNT, samples[i].mCount - previousCount);
+              previousNumber = samples[i].mNumber;
+              previousCount = samples[i].mCount;
+            }
+          }
+          aWriter.EndArray();   // data
+          aWriter.EndObject();  // samples
+        }
+        aWriter.EndObject();  // sample_groups item
+      }
+      aWriter.EndArray();  // sample groups
+      aWriter.End();       // for each counter
+    }
+    aWriter.EndArray();  // counters
+  });
+}
+
+#undef ERROR_AND_CONTINUE
+
+// Writes one paused-range object to aWriter, with the properties "startTime",
+// "endTime" and "reason" in that order. A time that holds no value is written
+// as a JSON null.
+static void AddPausedRange(SpliceableJSONWriter& aWriter, const char* aReason,
+                           const Maybe<double>& aStartTime,
+                           const Maybe<double>& aEndTime) {
+  aWriter.Start();
+
+  if (aStartTime.isNothing()) {
+    aWriter.NullProperty("startTime");
+  } else {
+    aWriter.DoubleProperty("startTime", aStartTime.ref());
+  }
+
+  if (aEndTime.isNothing()) {
+    aWriter.NullProperty("endTime");
+  } else {
+    aWriter.DoubleProperty("endTime", aEndTime.ref());
+  }
+
+  aWriter.StringProperty("reason", MakeStringSpan(aReason));
+
+  aWriter.End();
+}
+
+// Walks the whole buffer and writes one paused-range object per Pause/Resume
+// pair ("profiler-paused") and per CollectionStart/CollectionEnd pair
+// ("collecting"). A range whose start entry was not seen gets a null start
+// time; a range still open at the end of the buffer gets a null end time.
+//
+// aSinceTime is not referenced by this function.
+void ProfileBuffer::StreamPausedRangesToJSON(SpliceableJSONWriter& aWriter,
+                                             double aSinceTime) const {
+  mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    // Start times of the ranges currently open, if any.
+    Maybe<double> currentPauseStartTime;
+    Maybe<double> currentCollectionStartTime;
+
+    for (EntryGetter e(*aReader); e.Has(); e.Next()) {
+      switch (e.Get().GetKind()) {
+        case ProfileBufferEntry::Kind::Pause:
+          currentPauseStartTime = Some(e.Get().GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::Resume:
+          AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime,
+                         Some(e.Get().GetDouble()));
+          currentPauseStartTime = Nothing();
+          break;
+        case ProfileBufferEntry::Kind::CollectionStart:
+          currentCollectionStartTime = Some(e.Get().GetDouble());
+          break;
+        case ProfileBufferEntry::Kind::CollectionEnd:
+          AddPausedRange(aWriter, "collecting", currentCollectionStartTime,
+                         Some(e.Get().GetDouble()));
+          currentCollectionStartTime = Nothing();
+          break;
+        default:
+          // Every other kind of entry is irrelevant here.
+          break;
+      }
+    }
+
+    // Report the ranges that were never closed, with an unknown end time.
+    if (currentPauseStartTime) {
+      AddPausedRange(aWriter, "profiler-paused", currentPauseStartTime,
+                     Nothing());
+    }
+    if (currentCollectionStartTime) {
+      AddPausedRange(aWriter, "collecting", currentCollectionStartTime,
+                     Nothing());
+    }
+  });
+}
+
+// Appends to the buffer a copy of the sample recorded at position
+// *aLastSample for thread aThreadId, with its Time entry replaced by the
+// current time (in milliseconds since aProcessStartTime).
+//
+// Returns false, without adding anything, when aLastSample is empty, when the
+// sample is no longer in the buffer, or when the copy did not fit in the
+// temporary buffer; in the last two cases aLastSample is reset.
+// On success, returns true and aLastSample is updated to the position of the
+// ThreadId entry that starts the copy.
+bool ProfileBuffer::DuplicateLastSample(int aThreadId,
+                                        const TimeStamp& aProcessStartTime,
+                                        Maybe<uint64_t>& aLastSample) {
+  if (!aLastSample) {
+    return false;
+  }
+
+  // The copy is first assembled in a temporary buffer, and only transferred to
+  // mEntries once the whole sample has been read.
+  ProfileChunkedBuffer tempBuffer(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, mWorkerChunkManager);
+
+  // On every exit path, hand the temporary buffer's chunks back to the worker
+  // chunk manager.
+  auto retrieveWorkerChunk = MakeScopeExit(
+      [&]() { mWorkerChunkManager.Reset(tempBuffer.GetAllChunks()); });
+
+  const bool ok = mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader,
+               "ProfileChunkedBuffer cannot be out-of-session when sampler is "
+               "running");
+
+    EntryGetter e(*aReader, *aLastSample);
+
+    if (e.CurPos() != *aLastSample) {
+      // The last sample is no longer within the buffer range, so we cannot
+      // use it. Reset the stored buffer position to Nothing().
+      aLastSample.reset();
+      return false;
+    }
+
+    // The stored position must designate the ThreadId entry that starts this
+    // thread's sample.
+    MOZ_RELEASE_ASSERT(e.Has() && e.Get().IsThreadId() &&
+                       e.Get().GetInt() == aThreadId);
+
+    e.Next();
+
+    // Go through the whole entry and duplicate it, until we find the next
+    // one.
+    // Note: `continue` below restarts the loop on the current entry (the
+    // trailing e.Next() is bypassed), whereas `break` leaves the switch and
+    // lets the trailing e.Next() step past the current entry.
+    while (e.Has()) {
+      switch (e.Get().GetKind()) {
+        case ProfileBufferEntry::Kind::Pause:
+        case ProfileBufferEntry::Kind::Resume:
+        case ProfileBufferEntry::Kind::PauseSampling:
+        case ProfileBufferEntry::Kind::ResumeSampling:
+        case ProfileBufferEntry::Kind::CollectionStart:
+        case ProfileBufferEntry::Kind::CollectionEnd:
+        case ProfileBufferEntry::Kind::ThreadId:
+          // We're done.
+          return true;
+        case ProfileBufferEntry::Kind::Time:
+          // Copy with new time
+          AddEntry(tempBuffer,
+                   ProfileBufferEntry::Time(
+                       (TimeStamp::NowUnfuzzed() - aProcessStartTime)
+                           .ToMilliseconds()));
+          break;
+        case ProfileBufferEntry::Kind::CounterKey:
+        case ProfileBufferEntry::Kind::Number:
+        case ProfileBufferEntry::Kind::Count:
+        case ProfileBufferEntry::Kind::Responsiveness:
+          // Don't copy anything not part of a thread's stack sample
+          break;
+        case ProfileBufferEntry::Kind::CounterId:
+          // CounterId is normally followed by Time - if so, we'd like
+          // to skip it.  If we duplicate Time, it won't hurt anything, just
+          // waste buffer space (and this can happen if the CounterId has
+          // fallen off the end of the buffer, but Time (and Number/Count)
+          // are still in the buffer).
+          e.Next();
+          if (e.Has() && e.Get().GetKind() != ProfileBufferEntry::Kind::Time) {
+            // this would only happen if there was an invalid sequence
+            // in the buffer.  Don't skip it.
+            continue;
+          }
+          // we've skipped Time
+          break;
+        case ProfileBufferEntry::Kind::ProfilerOverheadTime:
+          // ProfilerOverheadTime is normally followed by
+          // ProfilerOverheadDuration*4 - if so, we'd like to skip it. Don't
+          // duplicate, as we are in the middle of a sampling and will soon
+          // capture its own overhead.
+          e.Next();
+          // A missing ProfilerOverheadDuration would only happen if there was
+          // an invalid sequence in the buffer. Don't skip unexpected entry.
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          e.Next();
+          if (e.Has() &&
+              e.Get().GetKind() !=
+                  ProfileBufferEntry::Kind::ProfilerOverheadDuration) {
+            continue;
+          }
+          // we've skipped ProfilerOverheadTime and
+          // ProfilerOverheadDuration*4.
+          break;
+        default: {
+          // Copy anything else we don't know about.
+          AddEntry(tempBuffer, e.Get());
+          break;
+        }
+      }
+      e.Next();
+    }
+    // Reached the end of the buffer: the sample was the last thing in it.
+    return true;
+  });
+
+  if (!ok) {
+    return false;
+  }
+
+  // If the buffer was big enough, there won't be any cleared blocks.
+  if (tempBuffer.GetState().mClearedBlockCount != 0) {
+    // No need to try to read stack again as it won't fit. Reset the stored
+    // buffer position to Nothing().
+    aLastSample.reset();
+    return false;
+  }
+
+  // Start the duplicated sample with its own ThreadId entry, and remember its
+  // position as the new "last sample".
+  aLastSample = Some(AddThreadIdEntry(aThreadId));
+
+  // Transfer the entries gathered in the temporary buffer to this buffer.
+  tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    MOZ_ASSERT(aReader, "tempBuffer cannot be out-of-session");
+
+    EntryGetter e(*aReader);
+
+    while (e.Has()) {
+      AddEntry(e.Get());
+      e.Next();
+    }
+  });
+
+  return true;
+}
+
+// Intentionally a no-op: nothing is discarded, whatever the value of aTime.
+// The duration limit will be removed from Firefox, see bug 1632365.
+void ProfileBuffer::DiscardSamplesBeforeTime(double aTime) {
+  // Only mark the parameter as deliberately unused.
+  static_cast<void>(aTime);
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfileBufferEntry.h b/mozglue/baseprofiler/core/ProfileBufferEntry.h
new file mode 100644
index 0000000000..6422a34a85
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileBufferEntry.h
@@ -0,0 +1,358 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntry_h
+#define ProfileBufferEntry_h
+
+#include "BaseProfilingCategory.h"
+#include "gtest/MozGtestFriend.h"
+#include "mozilla/BaseProfileJSONWriter.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferEntryKinds.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Variant.h"
+#include "mozilla/Vector.h"
+
+#include <string>
+#include <utility>
+
+namespace mozilla {
+namespace baseprofiler {
+
+// A single tagged value of the profile buffer: a kind (mKind) plus kNumChars
+// bytes of payload (mStorage).
+//
+// Entries are created through the per-kind static factory functions generated
+// below (e.g. ProfileBufferEntry::Time(double)). Reading the payload back is
+// restricted to ProfileBuffer and the listed tests.
+class ProfileBufferEntry {
+ public:
+  using KindUnderlyingType = ::mozilla::ProfileBufferEntryKindUnderlyingType;
+  using Kind = ::mozilla::ProfileBufferEntryKind;
+
+  ProfileBufferEntry();
+
+  // Number of payload bytes carried by one entry.
+  static constexpr size_t kNumChars = ::mozilla::ProfileBufferEntryNumChars;
+
+ private:
+  // One constructor per supported payload type; used by the factory functions
+  // generated by CTOR below.
+  // aString must be a static string.
+  ProfileBufferEntry(Kind aKind, const char* aString);
+  ProfileBufferEntry(Kind aKind, char aChars[kNumChars]);
+  ProfileBufferEntry(Kind aKind, void* aPtr);
+  ProfileBufferEntry(Kind aKind, double aDouble);
+  ProfileBufferEntry(Kind aKind, int64_t aInt64);
+  ProfileBufferEntry(Kind aKind, uint64_t aUint64);
+  ProfileBufferEntry(Kind aKind, int aInt);
+
+ public:
+// For each entry kind KIND with payload type TYPE, declares the static factory
+// function `ProfileBufferEntry KIND(TYPE aVal)`.
+#define CTOR(KIND, TYPE, SIZE)                   \
+  static ProfileBufferEntry KIND(TYPE aVal) {    \
+    return ProfileBufferEntry(Kind::KIND, aVal); \
+  }
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(CTOR)
+#undef CTOR
+
+  Kind GetKind() const { return mKind; }
+
+// For each entry kind KIND, declares the predicate `bool IsKIND() const`.
+#define IS_KIND(KIND, TYPE, SIZE) \
+  bool Is##KIND() const { return mKind == Kind::KIND; }
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(IS_KIND)
+#undef IS_KIND
+
+ private:
+  FRIEND_TEST(ThreadProfile, InsertOneEntry);
+  FRIEND_TEST(ThreadProfile, InsertOneEntryWithTinyBuffer);
+  FRIEND_TEST(ThreadProfile, InsertEntriesNoWrap);
+  FRIEND_TEST(ThreadProfile, InsertEntriesWrap);
+  FRIEND_TEST(ThreadProfile, MemoryMeasure);
+  friend class ProfileBuffer;
+
+  // Tag telling which kind of entry this is.
+  Kind mKind;
+  // Raw payload bytes.
+  uint8_t mStorage[kNumChars];
+
+  // Payload accessors, one per supported payload type.
+  // NOTE(review): nothing here ties an accessor to the entry's kind; callers
+  // presumably must pick the accessor matching the kind's payload type.
+  const char* GetString() const;
+  void* GetPtr() const;
+  double GetDouble() const;
+  int GetInt() const;
+  int64_t GetInt64() const;
+  uint64_t GetUint64() const;
+  void CopyCharsInto(char (&aOutArray)[kNumChars]) const;
+};
+
+// Packed layout: 1 byte for the tag + 8 bytes for the value.
+static_assert(sizeof(ProfileBufferEntry) == 9, "bad ProfileBufferEntry size");
+
+// Tables of the distinct frames and stacks referenced by a thread profile.
+// Each distinct frame (FrameKey) and each distinct stack (StackKey) is mapped
+// to a uint32_t index; a stack is described by an optional prefix stack index
+// plus the index of its last frame.
+// NOTE(review): the table rows are presumably accumulated in
+// mFrameTableWriter/mStackTableWriter as indices get assigned, and handed
+// over by the Splice*TableElements functions - confirm in the implementation.
+class UniqueStacks {
+ public:
+  // Identity of a frame: its location string plus the attributes stored in
+  // NormalFrameData.
+  struct FrameKey {
+    // Frame known by its location only; all other attributes get neutral
+    // values (not relevant for JS, inner window ID 0, no line, no column, no
+    // category pair).
+    explicit FrameKey(const char* aLocation)
+        : mData(NormalFrameData{std::string(aLocation), false, 0, Nothing(),
+                                Nothing()}) {}
+
+    // Fully-specified frame. aLocation is taken by rvalue reference and its
+    // contents are moved into the key (no string copy).
+    FrameKey(std::string&& aLocation, bool aRelevantForJS,
+             uint64_t aInnerWindowID, const Maybe<unsigned>& aLine,
+             const Maybe<unsigned>& aColumn,
+             const Maybe<ProfilingCategoryPair>& aCategoryPair)
+        : mData(NormalFrameData{std::move(aLocation), aRelevantForJS,
+                                aInnerWindowID, aLine, aColumn,
+                                aCategoryPair}) {}
+
+    FrameKey(const FrameKey& aToCopy) = default;
+
+    uint32_t Hash() const;
+    bool operator==(const FrameKey& aOther) const {
+      return mData == aOther.mData;
+    }
+
+    struct NormalFrameData {
+      bool operator==(const NormalFrameData& aOther) const;
+
+      std::string mLocation;
+      bool mRelevantForJS;
+      uint64_t mInnerWindowID;
+      Maybe<unsigned> mLine;
+      Maybe<unsigned> mColumn;
+      Maybe<ProfilingCategoryPair> mCategoryPair;
+    };
+    Variant<NormalFrameData> mData;
+  };
+
+  // Hash policy used by mFrameToIndexMap.
+  struct FrameKeyHasher {
+    using Lookup = FrameKey;
+
+    // Combines the location (when not empty), the JS relevance, the inner
+    // window ID, and whichever of line/column/category pair are present.
+    static HashNumber hash(const FrameKey& aLookup) {
+      HashNumber hash = 0;
+      if (aLookup.mData.is<FrameKey::NormalFrameData>()) {
+        const FrameKey::NormalFrameData& data =
+            aLookup.mData.as<FrameKey::NormalFrameData>();
+        if (!data.mLocation.empty()) {
+          hash = AddToHash(hash, HashString(data.mLocation.c_str()));
+        }
+        hash = AddToHash(hash, data.mRelevantForJS);
+        hash = mozilla::AddToHash(hash, data.mInnerWindowID);
+        if (data.mLine.isSome()) {
+          hash = AddToHash(hash, *data.mLine);
+        }
+        if (data.mColumn.isSome()) {
+          hash = AddToHash(hash, *data.mColumn);
+        }
+        if (data.mCategoryPair.isSome()) {
+          hash = AddToHash(hash, static_cast<uint32_t>(*data.mCategoryPair));
+        }
+      }
+      return hash;
+    }
+
+    static bool match(const FrameKey& aKey, const FrameKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(FrameKey& aKey, const FrameKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  // Identity of a stack: the index of its prefix stack (Nothing for a root
+  // stack) and the index of its last frame. The hash is computed once, at
+  // construction.
+  struct StackKey {
+    Maybe<uint32_t> mPrefixStackIndex;
+    uint32_t mFrameIndex;
+
+    // Root stack made of the single frame aFrame.
+    explicit StackKey(uint32_t aFrame)
+        : mFrameIndex(aFrame), mHash(HashGeneric(aFrame)) {}
+
+    // Stack made of the stack aPrefix (whose index is aPrefixStackIndex)
+    // followed by the frame aFrame; its hash extends the prefix's hash.
+    StackKey(const StackKey& aPrefix, uint32_t aPrefixStackIndex,
+             uint32_t aFrame)
+        : mPrefixStackIndex(Some(aPrefixStackIndex)),
+          mFrameIndex(aFrame),
+          mHash(AddToHash(aPrefix.mHash, aFrame)) {}
+
+    HashNumber Hash() const { return mHash; }
+
+    // Note: equality compares the indices only, not the cached hash.
+    bool operator==(const StackKey& aOther) const {
+      return mPrefixStackIndex == aOther.mPrefixStackIndex &&
+             mFrameIndex == aOther.mFrameIndex;
+    }
+
+   private:
+    HashNumber mHash;
+  };
+
+  // Hash policy used by mStackToIndexMap; relies on StackKey's cached hash.
+  struct StackKeyHasher {
+    using Lookup = StackKey;
+
+    static HashNumber hash(const StackKey& aLookup) { return aLookup.Hash(); }
+
+    static bool match(const StackKey& aKey, const StackKey& aLookup) {
+      return aKey == aLookup;
+    }
+
+    static void rekey(StackKey& aKey, const StackKey& aNewKey) {
+      aKey = aNewKey;
+    }
+  };
+
+  UniqueStacks();
+
+  // Return a StackKey for aFrame as the stack's root frame (no prefix).
+  [[nodiscard]] StackKey BeginStack(const FrameKey& aFrame);
+
+  // Return a new StackKey that is obtained by appending aFrame to aStack.
+  [[nodiscard]] StackKey AppendFrame(const StackKey& aStack,
+                                     const FrameKey& aFrame);
+
+  [[nodiscard]] uint32_t GetOrAddFrameIndex(const FrameKey& aFrame);
+  [[nodiscard]] uint32_t GetOrAddStackIndex(const StackKey& aStack);
+
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+ private:
+  void StreamNonJITFrame(const FrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+ public:
+  UniquePtr<UniqueJSONStrings> mUniqueStrings;
+
+ private:
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+  HashMap<FrameKey, uint32_t, FrameKeyHasher> mFrameToIndexMap;
+
+  SpliceableChunkedJSONWriter mStackTableWriter;
+  HashMap<StackKey, uint32_t, StackKeyHasher> mStackToIndexMap;
+};
+
+//
+// Thread profile JSON Format
+// --------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+//   "name": "Foo",
+//   "tid": 42,
+//   "samples":
+//   {
+//     "schema":
+//     {
+//       "stack": 0,          /* index into stackTable */
+//       "time": 1,           /* number */
+//       "eventDelay": 2, /* number */
+//     },
+//     "data":
+//     [
+//       [ 1, 0.0, 0.0 ]      /* { stack: 1, time: 0.0, eventDelay: 0.0 } */
+//     ]
+//   },
+//
+//   "markers":
+//   {
+//     "schema":
+//     {
+//       "name": 0,           /* index into stringTable */
+//       "time": 1,           /* number */
+//       "data": 2            /* arbitrary JSON */
+//     },
+//     "data":
+//     [
+//       [ 3, 0.1 ]           /* { name: 'example marker', time: 0.1 } */
+//     ]
+//   },
+//
+//   "stackTable":
+//   {
+//     "schema":
+//     {
+//       "prefix": 0,         /* index into stackTable */
+//       "frame": 1           /* index into frameTable */
+//     },
+//     "data":
+//     [
+//       [ null, 0 ],         /* (root) */
+//       [ 0,    1 ]          /* (root) > foo.js */
+//     ]
+//   },
+//
+//   "frameTable":
+//   {
+//     "schema":
+//     {
+//       "location": 0,       /* index into stringTable */
+//       "relevantForJS": 1,  /* bool */
+//       "innerWindowID": 2,  /* inner window ID of global JS `window` object */
+//       "implementation": 3, /* index into stringTable */
+//       "optimizations": 4,  /* arbitrary JSON */
+//       "line": 5,           /* number */
+//       "column": 6,         /* number */
+//       "category": 7,       /* index into profile.meta.categories */
+//       "subcategory": 8     /* index into
+//       profile.meta.categories[category].subcategories */
+//     },
+//     "data":
+//     [
+//       [ 0 ],               /* { location: '(root)' } */
+//       [ 1, 2 ]             /* { location: 'foo.js',
+//                                 implementation: 'baseline' } */
+//     ]
+//   },
+//
+//   "stringTable":
+//   [
+//     "(root)",
+//     "foo.js",
+//     "baseline",
+//     "example marker"
+//   ]
+// }
+//
+// Process:
+// {
+//   "name": "Bar",
+//   "pid": 24,
+//   "threads":
+//   [
+//     <0-N threads from above>
+//   ],
+//   "counters": /* includes the memory counter */
+//   [
+//     {
+//       "name": "qwerty",
+//       "category": "uiop",
+//       "description": "this is qwerty uiop",
+//       "sample_groups":
+//       [
+//         {
+//           "id": 42, /* number (thread id, or object identifier (tab), etc) */
+//           "samples":
+//           {
+//             "schema":
+//             {
+//               "time": 0,   /* number */
+//               "number": 1, /* number (of times the counter was touched) */
+//               "count": 2   /* number (total for the counter) */
+//             },
+//             "data":
+//             [
+//               [ 0.1, 1824,
+//                 454622 ]   /* { time: 0.1, number: 1824, count: 454622 } */
+//             ]
+//           },
+//         },
+//         /* more sample-group objects with different id's */
+//       ]
+//     },
+//     /* more counters */
+//   ],
+// }
+//
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* ndef ProfileBufferEntry_h */
diff --git a/mozglue/baseprofiler/core/ProfileJSONWriter.cpp b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
new file mode 100644
index 0000000000..966ff2f515
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfileJSONWriter.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla::baseprofiler {
+
+// Creates an empty string table: only opens the bare list, in the given
+// collection style, that will receive the string elements.
+UniqueJSONStrings::UniqueJSONStrings(JSONWriter::CollectionStyle aStyle) {
+  mStringTableWriter.StartBareList(aStyle);
+}
+
+// Creates a string table that starts with the same contents as aOther: the
+// hash-to-index associations are duplicated, and the JSON already written for
+// aOther's strings is copied into this table's writer. aOther is left
+// untouched.
+UniqueJSONStrings::UniqueJSONStrings(const UniqueJSONStrings& aOther,
+                                     JSONWriter::CollectionStyle aStyle) {
+  mStringTableWriter.StartBareList(aStyle);
+
+  const uint32_t count = aOther.mStringHashToIndexMap.count();
+  if (count == 0) {
+    // Nothing to duplicate.
+    return;
+  }
+
+  // Reserve room for all entries upfront, so that the insertions below cannot
+  // fail.
+  MOZ_RELEASE_ASSERT(mStringHashToIndexMap.reserve(count));
+  auto iter = aOther.mStringHashToIndexMap.iter();
+  while (!iter.done()) {
+    mStringHashToIndexMap.putNewInfallible(iter.get().key(),
+                                           iter.get().value());
+    iter.next();
+  }
+
+  mStringTableWriter.CopyAndSplice(
+      aOther.mStringTableWriter.ChunkedWriteFunc());
+}
+
+// Defaulted here, in the implementation file, rather than in the header.
+UniqueJSONStrings::~UniqueJSONStrings() = default;
+
+// Splices the string elements accumulated so far into aWriter. The underlying
+// chunked write function is taken (not copied) from mStringTableWriter.
+// NOTE(review): this table presumably must not be used to add strings
+// afterwards - confirm against TakeChunkedWriteFunc().
+void UniqueJSONStrings::SpliceStringTableElements(
+    SpliceableJSONWriter& aWriter) {
+  aWriter.TakeAndSplice(mStringTableWriter.TakeChunkedWriteFunc());
+}
+
+// Returns the index of aStr in the string table. A string seen for the first
+// time is appended to the table, and gets the next free index (the number of
+// entries before the insertion).
+//
+// NOTE(review): strings are identified by their HashString value only, so two
+// different strings with the same hash would share one index.
+uint32_t UniqueJSONStrings::GetOrAddIndex(const Span<const char>& aStr) {
+  HashNumber hash = HashString(aStr.data(), aStr.size());
+  uint32_t count = mStringHashToIndexMap.count();
+
+  auto entry = mStringHashToIndexMap.lookupForAdd(hash);
+  if (!entry) {
+    // First occurrence: record the new index and stream the string itself.
+    MOZ_RELEASE_ASSERT(mStringHashToIndexMap.add(entry, hash, count));
+    mStringTableWriter.StringElement(aStr);
+    return count;
+  }
+
+  // Known hash: reuse the index assigned earlier.
+  MOZ_ASSERT(entry->value() < count);
+  return entry->value();
+}
+
+}  // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.cpp b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
new file mode 100644
index 0000000000..4dc600d97c
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.cpp
@@ -0,0 +1,187 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfiledThreadData.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+#if defined(GP_OS_darwin)
+#  include <pthread.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Associates this object with `aThreadInfo`, which is stored in `mThreadInfo`.
+// All other members keep their default-constructed state.
+ProfiledThreadData::ProfiledThreadData(ThreadInfo* aThreadInfo)
+    : mThreadInfo(aThreadInfo) {}
+
+// No manual cleanup needed, members destroy themselves.
+ProfiledThreadData::~ProfiledThreadData() = default;
+
+// Streams this thread as one JSON value into `aWriter`: first the thread
+// properties, samples and markers (see StreamSamplesAndMarkers), then the
+// "stackTable", "frameTable" and "stringTable" built while streaming them.
+// `aWriter` only references the local string table for the duration of this
+// call; that link is reset before returning.
+void ProfiledThreadData::StreamJSON(const ProfileBuffer& aBuffer,
+                                    SpliceableJSONWriter& aWriter,
+                                    const std::string& aProcessName,
+                                    const std::string& aETLDplus1,
+                                    const TimeStamp& aProcessStartTime,
+                                    double aSinceTime) {
+  // Tables local to this call, filled in while samples/markers are streamed.
+  UniqueStacks uniqueStacks;
+  MOZ_ASSERT(uniqueStacks.mUniqueStrings);
+  aWriter.SetUniqueStrings(*uniqueStacks.mUniqueStrings);
+
+  aWriter.Start();
+
+  StreamSamplesAndMarkers(mThreadInfo->Name(), mThreadInfo->ThreadId(), aBuffer,
+                          aWriter, aProcessName, aETLDplus1, aProcessStartTime,
+                          mThreadInfo->RegisterTime(), mUnregisterTime,
+                          aSinceTime, uniqueStacks);
+
+  // "stackTable": schema, then the stack elements collected above.
+  aWriter.StartObjectProperty("stackTable");
+  {
+    // The schema writer is kept in its own scope, so that it is destroyed
+    // before the "data" array is started.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("prefix");
+    schema.WriteField("frame");
+  }
+  aWriter.StartArrayProperty("data");
+  uniqueStacks.SpliceStackTableElements(aWriter);
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // "frameTable": schema, then the frame elements collected above.
+  aWriter.StartObjectProperty("frameTable");
+  {
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("location");
+    schema.WriteField("relevantForJS");
+    schema.WriteField("innerWindowID");
+    schema.WriteField("implementation");
+    schema.WriteField("optimizations");
+    schema.WriteField("line");
+    schema.WriteField("column");
+    schema.WriteField("category");
+    schema.WriteField("subcategory");
+  }
+  aWriter.StartArrayProperty("data");
+  uniqueStacks.SpliceFrameTableElements(aWriter);
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  // "stringTable": must come last, everything above may have added strings.
+  aWriter.StartArrayProperty("stringTable");
+  std::move(*uniqueStacks.mUniqueStrings).SpliceStringTableElements(aWriter);
+  aWriter.EndArray();
+
+  aWriter.End();
+
+  // `uniqueStacks` is about to go away, don't leave `aWriter` pointing at it.
+  aWriter.ResetUniqueStrings();
+}
+
+// Writes the properties of one thread into the JSON object currently open in
+// `aWriter`, in this order: "processType", "name", optional "processName" and
+// "eTLD+1", "registerTime", "unregisterTime", "samples", "markers", "pid",
+// "tid".
+// Samples and markers are read from `aBuffer` for `aThreadId`; stacks and
+// strings are deduplicated through `aUniqueStacks`.
+// Returns the value returned by ProfileBuffer::StreamSamplesToJSON, which is
+// also used as "tid" when `aThreadId` is 0.
+int StreamSamplesAndMarkers(
+    const char* aName, int aThreadId, const ProfileBuffer& aBuffer,
+    SpliceableJSONWriter& aWriter, const std::string& aProcessName,
+    const std::string& aETLDplus1, const TimeStamp& aProcessStartTime,
+    const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime,
+    double aSinceTime, UniqueStacks& aUniqueStacks) {
+  aWriter.StringProperty(
+      "processType",
+      "(unknown)" /* XRE_GeckoProcessTypeToString(XRE_GetProcessType()) */);
+
+  // We currently need to distinguish threads output by Base Profiler from
+  // those in Gecko Profiler, as the frontend could get confused and lose
+  // tracks with the same name.
+  // TODO: As part of the profilers de-duplication, thread data from both
+  // profilers should end up in the same track, at which point this won't be
+  // necessary anymore. See meta bug 1557566.
+  const std::string decoratedName = std::string(aName) + " (pre-xul)";
+  aWriter.StringProperty("name", decoratedName);
+
+  // Process name and eTLD+1 are only output when provided.
+  if (!aProcessName.empty()) {
+    aWriter.StringProperty("processName", aProcessName);
+  }
+  if (!aETLDplus1.empty()) {
+    aWriter.StringProperty("eTLD+1", aETLDplus1);
+  }
+
+  // Times are output in milliseconds since the process start, or as null when
+  // the timestamp is not set.
+  if (aRegisterTime) {
+    aWriter.DoubleProperty(
+        "registerTime", (aRegisterTime - aProcessStartTime).ToMilliseconds());
+  } else {
+    aWriter.NullProperty("registerTime");
+  }
+
+  if (aUnregisterTime) {
+    aWriter.DoubleProperty(
+        "unregisterTime",
+        (aUnregisterTime - aProcessStartTime).ToMilliseconds());
+  } else {
+    aWriter.NullProperty("unregisterTime");
+  }
+
+  aWriter.StartObjectProperty("samples");
+  {
+    // Scoped, so that the schema writer is destroyed before "data" starts.
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("stack");
+    schema.WriteField("time");
+    schema.WriteField("eventDelay");
+  }
+  aWriter.StartArrayProperty("data");
+  const int processedThreadId = aBuffer.StreamSamplesToJSON(
+      aWriter, aThreadId, aSinceTime, aUniqueStacks);
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  aWriter.StartObjectProperty("markers");
+  {
+    JSONSchemaWriter schema(aWriter);
+    schema.WriteField("name");
+    schema.WriteField("startTime");
+    schema.WriteField("endTime");
+    schema.WriteField("phase");
+    schema.WriteField("category");
+    schema.WriteField("data");
+  }
+  aWriter.StartArrayProperty("data");
+  aBuffer.StreamMarkersToJSON(aWriter, aThreadId, aProcessStartTime, aSinceTime,
+                              aUniqueStacks);
+  aWriter.EndArray();
+  aWriter.EndObject();
+
+  aWriter.IntProperty("pid",
+                      static_cast<int64_t>(profiler_current_process_id()));
+
+  // When no specific thread was requested, fall back to the id found while
+  // streaming the samples.
+  const int outputThreadId = aThreadId != 0 ? aThreadId : processedThreadId;
+  aWriter.IntProperty("tid", static_cast<int64_t>(outputThreadId));
+
+  return processedThreadId;
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfiledThreadData.h b/mozglue/baseprofiler/core/ProfiledThreadData.h
new file mode 100644
index 0000000000..c45c02a7bb
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfiledThreadData.h
@@ -0,0 +1,119 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfiledThreadData_h
+#define ProfiledThreadData_h
+
+#include "BaseProfilingStack.h"
+#include "platform.h"
+#include "ProfileBufferEntry.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class ProfileBuffer;
+
+// This class contains information about a thread that is only relevant while
+// the profiler is running, for any threads (both alive and dead) whose thread
+// name matches the "thread filter" in the current profiler run.
+// ProfiledThreadData objects may be kept alive even after the thread is
+// unregistered, as long as there is still data for that thread in the profiler
+// buffer.
+//
+// Accesses to this class are protected by the profiler state lock.
+//
+// Created as soon as the following are true for the thread:
+//  - The profiler is running, and
+//  - the thread matches the profiler's thread filter, and
+//  - the thread is registered with the profiler.
+// So it gets created in response to either (1) the profiler being started (for
+// an existing registered thread) or (2) the thread being registered (if the
+// profiler is already running).
+//
+// The thread may be unregistered during the lifetime of ProfiledThreadData.
+// If that happens, NotifyUnregistered() is called.
+//
+// This class is the right place to store buffer positions. Profiler buffer
+// positions become invalid if the profiler buffer is destroyed, which happens
+// when the profiler is stopped.
+class ProfiledThreadData final {
+ public:
+  // Associates this object with `aThreadInfo` (kept in a RefPtr member).
+  explicit ProfiledThreadData(ThreadInfo* aThreadInfo);
+  ~ProfiledThreadData();
+
+  // Records that the thread has been unregistered: forgets the last sample
+  // position, stamps the unregister time with the current time, and remembers
+  // `aBufferPosition` as the buffer position at unregistration.
+  // In debug builds, asserts that no "received JSContext" position is still
+  // recorded.
+  void NotifyUnregistered(uint64_t aBufferPosition) {
+    mLastSample = Nothing();
+    MOZ_ASSERT(!mBufferPositionWhenReceivedJSContext,
+               "JSContext should have been cleared before the thread was "
+               "unregistered");
+    mUnregisterTime = TimeStamp::NowUnfuzzed();
+    mBufferPositionWhenUnregistered = Some(aBufferPosition);
+  }
+  // Buffer position given to NotifyUnregistered(), or Nothing() if
+  // NotifyUnregistered() has not been called.
+  Maybe<uint64_t> BufferPositionWhenUnregistered() {
+    return mBufferPositionWhenUnregistered;
+  }
+
+  // Mutable access to the position of the most recent sample; callers update
+  // it through the returned reference.
+  Maybe<uint64_t>& LastSample() { return mLastSample; }
+
+  // Streams this thread's samples, markers, and the associated stack, frame
+  // and string tables from `aBuffer` into `aWriter`.
+  void StreamJSON(const ProfileBuffer& aBuffer, SpliceableJSONWriter& aWriter,
+                  const std::string& aProcessName,
+                  const std::string& aETLDplus1,
+                  const TimeStamp& aProcessStartTime, double aSinceTime);
+
+  // The ThreadInfo given at construction. Returned by value, i.e., as a new
+  // RefPtr.
+  const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+  // Remembers `aCurrentBufferPosition` as the buffer position at which the
+  // thread received a JSContext.
+  void NotifyReceivedJSContext(uint64_t aCurrentBufferPosition) {
+    mBufferPositionWhenReceivedJSContext = Some(aCurrentBufferPosition);
+  }
+
+ private:
+  // Group A:
+  // The following fields are interesting for the entire lifetime of a
+  // ProfiledThreadData object.
+
+  // This thread's thread info.
+  const RefPtr<ThreadInfo> mThreadInfo;
+
+  // Group B:
+  // The following fields are only used while this thread is alive and
+  // registered. They become Nothing() once the thread is unregistered.
+
+  // When sampling, this holds the position in ActivePS::mBuffer of the most
+  // recent sample for this thread, or Nothing() if there is no sample for this
+  // thread in the buffer.
+  Maybe<uint64_t> mLastSample;
+
+  // Only non-Nothing() if the thread currently has a JSContext.
+  // NOTE(review): this class only ever sets this member (in
+  // NotifyReceivedJSContext); nothing visible here resets it to Nothing().
+  Maybe<uint64_t> mBufferPositionWhenReceivedJSContext;
+
+  // Group C:
+  // The following fields are only used once this thread has been unregistered.
+
+  // Both are set by NotifyUnregistered().
+  Maybe<uint64_t> mBufferPositionWhenUnregistered;
+  TimeStamp mUnregisterTime;
+};
+
+// Stream all samples and markers from aBuffer with the given aThreadId (or 0
+// for everything, which is assumed to be a single backtrace sample.)
+// The output is a set of properties written into the JSON object that the
+// caller has already started in aWriter: thread name (aName), optional process
+// name and eTLD+1 (skipped when empty), register/unregister times relative to
+// aProcessStartTime (null when the timestamp is not set), "samples",
+// "markers", "pid" and "tid".
+// Only data newer than aSinceTime is requested from the buffer; stacks, frames
+// and strings are deduplicated through aUniqueStacks, whose tables the caller
+// is responsible for streaming.
+// Returns the thread id of the output sample(s), or 0 if none was present.
+int StreamSamplesAndMarkers(
+    const char* aName, int aThreadId, const ProfileBuffer& aBuffer,
+    SpliceableJSONWriter& aWriter, const std::string& aProcessName,
+    const std::string& aETLDplus1, const TimeStamp& aProcessStartTime,
+    const TimeStamp& aRegisterTime, const TimeStamp& aUnregisterTime,
+    double aSinceTime, UniqueStacks& aUniqueStacks);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // ProfiledThreadData_h
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.cpp b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
new file mode 100644
index 0000000000..166e72fd9c
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.cpp
@@ -0,0 +1,123 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerBacktrace.h"
+
+#include "BaseProfiler.h"
+#include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/BaseProfileJSONWriter.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Owning constructor: takes ownership of the given buffer(s), and points the
+// raw working pointers (`mProfileChunkedBuffer`, `mProfileBuffer`) at the
+// now-owned objects.
+// If a ProfileBuffer is given, a ProfileChunkedBuffer must be given as well,
+// and it must be the ProfileBuffer's underlying chunked buffer; both
+// conditions are release-asserted.
+ProfilerBacktrace::ProfilerBacktrace(
+    const char* aName,
+    UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+    UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull /* = nullptr */)
+    // Each raw pointer is initialized from the storage member listed just
+    // before it, so this relies on the storage members being initialized
+    // first.
+    : mName(aName),
+      mOptionalProfileChunkedBufferStorage(
+          std::move(aProfileChunkedBufferStorage)),
+      mProfileChunkedBuffer(mOptionalProfileChunkedBufferStorage.get()),
+      mOptionalProfileBufferStorage(std::move(aProfileBufferStorageOrNull)),
+      mProfileBuffer(mOptionalProfileBufferStorage.get()) {
+  if (mProfileBuffer) {
+    MOZ_RELEASE_ASSERT(mProfileChunkedBuffer,
+                       "If we take ownership of a ProfileBuffer, we must also "
+                       "receive ownership of a ProfileChunkedBuffer");
+    MOZ_RELEASE_ASSERT(
+        mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+        "If we take ownership of a ProfileBuffer, we must also receive "
+        "ownership of its ProfileChunkedBuffer");
+  }
+  // Debug-only check; a null chunked buffer (empty backtrace) is accepted.
+  MOZ_ASSERT(
+      !mProfileChunkedBuffer || !mProfileChunkedBuffer->IsThreadSafe(),
+      "ProfilerBacktrace only takes a non-thread-safe ProfileChunkedBuffer");
+}
+
+// Non-owning constructor: only references external buffers, either of which
+// may be null.
+// - Only a ProfileBuffer: its underlying chunked buffer is used.
+// - Both: they must already be connected (release-asserted).
+// - Neither: the backtrace is empty.
+ProfilerBacktrace::ProfilerBacktrace(
+    const char* aName,
+    ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull /* = nullptr */,
+    ProfileBuffer* aExternalProfileBufferOrNull /* = nullptr */)
+    : mName(aName),
+      mProfileChunkedBuffer(aExternalProfileChunkedBufferOrNull),
+      mProfileBuffer(aExternalProfileBufferOrNull) {
+  if (mProfileBuffer) {
+    if (mProfileChunkedBuffer) {
+      MOZ_RELEASE_ASSERT(
+          mProfileChunkedBuffer == &mProfileBuffer->UnderlyingChunkedBuffer(),
+          "If we reference both ProfileChunkedBuffer and ProfileBuffer, they "
+          "must already be connected");
+    } else {
+      // We don't have a ProfileChunkedBuffer but we have a ProfileBuffer, use
+      // the latter's ProfileChunkedBuffer.
+      mProfileChunkedBuffer = &mProfileBuffer->UnderlyingChunkedBuffer();
+    }
+  }
+
+  // Whichever way we got a chunked buffer (if any), it must not be a
+  // thread-safe one.
+  if (mProfileChunkedBuffer) {
+    MOZ_ASSERT(!mProfileChunkedBuffer->IsThreadSafe(),
+               "ProfilerBacktrace only takes a non-thread-safe "
+               "ProfileChunkedBuffer");
+  }
+}
+
+// Owned buffers (if any) are released by their UniquePtr members.
+ProfilerBacktrace::~ProfilerBacktrace() = default;
+
+// Streams this backtrace's samples and markers into `aWriter`, using
+// `aUniqueStacks` for deduplication.
+// Returns the thread id reported by StreamSamplesAndMarkers, or 0 when this
+// backtrace has no buffer at all (nothing is streamed in that case).
+int ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter,
+                                  const TimeStamp& aProcessStartTime,
+                                  UniqueStacks& aUniqueStacks) {
+  // Unlike ProfiledThreadData::StreamJSON, we don't need to call
+  // ProfileBuffer::AddJITInfoForRange because ProfileBuffer does not contain
+  // any JitReturnAddr entries. For synchronous samples, JIT frames get expanded
+  // at sample time.
+
+  if (mProfileBuffer) {
+    // A ProfileBuffer is readily available, stream straight from it.
+    return StreamSamplesAndMarkers(
+        mName.c_str(), 0, *mProfileBuffer, aWriter, "", "", aProcessStartTime,
+        /* aRegisterTime */ TimeStamp(),
+        /* aUnregisterTime */ TimeStamp(),
+        /* aSinceTime */ 0, aUniqueStacks);
+  }
+
+  if (mProfileChunkedBuffer) {
+    // Only the chunked buffer is available, wrap it in a temporary
+    // ProfileBuffer for the duration of the streaming.
+    ProfileBuffer profileBuffer(*mProfileChunkedBuffer);
+    return StreamSamplesAndMarkers(
+        mName.c_str(), 0, profileBuffer, aWriter, "", "", aProcessStartTime,
+        /* aRegisterTime */ TimeStamp(),
+        /* aUnregisterTime */ TimeStamp(),
+        /* aSinceTime */ 0, aUniqueStacks);
+  }
+
+  // If there are no buffers, the backtrace is empty and nothing is streamed.
+  return 0;
+}
+
+}  // namespace baseprofiler
+
+// Reads back a backtrace: first the chunked buffer, then (only if there was
+// one) the name. Returns nullptr when no buffer was read, otherwise a new
+// ProfilerBacktrace that owns the deserialized chunked buffer.
+// static
+template <typename Destructor>
+UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>
+ProfileBufferEntryReader::
+    Deserializer<UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>>::Read(
+        ProfileBufferEntryReader& aER) {
+  using BacktracePointer =
+      UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>;
+
+  auto profileChunkedBuffer = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>();
+  if (!profileChunkedBuffer) {
+    // Null or empty backtrace; no name follows in that case.
+    return nullptr;
+  }
+  MOZ_ASSERT(
+      !profileChunkedBuffer->IsThreadSafe(),
+      "ProfilerBacktrace only stores non-thread-safe ProfileChunkedBuffers");
+
+  const std::string backtraceName = aER.ReadObject<std::string>();
+  return BacktracePointer(new baseprofiler::ProfilerBacktrace(
+      backtraceName.c_str(), std::move(profileChunkedBuffer)));
+}
+
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilerBacktrace.h b/mozglue/baseprofiler/core/ProfilerBacktrace.h
new file mode 100644
index 0000000000..b5365cd4ac
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerBacktrace.h
@@ -0,0 +1,162 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __PROFILER_BACKTRACE_H
+#define __PROFILER_BACKTRACE_H
+
+#include "mozilla/UniquePtrExtensions.h"
+
+#include <string>
+
+namespace mozilla {
+
+class ProfileChunkedBuffer;
+class TimeStamp;
+
+namespace baseprofiler {
+
+class ProfileBuffer;
+class SpliceableJSONWriter;
+class ThreadInfo;
+class UniqueStacks;
+
+// ProfilerBacktrace encapsulates a synchronous sample.
+// It can work with a ProfileBuffer and/or a ProfileChunkedBuffer (if both, they
+// must already be linked together). The ProfileChunkedBuffer contains all the
+// data; the ProfileBuffer is not strictly needed, only provide it if it is
+// already available at the call site.
+// And these buffers can either be:
+// - owned here (so that the ProfilerBacktrace object can be kept for later
+//   use), OR
+// - referenced through pointers (in cases where the backtrace is immediately
+//   streamed out, so we only need temporary references to external buffers);
+//   these pointers may be null for empty backtraces.
+class ProfilerBacktrace {
+ public:
+  // Take ownership of external buffers and use them to keep, and to stream a
+  // backtrace. If a ProfileBuffer is given, its underlying chunked buffer must
+  // be provided as well.
+  explicit ProfilerBacktrace(
+      const char* aName,
+      UniquePtr<ProfileChunkedBuffer> aProfileChunkedBufferStorage,
+      UniquePtr<ProfileBuffer> aProfileBufferStorageOrNull = nullptr);
+
+  // Take pointers to external buffers and use them to stream a backtrace.
+  // If null, the backtrace is effectively empty.
+  // If both are provided, they must already be connected.
+  // Nothing is owned in this case, so the pointed-at buffers must outlive any
+  // use of this object.
+  explicit ProfilerBacktrace(
+      const char* aName,
+      ProfileChunkedBuffer* aExternalProfileChunkedBufferOrNull = nullptr,
+      ProfileBuffer* aExternalProfileBufferOrNull = nullptr);
+
+  ~ProfilerBacktrace();
+
+  // True if there is no chunked buffer, or if serializing the chunked buffer
+  // would take no more bytes than a ULEB128-encoded 0.
+  [[nodiscard]] bool IsEmpty() const {
+    return !mProfileChunkedBuffer ||
+           ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>::Bytes(
+               *mProfileChunkedBuffer) <= ULEB128Size(0u);
+  }
+
+  // ProfilerBacktraces' stacks are deduplicated in the context of the
+  // profile that contains the backtrace as a marker payload.
+  //
+  // That is, markers that contain backtraces should not need their own stack,
+  // frame, and string tables. They should instead reuse their parent
+  // profile's tables.
+  //
+  // Returns the thread id of the streamed sample(s), or 0 if nothing was
+  // streamed.
+  int StreamJSON(SpliceableJSONWriter& aWriter,
+                 const TimeStamp& aProcessStartTime,
+                 UniqueStacks& aUniqueStacks);
+
+ private:
+  // Used to de/serialize a ProfilerBacktrace.
+  friend ProfileBufferEntryWriter::Serializer<ProfilerBacktrace>;
+  friend ProfileBufferEntryReader::Deserializer<ProfilerBacktrace>;
+
+  // Name given at construction; it is serialized along with the buffer, and
+  // passed as the thread name when streaming.
+  std::string mName;
+
+  // `ProfileChunkedBuffer` in which `mProfileBuffer` stores its data; must be
+  // located before `mProfileBuffer` so that it's destroyed after.
+  UniquePtr<ProfileChunkedBuffer> mOptionalProfileChunkedBufferStorage;
+  // If null, there is no need to check mProfileBuffer's (if present) underlying
+  // buffer because this is done when constructed.
+  ProfileChunkedBuffer* mProfileChunkedBuffer;
+
+  // Owned ProfileBuffer, only set by the owning constructor (may be null).
+  UniquePtr<ProfileBuffer> mOptionalProfileBufferStorage;
+  // Owned or external ProfileBuffer; null if none was provided.
+  ProfileBuffer* mProfileBuffer;
+};
+
+}  // namespace baseprofiler
+
+// Format: [ ProfileChunkedBuffer | name ]
+// A single ULEB128 0, with no name after it, marks a null or empty backtrace.
+template <>
+struct ProfileBufferEntryWriter::Serializer<baseprofiler::ProfilerBacktrace> {
+  // Number of bytes that Write() outputs for `aBacktrace`: the size of a
+  // ULEB128 0 when there is no buffer or the buffer is empty, otherwise the
+  // buffer size plus the name size.
+  static Length Bytes(const baseprofiler::ProfilerBacktrace& aBacktrace) {
+    ProfileChunkedBuffer* const chunkedBuffer =
+        aBacktrace.mProfileChunkedBuffer;
+    if (chunkedBuffer) {
+      const auto bufferBytes = SumBytes(*chunkedBuffer);
+      if (bufferBytes > ULEB128Size(0u)) {
+        return bufferBytes + SumBytes(aBacktrace.mName);
+      }
+    }
+    // No buffer, or empty buffer.
+    return ULEB128Size(0u);
+  }
+
+  // Outputs the chunked buffer followed by the name, or a lone ULEB128 0 when
+  // there is no buffer or the buffer is empty.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const baseprofiler::ProfilerBacktrace& aBacktrace) {
+    ProfileChunkedBuffer* const chunkedBuffer =
+        aBacktrace.mProfileChunkedBuffer;
+    const bool hasContent =
+        chunkedBuffer && SumBytes(*chunkedBuffer) > ULEB128Size(0u);
+    if (!hasContent) {
+      // No buffer, or empty buffer.
+      aEW.WriteULEB128(0u);
+      return;
+    }
+    aEW.WriteObject(*chunkedBuffer);
+    aEW.WriteObject(aBacktrace.mName);
+  }
+};
+
+// Serialization of a (possibly null) owning pointer to a ProfilerBacktrace.
+// A null pointer is output like an empty backtrace, otherwise the pointed-at
+// backtrace is serialized.
+template <typename Destructor>
+struct ProfileBufferEntryWriter::Serializer<
+    UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+  static Length Bytes(const UniquePtr<baseprofiler::ProfilerBacktrace,
+                                      Destructor>& aBacktrace) {
+    // A null backtrace pointer takes the same space as an empty backtrace.
+    return aBacktrace ? SumBytes(*aBacktrace) : ULEB128Size(0u);
+  }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<baseprofiler::ProfilerBacktrace,
+                                    Destructor>& aBacktrace) {
+    if (aBacktrace) {
+      aEW.WriteObject(*aBacktrace);
+      return;
+    }
+    // Null backtrace pointer (treated like an empty backtrace).
+    aEW.WriteULEB128(0u);
+  }
+};
+
+// Deserialization of an owning pointer to a ProfilerBacktrace.
+template <typename Destructor>
+struct ProfileBufferEntryReader::Deserializer<
+    UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>> {
+  // Replaces `aBacktrace` with the result of Read(); whatever it previously
+  // pointed at is released by the assignment.
+  static void ReadInto(
+      ProfileBufferEntryReader& aER,
+      UniquePtr<baseprofiler::ProfilerBacktrace, Destructor>& aBacktrace) {
+    aBacktrace = Read(aER);
+  }
+
+  // Only declared here, the definition is not in this header.
+  static UniquePtr<baseprofiler::ProfilerBacktrace, Destructor> Read(
+      ProfileBufferEntryReader& aER);
+};
+
+}  // namespace mozilla
+
+#endif  // __PROFILER_BACKTRACE_H
diff --git a/mozglue/baseprofiler/core/ProfilerMarkers.cpp b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
new file mode 100644
index 0000000000..bff2a9ebdd
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilerMarkers.cpp
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+#include "mozilla/Likely.h"
+
+#include <limits>
+
+namespace mozilla {
+namespace base_profiler_markers_detail {
+
+// We need an atomic type that can hold a `DeserializerTag`. (Atomic doesn't
+// work with too-small types.)
+using DeserializerTagAtomic = unsigned;
+
+// Number of currently-registered deserializers and other marker type functions.
+// This is also the highest tag handed out so far; it starts at 0, which is
+// never a valid tag.
+static Atomic<DeserializerTagAtomic, MemoryOrdering::Relaxed>
+    sDeserializerCount{0};
+
+// This needs to be big enough to handle all possible marker types. If one day
+// this needs to be higher, the underlying DeserializerTag type will have to be
+// changed.
+static constexpr DeserializerTagAtomic DeserializerMax = 250;
+
+// Tags are handed out as `DeserializerTag` values, so the highest possible tag
+// must be representable in that type.
+static_assert(
+    DeserializerMax <= std::numeric_limits<Streaming::DeserializerTag>::max(),
+    "The maximum number of deserializers must fit in the DeserializerTag type");
+
+// Array of marker type functions.
+// 1-based, i.e.: [0] -> tag 1, [DeserializerMax - 1] -> tag DeserializerMax.
+// Elements are added at the next available atomically-incremented
+// `sDeserializerCount` (minus 1) whenever a new marker type is used in a
+// Firefox session; the content is kept between profiler runs in that session.
+// There is theoretically a race between the increment and the time the entry is
+// fully written, but in practice all new elements are written (during
+// profiling, using a marker type for the first time) long before they are read
+// (after profiling is paused).
+static Streaming::MarkerTypeFunctions
+    sMarkerTypeFunctions1Based[DeserializerMax];
+
+// Registers the three functions of a marker type, and returns the tag under
+// which they were stored (1 for the first registration, then 2, etc.).
+// All three function pointers must be non-null, and at most `DeserializerMax`
+// registrations are possible; both conditions are release-asserted.
+// Every call takes a new tag, even if the same functions were registered
+// before.
+/* static */ Streaming::DeserializerTag Streaming::TagForMarkerTypeFunctions(
+    Streaming::MarkerDataDeserializer aDeserializer,
+    Streaming::MarkerTypeNameFunction aMarkerTypeNameFunction,
+    Streaming::MarkerSchemaFunction aMarkerSchemaFunction) {
+  MOZ_RELEASE_ASSERT(!!aDeserializer);
+  MOZ_RELEASE_ASSERT(!!aMarkerTypeNameFunction);
+  MOZ_RELEASE_ASSERT(!!aMarkerSchemaFunction);
+
+  // The pre-increment reserves a slot; the count is bumped before the slot is
+  // filled in below.
+  DeserializerTagAtomic tag = ++sDeserializerCount;
+  MOZ_RELEASE_ASSERT(
+      tag <= DeserializerMax,
+      "Too many deserializers, consider increasing DeserializerMax. "
+      "Or is a deserializer stored again and again?");
+  // Tags are 1-based, the array is 0-based.
+  sMarkerTypeFunctions1Based[tag - 1] = {aDeserializer, aMarkerTypeNameFunction,
+                                         aMarkerSchemaFunction};
+
+  return static_cast<DeserializerTag>(tag);
+}
+
+// Returns the marker data deserializer that was registered under `aTag`.
+// `aTag` must be between 1 and the number of registrations so far (inclusive);
+// this is release-asserted.
+/* static */ Streaming::MarkerDataDeserializer Streaming::DeserializerForTag(
+    Streaming::DeserializerTag aTag) {
+  MOZ_RELEASE_ASSERT(
+      aTag > 0 && static_cast<DeserializerTagAtomic>(aTag) <=
+                      static_cast<DeserializerTagAtomic>(sDeserializerCount),
+      "Out-of-range tag value");
+  // Tags are 1-based, the array is 0-based.
+  return sMarkerTypeFunctions1Based[aTag - 1].mMarkerDataDeserializer;
+}
+
+// Returns a read-only view of the functions registered so far. The element at
+// index i belongs to tag i + 1. The length is the registration count at the
+// time of the call.
+/* static */ Span<const Streaming::MarkerTypeFunctions>
+Streaming::MarkerTypeFunctionsArray() {
+  return {sMarkerTypeFunctions1Based, sDeserializerCount};
+}
+
+}  // namespace base_profiler_markers_detail
+
+// Outputs this schema as one JSON object element named `aName`. The caller
+// should have started a JSON array, in which we can add an object that defines
+// a marker schema.
+// Nothing at all is output if the schema has no display locations.
+void MarkerSchema::Stream(JSONWriter& aWriter,
+                          const Span<const char>& aName) && {
+  if (mLocations.empty()) {
+    // SpecialFrontendLocation case, don't output anything for this type.
+    return;
+  }
+
+  // Matchers for the two kinds of data rows, each writes the properties of
+  // one row into the object that is currently open in `aWriter`.
+  auto streamDynamicRow = [&aWriter](const DynamicData& aData) {
+    aWriter.StringProperty("key", aData.mKey);
+    if (aData.mLabel) {
+      aWriter.StringProperty("label", *aData.mLabel);
+    }
+    aWriter.StringProperty("format", FormatToStringSpan(aData.mFormat));
+    if (aData.mSearchable) {
+      aWriter.BoolProperty("searchable",
+                           *aData.mSearchable == Searchable::searchable);
+    }
+  };
+  auto streamStaticRow = [&aWriter](const StaticData& aStaticData) {
+    aWriter.StringProperty("label", aStaticData.mLabel);
+    aWriter.StringProperty("value", aStaticData.mValue);
+  };
+
+  aWriter.StartObjectElement();
+
+  aWriter.StringProperty("name", aName);
+
+  // Labels are optional, only non-empty ones are output.
+  if (!mChartLabel.empty()) {
+    aWriter.StringProperty("chartLabel", mChartLabel);
+  }
+  if (!mTooltipLabel.empty()) {
+    aWriter.StringProperty("tooltipLabel", mTooltipLabel);
+  }
+  if (!mTableLabel.empty()) {
+    aWriter.StringProperty("tableLabel", mTableLabel);
+  }
+
+  aWriter.StartArrayProperty("display");
+  for (const Location displayLocation : mLocations) {
+    aWriter.StringElement(LocationToStringSpan(displayLocation));
+  }
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("data");
+  for (const DataRow& row : mData) {
+    aWriter.StartObjectElement();
+    row.match(streamDynamicRow, streamStaticRow);
+    aWriter.EndObject();
+  }
+  aWriter.EndArray();
+
+  aWriter.EndObject();
+}
+
+// Maps a display location to the string that represents it in the JSON
+// output. Crashes on a value that is not one of the cases below.
+/* static */
+Span<const char> MarkerSchema::LocationToStringSpan(
+    MarkerSchema::Location aLocation) {
+  Span<const char> locationName;
+  switch (aLocation) {
+    case Location::markerChart:
+      locationName = mozilla::MakeStringSpan("marker-chart");
+      break;
+    case Location::markerTable:
+      locationName = mozilla::MakeStringSpan("marker-table");
+      break;
+    case Location::timelineOverview:
+      locationName = mozilla::MakeStringSpan("timeline-overview");
+      break;
+    case Location::timelineMemory:
+      locationName = mozilla::MakeStringSpan("timeline-memory");
+      break;
+    case Location::timelineIPC:
+      locationName = mozilla::MakeStringSpan("timeline-ipc");
+      break;
+    case Location::timelineFileIO:
+      locationName = mozilla::MakeStringSpan("timeline-fileio");
+      break;
+    case Location::stackChart:
+      locationName = mozilla::MakeStringSpan("stack-chart");
+      break;
+    default:
+      MOZ_CRASH("Unexpected Location enum");
+  }
+  return locationName;
+}
+
+// Maps a data format to the string that represents it in the JSON output.
+// Crashes on a value that is not one of the cases below.
+/* static */
+Span<const char> MarkerSchema::FormatToStringSpan(
+    MarkerSchema::Format aFormat) {
+  Span<const char> formatName;
+  switch (aFormat) {
+    case Format::url:
+      formatName = mozilla::MakeStringSpan("url");
+      break;
+    case Format::filePath:
+      formatName = mozilla::MakeStringSpan("file-path");
+      break;
+    case Format::string:
+      formatName = mozilla::MakeStringSpan("string");
+      break;
+    case Format::duration:
+      formatName = mozilla::MakeStringSpan("duration");
+      break;
+    case Format::time:
+      formatName = mozilla::MakeStringSpan("time");
+      break;
+    case Format::seconds:
+      formatName = mozilla::MakeStringSpan("seconds");
+      break;
+    case Format::milliseconds:
+      formatName = mozilla::MakeStringSpan("milliseconds");
+      break;
+    case Format::microseconds:
+      formatName = mozilla::MakeStringSpan("microseconds");
+      break;
+    case Format::nanoseconds:
+      formatName = mozilla::MakeStringSpan("nanoseconds");
+      break;
+    case Format::bytes:
+      formatName = mozilla::MakeStringSpan("bytes");
+      break;
+    case Format::percentage:
+      formatName = mozilla::MakeStringSpan("percentage");
+      break;
+    case Format::integer:
+      formatName = mozilla::MakeStringSpan("integer");
+      break;
+    case Format::decimal:
+      formatName = mozilla::MakeStringSpan("decimal");
+      break;
+    default:
+      MOZ_CRASH("Unexpected Format enum");
+  }
+  return formatName;
+}
+
+}  // namespace mozilla
+
+namespace mozilla::baseprofiler {
+// Explicit instantiations, marked MFBT_API, of the marker-adding function
+// templates for the argument lists below.
+// NOTE(review): presumably so that these common instantiations are compiled
+// and exported once from here instead of in every user -- confirm against the
+// declarations in BaseProfilerMarkers.h.
+
+// AddMarker with a TextMarker payload (one std::string argument).
+template MFBT_API ProfileBufferBlockIndex AddMarker(const ProfilerString8View&,
+                                                    const MarkerCategory&,
+                                                    MarkerOptions&&,
+                                                    markers::TextMarker,
+                                                    const std::string&);
+
+// AddMarkerToBuffer without payload.
+template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+                  const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+// AddMarkerToBuffer with a TextMarker payload (one std::string argument).
+template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+    MarkerOptions&&, markers::TextMarker, const std::string&);
+}  // namespace mozilla::baseprofiler
diff --git a/mozglue/baseprofiler/core/ProfilingCategory.cpp b/mozglue/baseprofiler/core/ProfilingCategory.cpp
new file mode 100644
index 0000000000..8ff2b15555
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingCategory.cpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// ProfilingSubcategory_X:
+// One enum for each category X, listing that category's subcategories. This
+// allows the sProfilingCategoryPairInfo macro construction below to look up a
+// per-category index for a subcategory.
+#define SUBCATEGORY_ENUMS_BEGIN_CATEGORY(name, labelAsString, color) \
+  enum class ProfilingSubcategory_##name : uint32_t {
+#define SUBCATEGORY_ENUMS_SUBCATEGORY(category, name, labelAsString) \
+    name,
+#define SUBCATEGORY_ENUMS_END_CATEGORY \
+  };
+MOZ_PROFILING_CATEGORY_LIST(SUBCATEGORY_ENUMS_BEGIN_CATEGORY,
+                            SUBCATEGORY_ENUMS_SUBCATEGORY,
+                            SUBCATEGORY_ENUMS_END_CATEGORY)
+#undef SUBCATEGORY_ENUMS_BEGIN_CATEGORY
+#undef SUBCATEGORY_ENUMS_SUBCATEGORY
+#undef SUBCATEGORY_ENUMS_END_CATEGORY
+
+// sProfilingCategoryPairInfo:
+// One ProfilingCategoryPairInfo per ProfilingCategoryPair, in the same order,
+// so a ProfilingCategoryPair value can be used as an index. Each entry holds
+// the category, the subcategory's per-category index, and its label string.
+#define CATEGORY_INFO_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_INFO_SUBCATEGORY(category, name, labelAsString) \
+  {ProfilingCategory::category,                                  \
+   uint32_t(ProfilingSubcategory_##category::name), labelAsString},
+#define CATEGORY_INFO_END_CATEGORY
+const ProfilingCategoryPairInfo sProfilingCategoryPairInfo[] = {
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_INFO_BEGIN_CATEGORY,
+                              CATEGORY_INFO_SUBCATEGORY,
+                              CATEGORY_INFO_END_CATEGORY)
+};
+#undef CATEGORY_INFO_BEGIN_CATEGORY
+#undef CATEGORY_INFO_SUBCATEGORY
+#undef CATEGORY_INFO_END_CATEGORY
+
+// clang-format on
+
+const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+    ProfilingCategoryPair aCategoryPair) {
+  static_assert(
+      uint32_t(ProfilingCategoryPair::COUNT) ==
+          MOZ_ARRAY_LENGTH(sProfilingCategoryPairInfo),
+      "sProfilingCategoryPairInfo and ProfilingCategory need to have the "
+      "same order and the same length");
+  // The enum value is the table index; it is bounds-checked in all builds.
+  const auto categoryPairIndex = static_cast<uint32_t>(aCategoryPair);
+  MOZ_RELEASE_ASSERT(categoryPairIndex <=
+                     uint32_t(ProfilingCategoryPair::LAST));
+  return sProfilingCategoryPairInfo[categoryPairIndex];
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/ProfilingStack.cpp b/mozglue/baseprofiler/core/ProfilingStack.cpp
new file mode 100644
index 0000000000..f5cd2ddd04
--- /dev/null
+++ b/mozglue/baseprofiler/core/ProfilingStack.cpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilingStack.h"
+
+#include <algorithm>
+
+#include "mozilla/IntegerRange.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/UniquePtrExtensions.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+ProfilingStack::~ProfilingStack() {
+  // The label macros keep a reference to the ProfilingStack to avoid a TLS
+  // access. If any of them is still outstanding at this point, continuing
+  // would lead to a use-after-free, so it is better to crash right now.
+  MOZ_RELEASE_ASSERT(stackPointer == 0);
+  // Free the frame array (allocated with new[] in ensureCapacitySlow()).
+  delete[] frames;
+}
+
+void ProfilingStack::ensureCapacitySlow() {
+  MOZ_ASSERT(stackPointer >= capacity);
+  const uint32_t kInitialCapacity = 128;
+  // New capacity: double the current one (or 128), but at least sp + 1.
+  uint32_t sp = stackPointer;
+  auto newCapacity =
+      std::max(sp + 1, capacity ? capacity * 2 : kInitialCapacity);
+  // Allocate the replacement array before any member is modified.
+  auto* newFrames = new ProfilingStackFrame[newCapacity];
+
+  // It's important that `frames` / `capacity` / `stackPointer` remain
+  // consistent here at all times.
+  for (auto i : IntegerRange(capacity)) {
+    newFrames[i] = frames[i];
+  }
+  // Switch to the new array, update capacity, and only then free the old one.
+  ProfilingStackFrame* oldFrames = frames;
+  frames = newFrames;
+  capacity = newCapacity;
+  delete[] oldFrames;
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.cpp b/mozglue/baseprofiler/core/RegisteredThread.cpp
new file mode 100644
index 0000000000..85a7fc2c6d
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.cpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "RegisteredThread.h"
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+RegisteredThread::RegisteredThread(ThreadInfo* aInfo, void* aStackTop)
+    : mRacyRegisteredThread(aInfo->ThreadId()),
+      mPlatformData(AllocPlatformData(aInfo->ThreadId())),
+      mStackTop(aStackTop),
+      mThreadInfo(aInfo) {
+#if defined(GP_OS_darwin)
+  // No guessing needed on mac: the caller's value is replaced with the stack
+  // address that pthreads reports for the thread running this constructor.
+  mStackTop = pthread_get_stackaddr_np(pthread_self());
+#endif
+}
+
+RegisteredThread::~RegisteredThread() {}  // nothing beyond member destruction
+
+size_t RegisteredThread::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  // Reports only the allocation that holds this object itself.
+  //
+  // Not included for now; may be added later if DMD finds it is worthwhile:
+  // - mPlatformData
+  //
+  // Deliberately not included:
+  // - mThreadInfo: ref-counted and shared, so not owned solely by this
+  //   object
+
+  return aMallocSizeOf(this);
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/RegisteredThread.h b/mozglue/baseprofiler/core/RegisteredThread.h
new file mode 100644
index 0000000000..6ae12b823f
--- /dev/null
+++ b/mozglue/baseprofiler/core/RegisteredThread.h
@@ -0,0 +1,166 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RegisteredThread_h
+#define RegisteredThread_h
+
+#include "platform.h"
+#include "ThreadInfo.h"
+
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains the state for a single thread that is accessible without
+// protection from gPSMutex in platform.cpp. Because there is no external
+// protection against data races, it must provide internal protection. Hence
+// the "Racy" prefix.
+//
+class RacyRegisteredThread final {
+ public:
+  explicit RacyRegisteredThread(int aThreadId)
+      : mThreadId(aThreadId), mSleep(AWAKE), mIsBeingProfiled(false) {}
+
+  ~RacyRegisteredThread() {}
+
+  void SetIsBeingProfiled(bool aIsBeingProfiled) {
+    mIsBeingProfiled = aIsBeingProfiled;
+  }
+
+  bool IsBeingProfiled() const { return mIsBeingProfiled; }
+
+  // This is called on every profiler restart. Put things that should happen at
+  // that time here.
+  void ReinitializeOnResume() {
+    // This is needed to cause an initial sample to be taken from sleeping
+    // threads that had been observed prior to the profiler stopping and
+    // restarting. Otherwise sleeping threads would not have any samples to
+    // copy forward while sleeping.
+    (void)mSleep.compareExchange(SLEEPING_OBSERVED, SLEEPING_NOT_OBSERVED);
+  }
+  // Returns false while awake, and on the first call of a sleep cycle (which
+  // marks the sleep as observed); returns true for every later call in it.
+  bool CanDuplicateLastSampleDueToSleep() {
+    if (mSleep == AWAKE) {
+      return false;
+    }
+
+    if (mSleep.compareExchange(SLEEPING_NOT_OBSERVED, SLEEPING_OBSERVED)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  // Call this whenever the current thread sleeps. Calling it twice in a row
+  // without an intervening SetAwake() call is an error.
+  void SetSleeping() {
+    MOZ_ASSERT(mSleep == AWAKE);
+    mSleep = SLEEPING_NOT_OBSERVED;
+  }
+
+  // Call this whenever the current thread wakes. Calling it twice in a row
+  // without an intervening SetSleeping() call is an error.
+  void SetAwake() {
+    MOZ_ASSERT(mSleep != AWAKE);
+    mSleep = AWAKE;
+  }
+
+  bool IsSleeping() { return mSleep != AWAKE; }
+
+  int ThreadId() const { return mThreadId; }
+
+  class ProfilingStack& ProfilingStack() {
+    return mProfilingStack;
+  }
+  const class ProfilingStack& ProfilingStack() const { return mProfilingStack; }
+
+ private:
+  class ProfilingStack mProfilingStack;
+
+  // mThreadId contains the thread ID of the current thread. It is safe to read
+  // this from multiple threads concurrently, as it will never be mutated.
+  const int mThreadId;
+
+  // mSleep tracks whether the thread is sleeping, and if so, whether it has
+  // been previously observed. This is used for an optimization: in some cases,
+  // when a thread is asleep, we duplicate the previous sample, which is
+  // cheaper than taking a new sample.
+  //
+  // mSleep is atomic because it is accessed from multiple threads.
+  //
+  // - It is written only by this thread, via SetSleeping() and SetAwake().
+  //
+  // - It is read by SamplerThread::Run().
+  //
+  // There are two cases where racing between threads can cause an issue.
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns false but that result is
+  //   invalidated before being acted upon, we will take a full sample
+  //   unnecessarily. This is additional work but won't cause any correctness
+  //   issues. (In actual fact, this case is impossible. In order to go from
+  //   CanDuplicateLastSampleDueToSleep() returning false to it returning true
+  //   requires an intermediate call to it in order for mSleep to go from
+  //   SLEEPING_NOT_OBSERVED to SLEEPING_OBSERVED.)
+  //
+  // - If CanDuplicateLastSampleDueToSleep() returns true but that result is
+  //   invalidated before being acted upon -- i.e. the thread wakes up before
+  //   DuplicateLastSample() is called -- we will duplicate the previous
+  //   sample. This is inaccurate, but only slightly... we will effectively
+  //   treat the thread as having slept a tiny bit longer than it really did.
+  //
+  // This latter inaccuracy could be avoided by moving the
+  // CanDuplicateLastSampleDueToSleep() check within the thread-freezing code,
+  // e.g. the section where Tick() is called. But that would reduce the
+  // effectiveness of the optimization because more code would have to be run
+  // before we can tell that duplication is allowed.
+  //
+  static const int AWAKE = 0;
+  static const int SLEEPING_NOT_OBSERVED = 1;
+  static const int SLEEPING_OBSERVED = 2;
+  Atomic<int> mSleep;
+
+  // Is this thread being profiled? (e.g., should markers be recorded?)
+  Atomic<bool, MemoryOrdering::Relaxed> mIsBeingProfiled;
+};
+
+// This class contains information that's relevant to a single thread only
+// while that thread is running and registered with the profiler, but
+// regardless of whether the profiler is running. All accesses to it are
+// protected by the profiler state lock.
+class RegisteredThread final {
+ public:
+  RegisteredThread(ThreadInfo* aInfo, void* aStackTop);
+  ~RegisteredThread();
+  // Accessors for the embedded RacyRegisteredThread (mutable and const).
+  class RacyRegisteredThread& RacyRegisteredThread() {
+    return mRacyRegisteredThread;
+  }
+  const class RacyRegisteredThread& RacyRegisteredThread() const {
+    return mRacyRegisteredThread;
+  }
+
+  PlatformData* GetPlatformData() const { return mPlatformData.get(); }
+  const void* StackTop() const { return mStackTop; }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const;
+  // Returns a RefPtr by value, i.e. one ref-count increment per call.
+  const RefPtr<ThreadInfo> Info() const { return mThreadInfo; }
+
+ private:
+  class RacyRegisteredThread mRacyRegisteredThread;
+
+  const UniquePlatformData mPlatformData;
+  const void* mStackTop;  // constructor argument; replaced on macOS
+
+  const RefPtr<ThreadInfo> mThreadInfo;
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // RegisteredThread_h
diff --git a/mozglue/baseprofiler/core/ThreadInfo.h b/mozglue/baseprofiler/core/ThreadInfo.h
new file mode 100644
index 0000000000..4be84a45a9
--- /dev/null
+++ b/mozglue/baseprofiler/core/ThreadInfo.h
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ThreadInfo_h
+#define ThreadInfo_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/TimeStamp.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// This class contains information about a thread which needs to be stored
+// across restarts of the profiler and which can be useful even after the
+// thread has stopped running.
+// It uses threadsafe refcounting and only contains immutable data.
+class ThreadInfo final {
+ public:
+  ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread,
+             const TimeStamp& aRegisterTime = TimeStamp::NowUnfuzzed())
+      : mName(aName),
+        mRegisterTime(aRegisterTime),
+        mThreadId(aThreadId),
+        mIsMainThread(aIsMainThread),
+        mRefCnt(0) {
+    // Sanity checks on the native thread ID (assertions only, no recovery).
+    MOZ_ASSERT(aThreadId >= 0, "native thread ID is < 0");
+    MOZ_ASSERT(aThreadId <= INT32_MAX, "native thread ID is > INT32_MAX");
+  }
+
+  // Using hand-rolled ref-counting, because RefCounted.h macros don't produce
+  // the same code between mozglue and libxul, see bug 1536656.
+  MFBT_API void AddRef() const { ++mRefCnt; }
+  MFBT_API void Release() const {
+    MOZ_ASSERT(int32_t(mRefCnt) > 0);
+    if (--mRefCnt == 0) {
+      delete this;  // the last reference is gone
+    }
+  }
+
+  const char* Name() const { return mName.c_str(); }
+  TimeStamp RegisterTime() const { return mRegisterTime; }
+  int ThreadId() const { return mThreadId; }
+  bool IsMainThread() const { return mIsMainThread; }
+
+ private:
+  const std::string mName;
+  const TimeStamp mRegisterTime;
+  const int mThreadId;
+  const bool mIsMainThread;
+  // Mutable so that the const AddRef()/Release() can update the count.
+  mutable Atomic<int32_t, MemoryOrdering::ReleaseAcquire> mRefCnt;
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // ThreadInfo_h
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.cpp b/mozglue/baseprofiler/core/VTuneProfiler.cpp
new file mode 100644
index 0000000000..2911c39f08
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_WIN
+#  undef UNICODE
+#  undef _UNICODE
+#endif
+
+#include "VTuneProfiler.h"
+
+#include <memory>
+
+#include "BaseProfiler.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+VTuneProfiler* VTuneProfiler::mInstance = nullptr;  // not assigned in this file
+
+void VTuneProfiler::Initialize() {
+  // This is just a 'dirty trick' to find out if the ittnotify DLL was found.
+  // If it wasn't this function always returns 0, otherwise it returns
+  // incrementing numbers, if the library was found this wastes 2 events but
+  // that should be okay.
+  // TODO re-implement here if vtune is needed; currently this does nothing.
+  // __itt_event testEvent =
+  //     __itt_event_create("Test event", strlen("Test event"));
+  // testEvent = __itt_event_create("Test event 2", strlen("Test event 2"));
+
+  // if (testEvent) {
+  //   mInstance = new VTuneProfiler();
+  // }
+}
+
+void VTuneProfiler::Shutdown() {}  // no-op: Initialize() allocates nothing
+
+void VTuneProfiler::TraceInternal(const char* aName, TracingKind aKind) {
+  // TODO re-implement here if vtune is needed. Until then this function does
+  // nothing; the intended logic is kept below as commented-out code.
+  // std::string str(aName);
+
+  // auto iter = mStrings.find(str);
+
+  // __itt_event event;
+  // if (iter != mStrings.end()) {
+  //   event = iter->second;
+  // } else {
+  //   event = __itt_event_create(aName, str.length());
+  //   mStrings.insert({str, event});
+  // }
+
+  // if (aKind == TRACING_INTERVAL_START || aKind == TRACING_EVENT) {
+  //   // VTune will consider starts not matched with an end to be single
+  //   // point in time events.
+  //   __itt_event_start(event);
+  // } else {
+  //   __itt_event_end(event);
+  // }
+}
+
+void VTuneProfiler::RegisterThreadInternal(const char* aName) {
+  // TODO re-implement here if vtune is needed; currently this does nothing.
+  // std::string str(aName);
+
+  // if (!str.compare("Main Thread (Base Profiler)")) {
+  //   // Process main thread.
+  //   switch (XRE_GetProcessType()) {
+  //     case GeckoProcessType::GeckoProcessType_Default:
+  //       __itt_thread_set_name("Main Process");
+  //       break;
+  //     case GeckoProcessType::GeckoProcessType_Content:
+  //       __itt_thread_set_name("Content Process");
+  //       break;
+  //     case GeckoProcessType::GeckoProcessType_GMPlugin:
+  //       __itt_thread_set_name("Plugin Process");
+  //       break;
+  //     case GeckoProcessType::GeckoProcessType_GPU:
+  //       __itt_thread_set_name("GPU Process");
+  //       break;
+  //     default:
+  //       __itt_thread_set_name("Unknown Process");
+  //   }
+  //   return;
+  // }
+  // __itt_thread_set_name(aName);
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/VTuneProfiler.h b/mozglue/baseprofiler/core/VTuneProfiler.h
new file mode 100644
index 0000000000..cf94ab7242
--- /dev/null
+++ b/mozglue/baseprofiler/core/VTuneProfiler.h
@@ -0,0 +1,84 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef VTuneProfiler_h
+#define VTuneProfiler_h
+
+// The intent here is to add 0 overhead for regular users. In order to build
+// the VTune profiler code at all --enable-vtune-instrumentation needs to be
+// set as a build option. Even then, when none of the environment variables
+// is specified that allow us to find the ittnotify DLL, these functions
+// should be minimal overhead. When starting Firefox under VTune, these
+// env vars will be automatically defined, otherwise INTEL_LIBITTNOTIFY32/64
+// should be set to point at the ittnotify DLL.
+#ifndef MOZ_VTUNE_INSTRUMENTATION
+// Instrumentation disabled: every VTUNE_* macro expands to nothing.
+#  define VTUNE_INIT()
+#  define VTUNE_SHUTDOWN()
+
+#  define VTUNE_TRACING(name, kind)
+#  define VTUNE_REGISTER_THREAD(name)
+
+#else
+
+#  include "BaseProfiler.h"
+
+// This is the regular Intel header, these functions are actually defined for
+// us inside js/src/vtune by an intel C file which actually dynamically resolves
+// them to the correct DLL. Through libxul these will 'magically' resolve.
+#  include "vtune/ittnotify.h"
+
+#  include <stddef.h>
+#  include <unordered_map>
+#  include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+// Static entry points forward to mInstance and do nothing while it is null.
+class VTuneProfiler {
+ public:
+  static void Initialize();
+  static void Shutdown();
+
+  enum TracingKind {
+    TRACING_EVENT,
+    TRACING_INTERVAL_START,
+    TRACING_INTERVAL_END,
+  };
+
+  static void Trace(const char* aName, TracingKind aKind) {
+    if (mInstance) {
+      mInstance->TraceInternal(aName, aKind);
+    }
+  }
+  static void RegisterThread(const char* aName) {
+    if (mInstance) {
+      mInstance->RegisterThreadInternal(aName);
+    }
+  }
+
+ private:
+  void TraceInternal(const char* aName, TracingKind aKind);
+  void RegisterThreadInternal(const char* aName);
+
+  // This is null when the ittnotify DLL could not be found.
+  static VTuneProfiler* mInstance;
+
+  std::unordered_map<std::string, __itt_event> mStrings;
+};
+// Instrumentation enabled: the VTUNE_* macros forward to VTuneProfiler.
+#  define VTUNE_INIT() VTuneProfiler::Initialize()
+#  define VTUNE_SHUTDOWN() VTuneProfiler::Shutdown()
+// NOTE(review): unqualified VTuneProfiler - confirm all users can see it.
+#  define VTUNE_TRACING(name, kind) VTuneProfiler::Trace(name, kind)
+#  define VTUNE_REGISTER_THREAD(name) VTuneProfiler::RegisterThread(name)
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif
+
+#endif /* VTuneProfiler_h */
diff --git a/mozglue/baseprofiler/core/platform-linux-android.cpp b/mozglue/baseprofiler/core/platform-linux-android.cpp
new file mode 100644
index 0000000000..210bc4dd31
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-linux-android.cpp
@@ -0,0 +1,550 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+// This file is used for both Linux and Android.
+
+#include <stdio.h>
+#include <math.h>
+
+#include <pthread.h>
+#if defined(GP_OS_freebsd)
+#  include <sys/thr.h>
+#endif
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <ucontext.h>
+// Ubuntu Dapper requires memory pages to be marked as
+// executable. Otherwise, OS raises an exception when executing code
+// in that page.
+#include <sys/types.h>  // mmap & munmap
+#include <sys/mman.h>   // mmap & munmap
+#include <sys/stat.h>   // open
+#include <fcntl.h>      // open
+#include <unistd.h>     // sysconf
+#include <semaphore.h>
+#ifdef __GLIBC__
+#  include <execinfo.h>  // backtrace, backtrace_symbols
+#endif                   // def __GLIBC__
+#include <strings.h>     // index
+#include <errno.h>
+#include <stdarg.h>
+
+#include "prenv.h"
+#include "mozilla/PodOperations.h"
+#include "mozilla/DebugOnly.h"
+
+#include <string.h>
+#include <list>
+
+using namespace mozilla;
+
+namespace mozilla {
+namespace baseprofiler {
+
+int profiler_current_process_id() { return getpid(); }  // OS process ID
+
+int profiler_current_thread_id() {
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // glibc doesn't provide a wrapper for gettid(), so use the raw syscall.
+  return static_cast<int>(static_cast<pid_t>(syscall(SYS_gettid)));
+#elif defined(GP_OS_freebsd)
+  long id;
+  (void)thr_self(&id);  // result deliberately ignored
+  return static_cast<int>(id);
+#else
+#  error "bad platform"
+#endif
+}
+
+static int64_t MicrosecondsSince1970() {
+  struct timeval wallClock;  // filled in as seconds + microseconds
+  gettimeofday(&wallClock, nullptr);
+  return int64_t(wallClock.tv_usec) + int64_t(wallClock.tv_sec) * 1000000;
+}
+
+void* GetStackTop(void* aGuess) { return aGuess; }  // returns aGuess unchanged
+
+static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
+  aRegs.mContext = aContext;
+  mcontext_t& mcontext = aContext->uc_mcontext;
+  // Copies PC/SP/FP/LR out of the signal context; aRegs also keeps aContext.
+  // Extracting the sample from the context is extremely machine dependent.
+#if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_amd64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
+  aRegs.mLR = 0;
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
+#elif defined(GP_PLAT_arm64_freebsd)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
+  aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
+#elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
+  aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
+  aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
+  aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
+  // NOTE(review): mLR is not assigned for mips64 - confirm this is intended.
+#else
+#  error "bad platform"
+#endif
+}
+
+#if defined(GP_OS_android)
+#  define SYS_tgkill __NR_tgkill
+#endif
+// Sends signalno to thread tid of thread group tgid via the raw syscall.
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+int tgkill(pid_t tgid, pid_t tid, int signalno) {
+  return syscall(SYS_tgkill, tgid, tid, signalno);
+}
+#endif
+// On FreeBSD, uses of tgkill are redirected to thr_kill2 instead.
+#if defined(GP_OS_freebsd)
+#  define tgkill thr_kill2
+#endif
+
+class PlatformData {
+ public:
+  explicit PlatformData(int aThreadId) {}  // aThreadId is not used here
+
+  ~PlatformData() {}
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler.  But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+//
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread              Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point sSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2)                             Copy register state
+//                                               into sSigHandlerCoordinator
+//                         <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended.                   wait(mMessage3)
+//   Examine its stack/register
+//   state at leisure
+//
+// Release samplee:
+//   post(mMessage3)       ------- MSG 3 ----->
+// wait(mMessage4)                              Samplee now resumes.  Tell
+//                                                the sampler that we are done.
+//                         <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal             (leave signal handler)
+//   handler has finished using
+//   sSigHandlerCoordinator.  We can
+//   safely reuse it for some other thread.
+//
+
+// A type used to coordinate between the sampler (signal sending) thread and
+// the thread currently being sampled (the samplee, which receives the
+// signals).
+//
+// The first message is sent using a SIGPROF signal delivery.  The subsequent
+// three are sent using sem_wait/sem_post pairs.  They are named accordingly
+// in the following struct.
+struct SigHandlerCoordinator {
+  SigHandlerCoordinator() {
+    PodZero(&mUContext);  // start with an all-zero saved context
+    int r = sem_init(&mMessage2, /* pshared */ 0, 0);
+    r |= sem_init(&mMessage3, /* pshared */ 0, 0);
+    r |= sem_init(&mMessage4, /* pshared */ 0, 0);
+    MOZ_ASSERT(r == 0);  // all three sem_init calls returned 0
+  }
+  // Destroys the three semaphores created by the constructor.
+  ~SigHandlerCoordinator() {
+    int r = sem_destroy(&mMessage2);
+    r |= sem_destroy(&mMessage3);
+    r |= sem_destroy(&mMessage4);
+    MOZ_ASSERT(r == 0);
+  }
+
+  sem_t mMessage2;       // To sampler: "context is in sSigHandlerCoordinator"
+  sem_t mMessage3;       // To samplee: "resume"
+  sem_t mMessage4;       // To sampler: "finished with sSigHandlerCoordinator"
+  ucontext_t mUContext;  // Context at signal
+};
+// Set by SuspendAndSampleAndResumeThread() for each sample; initially null.
+struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+
+static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
+  // Avoid TSan warning about clobbering errno.
+  int savedErrno = errno;
+  // Runs on the samplee thread; see the four-message protocol comment above.
+  MOZ_ASSERT(aSignal == SIGPROF);
+  MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+
+  // By sending us this signal, the sampler thread has sent us message 1 in
+  // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
+  // for use, please copy your register context into it."
+  Sampler::sSigHandlerCoordinator->mUContext =
+      *static_cast<ucontext_t*>(aContext);
+
+  // Send message 2: tell the sampler thread that the context has been copied
+  // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
+  // being interrupted by a signal, so there's no loop around this call.
+  int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+  MOZ_ASSERT(r == 0);
+
+  // At this point, the sampler thread assumes we are suspended, so we must
+  // not touch any global state here.
+
+  // Wait for message 3: the sampler thread tells us to resume.
+  while (true) {
+    r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+    if (r == -1 && errno == EINTR) {
+      // Interrupted by a signal.  Try again.
+      continue;
+    }
+    // We don't expect any other kind of failure
+    MOZ_ASSERT(r == 0);
+    break;
+  }
+
+  // Send message 4: tell the sampler thread that we are finished accessing
+  // |sSigHandlerCoordinator|.  After this point it is not safe to touch
+  // |sSigHandlerCoordinator|.
+  r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+  MOZ_ASSERT(r == 0);
+  // Put back the errno value that was saved on entry.
+  errno = savedErrno;
+}
+
+Sampler::Sampler(PSLockRef aLock)
+    // We don't know what the sampler thread's ID will be until it runs, so
+    // mSamplerTid starts as the dummy value -1 and is filled in for real in
+    // SuspendAndSampleAndResumeThread().
+    : mMyPid(profiler_current_process_id()), mSamplerTid(-1) {
+#if defined(USE_EHABI_STACKWALK)
+  EHABIStackWalkInit();
+#endif
+
+  // NOTE: We don't initialize LUL here, instead initializing it in
+  // SamplerThread's constructor. This is because with the
+  // profiler_suspend_and_sample_thread entry point, we want to be able to
+  // sample without waiting for LUL to be initialized.
+
+  // Request profiling signals: install SigprofHandler for SIGPROF and keep the
+  // previous disposition in mOldSigprofHandler. The struct is value-initialized
+  // so that no field reaches sigaction() with an indeterminate value.
+  struct sigaction sa = {};
+  sa.sa_sigaction = SigprofHandler;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART | SA_SIGINFO;
+  if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
+    MOZ_CRASH("Error installing SIGPROF handler in the profiler");
+  }
+}
+
+void Sampler::Disable(PSLockRef aLock) {
+  // Reinstate the SIGPROF disposition saved in mOldSigprofHandler. Signal
+  // handlers are global state, so do it now, while gPSMutex is locked.
+  sigaction(SIGPROF, &mOldSigprofHandler, nullptr);
+}
+
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  // Suspends the thread behind aRegisteredThread by sending it SIGPROF,
+  // passes its registers and aNow to aProcessRegs, then lets it resume.
+  //
+  // Only one sampler thread can be sampling at once.  So we expect to have
+  // complete control over |sSigHandlerCoordinator|.
+  MOZ_ASSERT(!sSigHandlerCoordinator);
+
+  if (mSamplerTid == -1) {
+    mSamplerTid = profiler_current_thread_id();
+  }
+  int sampleeTid = aRegisteredThread.Info()->ThreadId();
+  MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  SigHandlerCoordinator coord;  // on sampler thread's stack
+  sSigHandlerCoordinator = &coord;
+
+  // Send message 1 to the samplee (the thread to be sampled), by signalling
+  // at it.  This could fail if the thread doesn't exist anymore.
+  int r = tgkill(mMyPid, sampleeTid, SIGPROF);
+  if (r == 0) {
+    // Wait for message 2 from the samplee, indicating that the context
+    // is available and that the thread is suspended.
+    while (true) {
+      r = sem_wait(&sSigHandlerCoordinator->mMessage2);
+      if (r == -1 && errno == EINTR) {
+        continue;  // Interrupted by a signal.  Try again.
+      }
+      MOZ_ASSERT(r == 0);  // We don't expect any other kind of failure.
+      break;
+    }
+
+    //----------------------------------------------------------------//
+    // Sample the target thread.
+
+    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+    //
+    // The profiler's "critical section" begins here.  In the critical section,
+    // we must not do any dynamic memory allocation, nor try to acquire any lock
+    // or any other unshareable resource.  This is because the thread to be
+    // sampled has been suspended at some entirely arbitrary point, and we have
+    // no idea which unshareable resources (locks, essentially) it holds.  So
+    // any attempt to acquire any lock, including the implied locks used by the
+    // malloc implementation, risks deadlock.  This includes TimeStamp::Now(),
+    // which gets a lock on Windows.
+
+    // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
+    // valid.  We can poke around in it and unwind its stack as we like.
+
+    // Extract the current register values.
+    Registers regs;
+    PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
+    aProcessRegs(regs, aNow);
+
+    //----------------------------------------------------------------//
+    // Resume the target thread.
+
+    // Send message 3 to the samplee, which tells it to resume.
+    r = sem_post(&sSigHandlerCoordinator->mMessage3);
+    MOZ_ASSERT(r == 0);
+
+    // Wait for message 4 from the samplee, which tells us that it has
+    // finished with |sSigHandlerCoordinator|.
+    while (true) {
+      r = sem_wait(&sSigHandlerCoordinator->mMessage4);
+      if (r == -1 && errno == EINTR) {
+        continue;  // Interrupted by a signal.  Try again.
+      }
+      MOZ_ASSERT(r == 0);
+      break;
+    }
+
+    // The profiler's critical section ends here.  After this point, none of the
+    // critical section limitations documented above apply.
+    //
+    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  }
+
+  // This isn't strictly necessary, but doing so does help pick up anomalies
+  // in which the signal handler is running when it shouldn't be.
+  sSigHandlerCoordinator = nullptr;
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+static void* ThreadEntry(void* aArg) {
+  // pthread start routine: aArg is the SamplerThread whose Run() we execute.
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+#if defined(USE_LUL_STACKWALK)
+  if (!CorePS::Lul(aLock)) {
+    // First use: create the LUL instance and read all the unwind info that
+    // is currently available.
+    CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
+    lul::LUL* const unwinder = CorePS::Lul(aLock);
+    read_procmaps(unwinder);
+
+    // Switch into unwind mode. From now on no unwind info can be added to
+    // or removed from this LUL instance; only Unwind() calls are allowed.
+    unwinder->EnableUnwinding();
+
+    // Run the LUL unit tests if the environment asks for them.
+    if (getenv("MOZ_PROFILER_LUL_TEST")) {
+      int testsRun = 0, testsPassed = 0;
+      RunLulUnitTests(&testsRun, &testsPassed, unwinder);
+    }
+  }
+#endif
+
+  // Start the sampling thread, which repeatedly sends SIGPROF signals.
+  // Sending the signals ourselves instead of relying on itimer provides
+  // much better accuracy.
+  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+// Joining makes destruction wait until the thread started above has exited.
+SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
+
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // Sleep for aMicroseconds, going back to sleep after signal interruptions.
+  // The duration is split into whole seconds and leftover nanoseconds so
+  // that one nanosleep() path serves every interval. The previous usleep()
+  // branch for intervals >= 1 second was not restarted after EINTR, and
+  // POSIX lets usleep() reject arguments of 1,000,000 or more with EINVAL.
+  static const uint32_t kMicrosPerSecond = 1000000;
+
+  struct timespec ts;
+  ts.tv_sec = aMicroseconds / kMicrosPerSecond;
+  // Always below 1,000,000,000, as nanosleep() requires.
+  ts.tv_nsec = (aMicroseconds % kMicrosPerSecond) * 1000UL;
+
+  int rv = ::nanosleep(&ts, &ts);
+  while (rv != 0 && errno == EINTR) {
+    // Interrupted: nanosleep stored the remaining time back into ts, so
+    // simply go back to sleep for the rest.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
+
+void SamplerThread::Stop(PSLockRef aLock) {
+  // Restore the old signal handler. Signal handlers are global state, so it
+  // is important to do this now, while gPSMutex is locked. This is safe even
+  // though this SamplerThread is still alive: the next iteration of Run()'s
+  // main loop won't get past the mActivityGeneration check, and so won't
+  // send any signals.
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+
+// We use pthread_atfork() to temporarily disable signal delivery during any
+// fork() call. Without that, fork() can be repeatedly interrupted by signal
+// delivery, requiring it to be repeatedly restarted, which can lead to *long*
+// delays. See bug 837390.
+//
+// We provide no paf_child() function to run in the child after forking. This
+// is fine because we always immediately exec() after fork(), and exec()
+// clobbers all process state. (At one point we did have a paf_child()
+// function, but it caused problems related to locking gPSMutex. See bug
+// 1348374.)
+//
+// Unfortunately all this is only doable on non-Android because Bionic doesn't
+// have pthread_atfork.
+
+// In the parent, before the fork, record IsSamplingPaused, and then pause.
+static void paf_prepare() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return;  // No ActivePS: there is no sampling state to pause.
+  }
+  const auto pausedBeforeFork = ActivePS::IsSamplingPaused(lock);
+  ActivePS::SetWasSamplingPaused(lock, pausedBeforeFork);
+  ActivePS::SetIsSamplingPaused(lock, true);
+}
+
+// In the parent, after the fork, return IsSamplingPaused to the pre-fork state.
+static void paf_parent() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return;  // No ActivePS: there is no sampling state to restore.
+  }
+  // Put back the pause state recorded before the fork, then clear the record.
+  ActivePS::SetIsSamplingPaused(lock, ActivePS::WasSamplingPaused(lock));
+  ActivePS::SetWasSamplingPaused(lock, false);
+}
+
+static void PlatformInit(PSLockRef aLock) {
+  // Register the pre-fork and parent-side handlers; no child handler is set.
+  pthread_atfork(paf_prepare, paf_parent, nullptr);
+}
+
+#else
+
+static void PlatformInit(PSLockRef aLock) {}  // No fork handlers here.
+
+#endif
+
+#if defined(HAVE_NATIVE_UNWIND)
+// Context used by synchronous samples. It's safe to have a single one because
+// only one synchronous sample can be taken at a time (due to
+// profiler_get_backtrace()'s PSAutoLock).
+// ucontext_t sSyncUContext;
+
+void Registers::SyncPopulate() {
+  // Unimplemented: crashes until getcontext is ported from breakpad (TODO).
+  MOZ_CRASH("profiler_get_backtrace() unsupported");
+  // if (!getcontext(&sSyncUContext)) {
+  //   PopulateRegsFromContext(*this, &sSyncUContext);
+  // }
+}
+#endif
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-macos.cpp b/mozglue/baseprofiler/core/platform-macos.cpp
new file mode 100644
index 0000000000..fc847886ee
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-macos.cpp
@@ -0,0 +1,233 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <mach/mach_init.h>
+#include <mach-o/getsect.h>
+
+#include <AvailabilityMacros.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <libkern/OSAtomic.h>
+#include <mach/mach.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <mach/vm_statistics.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+// this port is based off of v8 svn revision 9837
+
+namespace mozilla {
+namespace baseprofiler {
+
+int profiler_current_process_id() { return getpid(); }  // Id of this process.
+
+int profiler_current_thread_id() {  // Via the thread_selfid syscall.
+  return static_cast<int>(static_cast<pid_t>(syscall(SYS_thread_selfid)));
+}
+
+static int64_t MicrosecondsSince1970() {
+  struct timeval now;
+  gettimeofday(&now, nullptr);
+  return int64_t(now.tv_sec) * 1000000 + int64_t(now.tv_usec);
+}
+
+void* GetStackTop(void* aGuess) {
+  // aGuess is ignored; pthreads reports the calling thread's stack address.
+  return pthread_get_stackaddr_np(pthread_self());
+}
+
+class PlatformData {
+ public:
+  explicit PlatformData(int aThreadId) : mProfiledThread(mach_thread_self()) {}
+
+  ~PlatformData() {
+    // Release the thread port that the constructor obtained.
+    mach_port_deallocate(mach_task_self(), mProfiledThread);
+  }
+
+  thread_act_t ProfiledThread() { return mProfiledThread; }
+
+ private:
+  // Port from mach_thread_self(), i.e. of the constructing thread. Mach is
+  // used because pthreads lacks the thread manipulation primitives required;
+  // for details, consult "Mac OS X Internals" book, Section 7.3.
+  thread_act_t mProfiledThread;
+};
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+Sampler::Sampler(PSLockRef aLock) {}  // No sampler setup in this file.
+
+void Sampler::Disable(PSLockRef aLock) {}  // Nothing to undo here.
+
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  // Suspends the samplee, passes its PC/SP/FP to aProcessRegs, and resumes it.
+  thread_act_t samplee_thread =
+      aRegisteredThread.GetPlatformData()->ProfiledThread();
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  // thread_suspend is used on OS X because pthread_kill (which we at one time
+  // used on Linux) has less consistent performance and causes strange crashes,
+  // see bug 1166778 and bug 1166808.  It is also just a lot simpler to use.
+
+  if (KERN_SUCCESS != thread_suspend(samplee_thread)) {
+    return;  // Suspension failed, so there is nothing to sample.
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here.  We must be very careful
+  // what we do here, or risk deadlock.  See the corresponding comment in
+  // platform-linux-android.cpp for details.
+
+#if defined(__x86_64__)
+  thread_state_flavor_t flavor = x86_THREAD_STATE64;
+  x86_thread_state64_t state;
+  mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __r##name
+#  else
+#    define REGISTER_FIELD(name) r##name
+#  endif  // __DARWIN_UNIX03
+#elif defined(__aarch64__)
+  thread_state_flavor_t flavor = ARM_THREAD_STATE64;
+  arm_thread_state64_t state;
+  mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
+#  if __DARWIN_UNIX03
+#    define REGISTER_FIELD(name) __##name
+#  else
+#    define REGISTER_FIELD(name) name
+#  endif  // __DARWIN_UNIX03
+#else
+#  error "unknown architecture"
+#endif
+
+  if (thread_get_state(samplee_thread, flavor,
+                       reinterpret_cast<natural_t*>(&state),
+                       &count) == KERN_SUCCESS) {
+    Registers regs;
+#if defined(__x86_64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
+#elif defined(__aarch64__)
+    regs.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(pc));
+    regs.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+    regs.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(fp));
+#else
+#  error "unknown architecture"
+#endif
+    regs.mLR = 0;  // Not taken from the thread state.
+
+    aProcessRegs(regs, aNow);
+  }
+
+#undef REGISTER_FIELD
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  thread_resume(samplee_thread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+static void* ThreadEntry(void* aArg) {
+  // pthread start routine: aArg is the SamplerThread whose Run() we execute.
+  static_cast<SamplerThread*>(aArg)->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
+      mThread{nullptr} {
+  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+// Joining makes destruction wait until the thread started above has exited.
+SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
+
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // nanosleep(2) stores the unslept time in ts, so we retry after signals.
+  struct timespec ts;
+  ts.tv_sec = aMicroseconds / 1000000;
+  ts.tv_nsec = (aMicroseconds % 1000000) * 1000L;
+  while (::nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
+}
+
+void SamplerThread::Stop(PSLockRef aLock) { mSampler.Disable(aLock); }  // no-op
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+static void PlatformInit(PSLockRef aLock) {}  // No platform-wide setup.
+
+#if defined(HAVE_NATIVE_UNWIND)
+void Registers::SyncPopulate() {
+#  if defined(__x86_64__)
+  asm(
+      // Compute caller's %rsp by adding to %rbp:
+      // 8 bytes for previous %rbp, 8 bytes for return address
+      "leaq 0x10(%%rbp), %0\n\t"
+      // Dereference %rbp to get previous %rbp
+      "movq (%%rbp), %1\n\t"
+      : "=r"(mSP), "=r"(mFP));
+#  elif defined(__aarch64__)
+  asm(
+      // Compute caller's sp by adding to fp:
+      // 8 bytes for previous fp, 8 bytes for return address
+      "add %0, x29, #0x10\n\t"
+      // Dereference fp to get previous fp
+      "ldr %1, [x29]\n\t"
+      : "=r"(mSP), "=r"(mFP));
+#  else
+#    error "unknown architecture"
+#  endif
+  mPC = reinterpret_cast<Address>(
+      __builtin_extract_return_addr(__builtin_return_address(0)));
+  mLR = 0;  // Left at zero; the return address goes into mPC above.
+}
+#endif
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform-win32.cpp b/mozglue/baseprofiler/core/platform-win32.cpp
new file mode 100644
index 0000000000..22b8a8462b
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform-win32.cpp
@@ -0,0 +1,351 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <process.h>
+
+#include "nsWindowsDllInterceptor.h"
+#include "mozilla/StackWalk_windows.h"
+#include "mozilla/WindowsVersion.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+int profiler_current_process_id() { return _getpid(); }  // Id of this process.
+
+int profiler_current_thread_id() {
+  const DWORD nativeId = GetCurrentThreadId();
+  MOZ_ASSERT(nativeId <= INT32_MAX, "native thread ID is > INT32_MAX");
+  return static_cast<int>(nativeId);
+}
+
+static int64_t MicrosecondsSince1970() {
+  // Read the clock straight into a FILETIME (100 ns ticks since 1601).
+  // Going through SYSTEMTIME, as before, rounded the result down to whole
+  // milliseconds and ignored a conversion that is allowed to fail.
+  FILETIME ft;
+  GetSystemTimeAsFileTime(&ft);
+  int64_t ticks;
+  static_assert(sizeof(ft) == sizeof(ticks), "Expect FILETIME to be 64 bits");
+  memcpy(&ticks, &ft, sizeof(ticks));
+
+  // 100 ns ticks between 1601-01-01 and the Unix epoch, 1970-01-01.
+  const int64_t epochBias = 116444736000000000LL;
+  return (ticks - epochBias) / 10;
+}
+
+void* GetStackTop(void* aGuess) {
+  const PNT_TIB threadInfo = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
+  return reinterpret_cast<void*>(threadInfo->StackBase);  // aGuess unused
+}
+
+static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
+#if defined(GP_ARCH_amd64)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
+#elif defined(GP_ARCH_x86)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
+#elif defined(GP_ARCH_arm64)
+  aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
+  aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
+  aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
+#else
+#  error "bad arch"
+#endif
+  aRegs.mLR = 0;  // Not taken from aContext on any architecture.
+}
+
+// Gets a real (i.e. not pseudo) handle for the current thread, with the
+// permissions needed for profiling.
+// @return a real HANDLE for the current thread.
+static HANDLE GetRealCurrentThreadHandleForProfiling() {
+  // Access rights requested on the duplicated handle.
+  const DWORD access =
+      THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION;
+  const HANDLE self = ::GetCurrentProcess();
+
+  HANDLE realHandle;
+  const BOOL duplicated = ::DuplicateHandle(self, ::GetCurrentThread(), self,
+                                            &realHandle, access, FALSE, 0);
+  // nullptr tells the caller that no real handle could be obtained.
+  return duplicated ? realHandle : nullptr;
+}
+
+class PlatformData {
+ public:
+  // Duplicates a handle to the calling thread, which is asserted to be the
+  // thread aThreadId, i.e. the thread that we are going to profile. A real
+  // handle is needed because it is going to be used in the sampler thread.
+  explicit PlatformData(int aThreadId)
+      : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
+    MOZ_ASSERT(aThreadId == ::GetCurrentThreadId());
+  }
+
+  ~PlatformData() {
+    if (mProfiledThread != nullptr) {
+      CloseHandle(mProfiledThread);
+      mProfiledThread = nullptr;
+    }
+  }
+
+  HANDLE ProfiledThread() { return mProfiledThread; }
+
+ private:
+  HANDLE mProfiledThread;  // nullptr if the handle could not be duplicated.
+};
+
+#if defined(USE_MOZ_STACK_WALK)
+// Returns the profiled thread's handle held by aData.
+HANDLE GetThreadHandle(PlatformData* aData) { return aData->ProfiledThread(); }
+#endif
+
+static const HANDLE kNoThread = INVALID_HANDLE_VALUE;  // "No thread" marker.
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler target specifics
+
+Sampler::Sampler(PSLockRef aLock) {}  // No sampler setup in this file.
+
+void Sampler::Disable(PSLockRef aLock) {}  // Nothing to undo here.
+
+template <typename Func>
+void Sampler::SuspendAndSampleAndResumeThread(
+    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+    const TimeStamp& aNow, const Func& aProcessRegs) {
+  HANDLE profiled_thread =
+      aRegisteredThread.GetPlatformData()->ProfiledThread();
+  if (profiled_thread == nullptr) {
+    return;  // No handle is available for this thread, so skip the sample.
+  }
+
+  // Context used for sampling the register state of the profiled thread.
+  CONTEXT context;
+  memset(&context, 0, sizeof(context));
+
+  //----------------------------------------------------------------//
+  // Suspend the samplee thread and get its context.
+
+  static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
+  if (SuspendThread(profiled_thread) == kSuspendFailed) {
+    return;  // Suspension failed, so there is nothing to sample.
+  }
+
+  // SuspendThread is asynchronous, so the thread may still be running.
+  // Call GetThreadContext first to ensure the thread is really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+  // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+  // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if defined(GP_ARCH_amd64)
+  context.ContextFlags = CONTEXT_FULL;
+#else
+  context.ContextFlags = CONTEXT_CONTROL;
+#endif
+  if (!GetThreadContext(profiled_thread, &context)) {
+    ResumeThread(profiled_thread);  // Undo the SuspendThread() above.
+    return;
+  }
+
+  //----------------------------------------------------------------//
+  // Sample the target thread.
+
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+  //
+  // The profiler's "critical section" begins here.  We must be very careful
+  // what we do here, or risk deadlock.  See the corresponding comment in
+  // platform-linux-android.cpp for details.
+
+  Registers regs;
+  PopulateRegsFromContext(regs, &context);
+  aProcessRegs(regs, aNow);
+
+  //----------------------------------------------------------------//
+  // Resume the target thread.
+
+  ResumeThread(profiled_thread);
+
+  // The profiler's critical section ends here.
+  //
+  // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+}
+
+// END Sampler target specifics
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread target specifics
+
+static unsigned int __stdcall ThreadEntry(void* aArg) {
+  // _beginthreadex start routine: aArg is the SamplerThread to run.
+  static_cast<SamplerThread*>(aArg)->Run();
+  return 0;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mSampler(aLock),
+      mActivityGeneration(aActivityGeneration),
+      mIntervalMicroseconds(
+          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
+  // The timer resolution (around 16ms by default) is only raised when the
+  // requested interval is sufficiently low. NOTE(review): intervals below
+  // 1000us pass 0 to timeBeginPeriod() - confirm that this is intended.
+  if (mIntervalMicroseconds < 10 * 1000) {
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  // Start the thread with _beginthreadex() rather than the Win32 function
+  // CreateThread(), because CreateThread() does not initialize the
+  // thread-specific structures in the C runtime library.
+  const auto rawHandle = _beginthreadex(nullptr, /* stack_size */ 0,
+                                        ThreadEntry, this,
+                                        /* initflag */ 0, nullptr);
+  mThread = reinterpret_cast<HANDLE>(rawHandle);
+  if (!mThread) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+SamplerThread::~SamplerThread() {
+  // Block until the thread behind mThread has finished.
+  WaitForSingleObject(mThread, INFINITE);
+  if (mThread == kNoThread) {
+    return;
+  }
+  CloseHandle(mThread);  // Close our own handle for the thread.
+}
+
+void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
+  // For intervals of a millisecond or more, keep the old behaviour of a
+  // minimum Sleep(1), even for smaller-than-usual sleeps after an overshoot.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, aMicroseconds / 1000));
+    return;
+  }
+
+  // The user explicitly opted into a sub-millisecond profiler interval.
+  const TimeStamp deadline = TimeStamp::NowUnfuzzed() +
+                             TimeDuration::FromMicroseconds(aMicroseconds);
+  // Sleep away as many whole milliseconds as possible first...
+  const uint32_t wholeMillis = aMicroseconds / 1000;
+  if (wholeMillis >= 1) {
+    ::Sleep(wholeMillis);
+  }
+  // ...then spin until enough time has passed.
+  while (TimeStamp::NowUnfuzzed() < deadline) {
+    YieldProcessor();
+  }
+}
+
+void SamplerThread::Stop(PSLockRef aLock) {
+  // Undo the timer resolution change made by the constructor, using the same
+  // condition and period value. Do it now while gPSMutex is locked, i.e.
+  // before any other SamplerThread can be created and call ::timeBeginPeriod().
+  //
+  // It's safe to do this now even though this SamplerThread is still alive,
+  // because the next time the main loop of Run() iterates it won't get past
+  // the mActivityGeneration check, and so it won't make any more ::Sleep()
+  // calls.
+  if (mIntervalMicroseconds < 10 * 1000) {
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  mSampler.Disable(aLock);
+}
+
+// END SamplerThread target specifics
+////////////////////////////////////////////////////////////////////////
+
+static void PlatformInit(PSLockRef aLock) {}  // No platform-wide setup.
+
+#if defined(HAVE_NATIVE_UNWIND)
+void Registers::SyncPopulate() {
+  CONTEXT captured;  // Filled in with the calling thread's current context.
+  RtlCaptureContext(&captured);
+  PopulateRegsFromContext(*this, &captured);
+}
+#endif
+
+#if defined(GP_PLAT_amd64_windows)
+static WindowsDllInterceptor NtDllIntercept;
+
+using LdrUnloadDll_func = NTSTATUS(NTAPI*)(HMODULE module);
+static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll;
+
+static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) {
+  // While LdrUnloadDll holds the RtlLookupFunctionEntry lock, the stack
+  // walker must be prevented from suspending this thread.
+  AutoSuppressStackWalking noStackWalk;
+  return stub_LdrUnloadDll(module);
+}
+
+// These pointers are disguised as PVOID to avoid pulling in obscure headers
+using LdrResolveDelayLoadedAPI_func = PVOID(WINAPI*)(
+    PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
+    PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags);
+static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func>
+    stub_LdrResolveDelayLoadedAPI;
+
+static PVOID WINAPI patched_LdrResolveDelayLoadedAPI(
+    PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
+    PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) {
+  // While LdrResolveDelayLoadedAPI holds the RtlLookupFunctionEntry lock,
+  // the stack walker must be prevented from suspending this thread.
+  AutoSuppressStackWalking noStackWalk;
+  return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor,
+                                       FailureDllHook, FailureSystemHook,
+                                       ThunkAddress, Flags);
+}
+
+MFBT_API void InitializeWin64ProfilerHooks() {
+  // Both profilers may call this function, but the hooks must only be
+  // installed once; every later call returns immediately.
+  static bool sHooksInstalled = false;
+  if (sHooksInstalled) {
+    return;
+  }
+  sHooksInstalled = true;
+
+  NtDllIntercept.Init("ntdll.dll");
+  stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll);
+  if (IsWin8OrLater()) {  // LdrResolveDelayLoadedAPI was introduced in Win8
+    stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept,
+                                      "LdrResolveDelayLoadedAPI",
+                                      &patched_LdrResolveDelayLoadedAPI);
+  }
+}
+#endif  // defined(GP_PLAT_amd64_windows)
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.cpp b/mozglue/baseprofiler/core/platform.cpp
new file mode 100644
index 0000000000..14c48ce649
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.cpp
@@ -0,0 +1,3712 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// There are three kinds of samples done by the profiler.
+//
+// - A "periodic" sample is the most complex kind. It is done in response to a
+//   timer while the profiler is active. It involves writing a stack trace plus
+//   a variety of other values (memory measurements, responsiveness
+//   measurements, etc.) into the main ProfileBuffer. The sampling is done from
+//   off-thread, and so SuspendAndSampleAndResumeThread() is used to get the
+//   register values.
+//
+// - A "synchronous" sample is a simpler kind. It is done in response to an API
+//   call (profiler_get_backtrace()). It involves writing a stack trace and
+//   little else into a temporary ProfileBuffer, and wrapping that up in a
+//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
+//   is done on-thread, and so Registers::SyncPopulate() is used to get the
+//   register values.
+//
+// - A "backtrace" sample is the simplest kind. It is done in response to an
+//   API call (profiler_suspend_and_sample_thread()). It involves getting a
+//   stack trace via a ProfilerStackCollector; it does not write to a
+//   ProfileBuffer. The sampling is done from off-thread, and so uses
+//   SuspendAndSampleAndResumeThread() to get the register values.
+
+#include "platform.h"
+
+#include <algorithm>
+#include <errno.h>
+#include <fstream>
+#include <ostream>
+#include <set>
+#include <sstream>
+
+// #include "memory_hooks.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/DoubleConversion.h"
+#include "mozilla/Printf.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#include "mozilla/ProfileChunkedBuffer.h"
+#include "mozilla/Services.h"
+#include "mozilla/Span.h"
+#include "mozilla/StackWalk.h"
+#include "mozilla/StaticPtr.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "prdtoa.h"
+#include "prtime.h"
+
+#include "BaseProfiler.h"
+#include "BaseProfilingCategory.h"
+#include "PageInformation.h"
+#include "ProfiledThreadData.h"
+#include "ProfilerBacktrace.h"
+#include "ProfileBuffer.h"
+#include "RegisteredThread.h"
+#include "BaseProfilerSharedLibraries.h"
+#include "ThreadInfo.h"
+#include "VTuneProfiler.h"
+
+// Win32 builds always have frame pointers, so FramePointerStackWalk() always
+// works.
+#if defined(GP_PLAT_x86_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// Win64 builds always omit frame pointers, so we use the slower
+// MozStackWalk(), which works in that case.
+#if defined(GP_PLAT_amd64_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_MOZ_STACK_WALK
+#endif
+
+// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
+// MozStackWalk().
+#if defined(GP_PLAT_arm64_windows)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_MOZ_STACK_WALK
+#endif
+
+// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
+// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
+// on Mac.
+#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
+#  define HAVE_NATIVE_UNWIND
+#  define USE_FRAME_POINTER_STACK_WALK
+#endif
+
+// No stack-walking in baseprofiler on linux, android, bsd.
+// APIs now make it easier to capture backtraces from the Base Profiler, which
+// is currently not supported on these platforms, and would lead to a MOZ_CRASH
+// in Registers::SyncPopulate(). `#if 0` added in bug 1658232, follow-up bugs
+// should be referenced in meta bug 1557568.
+#if 0
+// Android builds use the ARM Exception Handling ABI to unwind.
+#  if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+#    define HAVE_NATIVE_UNWIND
+#    define USE_EHABI_STACKWALK
+#    include "EHABIStackWalk.h"
+#  endif
+
+// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
+#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
+      defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
+      defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
+      defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
+      defined(GP_PLAT_arm64_freebsd)
+#    define HAVE_NATIVE_UNWIND
+#    define USE_LUL_STACKWALK
+#    include "lul/LulMain.h"
+#    include "lul/platform-linux-lul.h"
+
+// On linux we use LUL for periodic samples and synchronous samples, but we use
+// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
+// (See the comment at the top of the file for a definition of
+// periodic/synchronous/backtrace.).
+//
+// FramePointerStackWalk can produce incomplete stacks when the current entry is
+// in a shared library without framepointers, however LUL can take a long time
+// to initialize, which is undesirable for consumers of
+// profiler_suspend_and_sample_thread like the Background Hang Reporter.
+#    if defined(MOZ_PROFILING)
+#      define USE_FRAME_POINTER_STACK_WALK
+#    endif
+#  endif
+#endif
+
+// We can only stackwalk without expensive initialization on platforms which
+// support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
+// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
+// which can be expensive.
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+#  define HAVE_FASTINIT_NATIVE_UNWIND
+#endif
+
+#ifdef MOZ_VALGRIND
+#  include <valgrind/memcheck.h>
+#else
+#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include <ucontext.h>
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+using detail::RacyFeatures;
+
+// Returns true if messages of level `aLevelToTest` should be logged, i.e., if
+// that level does not exceed the maximum level selected through the
+// environment. The environment is only consulted on the first call:
+//   MOZ_BASE_PROFILER_VERBOSE_LOGGING -> 5
+//   MOZ_BASE_PROFILER_DEBUG_LOGGING   -> 4
+//   MOZ_BASE_PROFILER_LOGGING         -> 3
+//   none of the above                 -> 0
+bool LogTest(int aLevelToTest) {
+  static const int maxLevel = []() {
+    // The most verbose variable that is set wins.
+    if (getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING")) {
+      return 5;
+    }
+    if (getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING")) {
+      return 4;
+    }
+    if (getenv("MOZ_BASE_PROFILER_LOGGING")) {
+      return 3;
+    }
+    return 0;
+  }();
+  return aLevelToTest <= maxLevel;
+}
+
+// printf-style output helper. On Android the formatted message is sent to the
+// Android log with priority ANDROID_LOG_INFO and tag "Gecko"; everywhere else
+// it is written to stderr.
+void PrintToConsole(const char* aFmt, ...) {
+  va_list varArgs;
+  va_start(varArgs, aFmt);
+#if !defined(ANDROID)
+  vfprintf(stderr, aFmt, varArgs);
+#else
+  __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, varArgs);
+#endif
+  va_end(varArgs);
+}
+
+// Statically initialized to 0, then set once from profiler_init(), which should
+// be called from the main thread before any other use of the profiler.
+int scProfilerMainThreadId;
+
+// Compile-time sanity check of the feature list: returns true only if the
+// feature numbers listed by BASE_PROFILER_FOR_EACH_FEATURE are exactly
+// 0, 1, 2, ... in that order.
+constexpr static bool ValidateFeatures() {
+  int nextExpectedNumber = 0;
+
+  // Each feature must carry the next number in sequence; the first one that
+  // does not makes the whole list invalid.
+#define CHECK_FEATURE(n_, str_, Name_, desc_) \
+  if ((n_) == nextExpectedNumber) {           \
+    ++nextExpectedNumber;                     \
+  } else {                                    \
+    return false;                             \
+  }
+
+  BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
+
+#undef CHECK_FEATURE
+
+  return true;
+}
+
+static_assert(ValidateFeatures(), "Feature list is invalid");
+
+// Return all features that are available on this platform.
+// Builds the bitfield of features usable on this platform/configuration:
+// every known feature is set first, then the unsupported ones are cleared.
+static uint32_t AvailableFeatures() {
+  uint32_t available = 0;
+
+  // Start with every feature listed by BASE_PROFILER_FOR_EACH_FEATURE.
+#define ADD_FEATURE(n_, str_, Name_, desc_) \
+  ProfilerFeature::Set##Name_(available);
+
+  BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
+
+#undef ADD_FEATURE
+
+  // Then drop whatever is not supported here. StackWalk is only kept when a
+  // native unwinder was selected at build time.
+  ProfilerFeature::ClearJava(available);
+  ProfilerFeature::ClearJS(available);
+  ProfilerFeature::ClearScreenshots(available);
+#if !defined(HAVE_NATIVE_UNWIND)
+  ProfilerFeature::ClearStackWalk(available);
+#endif
+  ProfilerFeature::ClearTaskTracer(available);
+  ProfilerFeature::ClearJSTracer(available);
+
+  return available;
+}
+
+// Default features common to all contexts (even if not available).
+// Returns the features that are on by default in every context. Some of them
+// may not actually be available on this platform.
+static uint32_t DefaultFeatures() {
+  uint32_t defaults = 0;
+  defaults |= ProfilerFeature::Java;
+  defaults |= ProfilerFeature::JS;
+  defaults |= ProfilerFeature::Leaf;
+  defaults |= ProfilerFeature::StackWalk;
+  defaults |= ProfilerFeature::Threads;
+  return defaults;
+}
+
+// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
+// available).
+// Returns the features added on top of the defaults when MOZ_PROFILER_STARTUP
+// is set. Some of them may not actually be available on this platform.
+static uint32_t StartupExtraDefaultFeatures() {
+  uint32_t extras = 0;
+  // CPUUtilization: useful in startup profiles to see when startup alternates
+  // between CPU intensive tasks and being blocked.
+  extras |= ProfilerFeature::CPUUtilization;
+  // MainThreadIO: startup is heavy on I/O operations, and main thread I/O is
+  // really important to see there.
+  extras |= ProfilerFeature::MainThreadIO;
+  return extras;
+}
+
+class MOZ_RAII PSAutoTryLock;
+
+// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
+// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
+// External profilers may use this same lock for their own data, but as the lock
+// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be
+// called, to avoid double-locking.
+class MOZ_RAII PSAutoLock {
+ public:
+  // A PSAutoLock can be neither copied nor assigned.
+  PSAutoLock(const PSAutoLock&) = delete;
+  void operator=(const PSAutoLock&) = delete;
+
+  // Locks gPSMutex on construction...
+  PSAutoLock() { gPSMutex.Lock(); }
+
+  // ...and unlocks it on destruction.
+  ~PSAutoLock() { gPSMutex.Unlock(); }
+
+  // Asks gPSMutex whether it is currently locked on the calling thread.
+  [[nodiscard]] static bool IsLockedOnCurrentThread() {
+    return gPSMutex.IsLockedOnCurrentThread();
+  }
+
+ private:
+  // The one mutex shared by all PSAutoLock and PSAutoTryLock objects.
+  static detail::BaseProfilerMutex gPSMutex;
+
+  // PSAutoTryLock needs access to gPSMutex, and it builds its PSAutoLock with
+  // the private `PSAutoLock(int)` constructor below by going through
+  // `Maybe<const PSAutoLock>::emplace()`; hence both friend declarations.
+  friend class PSAutoTryLock;
+  friend class Maybe<const PSAutoLock>;
+
+  // Constructor for a gPSMutex that is already locked: it only asserts that
+  // the current thread owns the mutex, and does not lock it again. The `int`
+  // parameter exists solely to distinguish it from the main constructor.
+  explicit PSAutoLock(int) { gPSMutex.AssertCurrentThreadOwns(); }
+};
+
+// RAII class that attempts to lock the profiler mutex. Example usage:
+//   PSAutoTryLock tryLock;
+//   if (tryLock.IsLocked()) { locked_foo(tryLock.LockRef()); }
+class MOZ_RAII PSAutoTryLock {
+ public:
+  // Makes a single attempt at locking the profiler mutex. On success, a
+  // PSAutoLock adopting the already-locked mutex is stored, and it will unlock
+  // the mutex when this object is destroyed.
+  PSAutoTryLock() {
+    if (!PSAutoLock::gPSMutex.TryLock()) {
+      // Somebody else holds the mutex; stay unlocked.
+      return;
+    }
+    mMaybePSAutoLock.emplace(0);
+  }
+
+  // Return true if the mutex was acquired and locked.
+  [[nodiscard]] bool IsLocked() const { return mMaybePSAutoLock.isSome(); }
+
+  // Return a reference to the `PSAutoLock` holding the mutex, to be passed as
+  // proof-of-lock. Must only be called when `IsLocked()` is true.
+  [[nodiscard]] const PSAutoLock& LockRef() const {
+    MOZ_ASSERT(IsLocked());
+    return mMaybePSAutoLock.ref();
+  }
+
+ private:
+  // `Nothing` if the locking attempt failed. Otherwise, the `const PSAutoLock`
+  // that holds the locked mutex, a reference to which may be handed to
+  // functions expecting a proof-of-lock.
+  Maybe<const PSAutoLock> mMaybePSAutoLock;
+};
+
+detail::BaseProfilerMutex PSAutoLock::gPSMutex;
+
+// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
+// fields.
+typedef const PSAutoLock& PSLockRef;
+
+// PS_GET(type_, name_) defines a static getter `name_(PSLockRef)` returning
+// `sInstance->m<name_>` as `type_`. The unnamed PSLockRef parameter is the
+// caller's proof that gPSMutex is locked. Asserts that sInstance is non-null.
+#define PS_GET(type_, name_)      \
+  static type_ name_(PSLockRef) { \
+    MOZ_ASSERT(sInstance);        \
+    return sInstance->m##name_;   \
+  }
+
+// PS_GET_LOCKLESS(type_, name_) defines the same getter as PS_GET, but without
+// the PSLockRef parameter, i.e., callable without holding gPSMutex.
+#define PS_GET_LOCKLESS(type_, name_) \
+  static type_ name_() {              \
+    MOZ_ASSERT(sInstance);            \
+    return sInstance->m##name_;       \
+  }
+
+// PS_GET_AND_SET(type_, name_) defines the PS_GET getter plus a static setter
+// `Set<name_>(PSLockRef, type_)` that assigns its argument to
+// `sInstance->m<name_>`. Both assert that sInstance is non-null.
+#define PS_GET_AND_SET(type_, name_)                  \
+  PS_GET(type_, name_)                                \
+  static void Set##name_(PSLockRef, type_ a##name_) { \
+    MOZ_ASSERT(sInstance);                            \
+    sInstance->m##name_ = a##name_;                   \
+  }
+
+// All functions in this file can run on multiple threads unless they have an
+// NS_IsMainThread() assertion.
+
+// This class contains the profiler's core global state, i.e. that which is
+// valid even when the profiler is not active. Most profile operations can't do
+// anything useful when this class is not instantiated, so we release-assert
+// its non-nullness in all such operations.
+//
+// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
+// PSAutoLock reference as an argument as proof that the gPSMutex is currently
+// locked. This makes it clear when gPSMutex is locked and helps avoid
+// accidental unlocked accesses to global state. There are ways to circumvent
+// this mechanism, but please don't do so without *very* good reason and a
+// detailed explanation.
+//
+// The exceptions to this rule:
+//
+// - mProcessStartTime, because it's immutable;
+//
+// - each thread's RacyRegisteredThread object is accessible without locking via
+//   TLSRegisteredThread::RacyRegisteredThread().
+class CorePS {
+ private:
+  CorePS()
+      : mProcessStartTime(TimeStamp::ProcessCreation()),
+        // mCoreBuffer carries its own mutex: it is accessed concurrently by
+        // functions guarded by gPSMutex and by functions without that safety
+        // (e.g., profiler_add_marker). It is *not* used inside the critical
+        // section of the sampler, because mutexes cannot be used there.
+        mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
+#ifdef USE_LUL_STACKWALK
+        ,
+        mLul(nullptr)
+#endif
+  {
+  }
+
+  ~CorePS() = default;
+
+ public:
+  // Instantiates the singleton, which must not exist yet.
+  static void Create(PSLockRef aLock) {
+    MOZ_ASSERT(!sInstance);
+    sInstance = new CorePS();
+  }
+
+  // Deletes the singleton, which must exist, and resets the instance pointer.
+  static void Destroy(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    delete sInstance;
+    sInstance = nullptr;
+  }
+
+  // Contrary to ActivePS::Exists(), this may be called without gPSMutex being
+  // locked: CorePS is instantiated so early on the main thread that we don't
+  // have to worry about it being racy.
+  static bool Exists() { return sInstance != nullptr; }
+
+  // Adds to aProfSize the measured size of the instance, of the registered
+  // threads and of the registered pages; and, when LUL is in use and has been
+  // set, adds LUL's measured size to aLulSize.
+  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
+                        size_t& aProfSize, size_t& aLulSize) {
+    MOZ_ASSERT(sInstance);
+
+    size_t profilerBytes = aMallocSizeOf(sInstance);
+
+    for (auto& thread : sInstance->mRegisteredThreads) {
+      profilerBytes += thread->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    for (auto& page : sInstance->mRegisteredPages) {
+      profilerBytes += page->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    aProfSize += profilerBytes;
+
+    // Not measured for now (may be added later if DMD finds it worthwhile):
+    // - CorePS::mRegisteredThreads itself (only its elements are measured
+    //   above)
+    // - CorePS::mRegisteredPages itself (only its elements are measured
+    //   above)
+    // - CorePS::mInterposeObserver
+
+#if defined(USE_LUL_STACKWALK)
+    if (sInstance->mLul) {
+      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
+    }
+#endif
+  }
+
+  // Lockless because the field is immutable.
+  PS_GET_LOCKLESS(const TimeStamp&, ProcessStartTime)
+
+  // Lockless because the field is thread-safe.
+  PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)
+
+  PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
+
+  // Takes ownership of aRegisteredThread. Failure to append is fatal.
+  static void AppendRegisteredThread(
+      PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
+    MOZ_ASSERT(sInstance);
+    MOZ_RELEASE_ASSERT(
+        sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
+  }
+
+  // Erases the first entry of mRegisteredThreads that owns aRegisteredThread;
+  // does nothing if there is no such entry.
+  static void RemoveRegisteredThread(PSLockRef,
+                                     RegisteredThread* aRegisteredThread) {
+    MOZ_ASSERT(sInstance);
+    auto& threads = sInstance->mRegisteredThreads;
+    auto* owner = std::find_if(
+        threads.begin(), threads.end(),
+        [aRegisteredThread](const UniquePtr<RegisteredThread>& aEntry) {
+          return aEntry.get() == aRegisteredThread;
+        });
+    if (owner != threads.end()) {
+      threads.erase(owner);
+    }
+  }
+
+  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
+
+  // Registers aRegisteredPage, unless a page that it `Equals()` is already
+  // registered under a URL other than "about:blank". An equal "about:blank"
+  // page gets replaced by the new page.
+  static void AppendRegisteredPage(PSLockRef,
+                                   RefPtr<PageInformation>&& aRegisteredPage) {
+    MOZ_ASSERT(sInstance);
+    auto& pages = sInstance->mRegisteredPages;
+
+    auto alreadyRegistered = std::find_if(
+        pages.begin(), pages.end(),
+        [newPage = aRegisteredPage.get()](PageInformation* aExisting) {
+          return newPage->Equals(aExisting);
+        });
+
+    if (alreadyRegistered != pages.end()) {
+      const bool isAboutBlank = (*alreadyRegistered)->Url() == "about:blank";
+      if (!isAboutBlank) {
+        // Do not register the same page again.
+        return;
+      }
+      // When a BrowsingContext is loaded, the first url loaded in it will be
+      // about:blank, and if the principal matches, the first document loaded
+      // in it will share an inner window. So the intermediate about:blank is
+      // removed when it shares the inner window.
+      pages.erase(alreadyRegistered);
+    }
+
+    MOZ_RELEASE_ASSERT(pages.append(std::move(aRegisteredPage)));
+  }
+
+  // Erases from mRegisteredPages every page with the given inner window ID.
+  static void RemoveRegisteredPage(PSLockRef,
+                                   uint64_t aRegisteredInnerWindowID) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mRegisteredPages.eraseIf(
+        [&](const RefPtr<PageInformation>& aPage) {
+          return aPage->InnerWindowID() == aRegisteredInnerWindowID;
+        });
+  }
+
+  // Forgets all registered pages.
+  static void ClearRegisteredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mRegisteredPages.clear();
+  }
+
+  PS_GET(const Vector<BaseProfilerCount*>&, Counters)
+
+  // Records aCounter. The counter is not owned by CorePS (counters may be
+  // stored in static objects). Failure to append is fatal.
+  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
+    MOZ_ASSERT(sInstance);
+    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
+  }
+
+  // Forgets aCounter, which must have been recorded if CorePS exists. This may
+  // be called after the profiler is stopped or late in shutdown, in which case
+  // there is no instance and nothing to do.
+  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
+    if (!sInstance) {
+      return;
+    }
+    auto& counters = sInstance->mCounters;
+    auto* recorded = std::find(counters.begin(), counters.end(), aCounter);
+    MOZ_RELEASE_ASSERT(recorded != counters.end());
+    counters.erase(recorded);
+  }
+
+#ifdef USE_LUL_STACKWALK
+  // Non-owning access to LUL's state; null until SetLul() has been called
+  // with a non-null object.
+  static lul::LUL* Lul(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    return sInstance->mLul.get();
+  }
+  // Takes ownership of aLul.
+  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
+    MOZ_ASSERT(sInstance);
+    sInstance->mLul = std::move(aLul);
+  }
+#endif
+
+  PS_GET_AND_SET(const std::string&, ProcessName)
+  PS_GET_AND_SET(const std::string&, ETLDplus1)
+
+ private:
+  // The singleton instance
+  static CorePS* sInstance;
+
+  // The time that the process started.
+  const TimeStamp mProcessStartTime;
+
+  // The thread-safe blocks-oriented buffer into which all profiling data is
+  // recorded.
+  // ActivePS controls the lifetime of the underlying contents buffer: When
+  // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
+  // see ActivePS for further details.
+  // Note: This needs to live here outside of ActivePS, because some producers
+  // are indirectly controlled (e.g., by atomic flags) and therefore may still
+  // attempt to write some data shortly after ActivePS has shutdown and deleted
+  // the underlying buffer in memory.
+  ProfileChunkedBuffer mCoreBuffer;
+
+  // Info on all the registered threads.
+  // ThreadIds in mRegisteredThreads are unique.
+  Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
+
+  // Info on all the registered pages.
+  // InnerWindowIDs in mRegisteredPages are unique.
+  Vector<RefPtr<PageInformation>> mRegisteredPages;
+
+  // Non-owning pointers to all active counters
+  Vector<BaseProfilerCount*> mCounters;
+
+#ifdef USE_LUL_STACKWALK
+  // LUL's state. Null prior to the first activation, non-null thereafter.
+  UniquePtr<lul::LUL> mLul;
+#endif
+
+  // Process name, provided by child process initialization code.
+  std::string mProcessName;
+  // Private name, provided by child process initialization code (eTLD+1 in
+  // fission)
+  std::string mETLDplus1;
+};
+
+CorePS* CorePS::sInstance = nullptr;
+
+// Returns the core buffer held by CorePS. CorePS must exist (this is only
+// checked by an assertion).
+ProfileChunkedBuffer& profiler_get_core_buffer() {
+  MOZ_ASSERT(CorePS::Exists());
+  ProfileChunkedBuffer& coreBuffer = CorePS::CoreBuffer();
+  return coreBuffer;
+}
+
+class SamplerThread;
+
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                       double aInterval);
+
+// Associates a RegisteredThread with the ProfiledThreadData collected for it.
+// This is brace-initialized positionally (see
+// ActivePS::AddLiveProfiledThread), so the member order matters.
+struct LiveProfiledThreadData {
+  // Raw pointer, used as the lookup key for this entry.
+  // NOTE(review): presumably non-owning, with ownership in
+  // CorePS::mRegisteredThreads -- confirm.
+  RegisteredThread* mRegisteredThread;
+  // The profiling data for that thread, owned by this entry.
+  UniquePtr<ProfiledThreadData> mProfiledThreadData;
+};
+
+// The buffer size is provided as a number of "entries", this is their size in
+// bytes.
+constexpr static uint32_t scBytesPerEntry = 8;
+
+// This class contains the profiler's global state that is valid only when the
+// profiler is active. When not instantiated, the profiler is inactive.
+//
+// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
+// CorePS.
+//
+class ActivePS {
+ private:
+  // We need to decide how many chunks of what size we want to fit in the given
+  // total maximum capacity for this process, in the (likely) context of
+  // multiple processes doing the same choice and having an inter-process
+  // mechanism to control the overall memory limit.
+
+  // Minimum chunk size allowed, enough for at least one stack.
+  constexpr static uint32_t scMinimumChunkSize =
+      2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
+
+  // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
+  // next), and 2 released chunks (so that one can be recycled when old, leaving
+  // one with some data).
+  constexpr static uint32_t scMinimumNumberOfChunks = 4;
+
+  // And we want to limit chunks to a maximum size, which is a compromise
+  // between:
+  // - A big size, which helps with reducing the rate of allocations and IPCs.
+  // - A small size, which helps with equalizing the duration of recorded data
+  //   (as the inter-process controller will discard the oldest chunks in all
+  //   Firefox processes).
+  constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
+
+ public:
+  // We should be able to store at least the minimum number of the smallest-
+  // possible chunks.
+  constexpr static uint32_t scMinimumBufferSize =
+      scMinimumNumberOfChunks * scMinimumChunkSize;
+  constexpr static uint32_t scMinimumBufferEntries =
+      scMinimumBufferSize / scBytesPerEntry;
+
+  // Limit to 2GiB.
+  constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
+  constexpr static uint32_t scMaximumBufferEntries =
+      scMaximumBufferSize / scBytesPerEntry;
+
+  // Clamps a requested number of buffer entries to the inclusive range
+  // [scMinimumBufferEntries, scMaximumBufferEntries].
+  constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
+    return (aEntries <= scMinimumBufferEntries)   ? scMinimumBufferEntries
+           : (aEntries >= scMaximumBufferEntries) ? scMaximumBufferEntries
+                                                  : aEntries;
+  }
+
+ private:
+  // Computes the chunk size, in bytes, for a buffer of aEntries entries: the
+  // clamped buffer size divided evenly among scMinimumNumberOfChunks chunks,
+  // but never more than scMaximumChunkSize.
+  constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
+    const size_t bufferBytes =
+        size_t(ClampToAllowedEntries(aEntries)) * scBytesPerEntry;
+    const size_t evenShare = bufferBytes / scMinimumNumberOfChunks;
+    return uint32_t(std::min(evenShare, size_t(scMaximumChunkSize)));
+  }
+
+  // Turns the requested features into the set that will actually be used:
+  // unavailable features are dropped, Threads is forced on when there is at
+  // least one filter, and the features implied by FileIOAll/FileIO are added.
+  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
+    // Only keep what this platform/configuration supports.
+    uint32_t adjusted = aFeatures & AvailableFeatures();
+
+    // Users sometimes ask to filter by a list of threads but forget to
+    // explicitly specify ProfilerFeature::Threads, so a filter always enables
+    // it.
+    if (aFilterCount > 0) {
+      adjusted |= ProfilerFeature::Threads;
+    }
+
+    // Some features imply others: FileIOAll implies FileIO, and either of them
+    // implies MainThreadIO.
+    const bool wantsFileIOAll = (adjusted & ProfilerFeature::FileIOAll) != 0;
+    const bool wantsFileIO = (adjusted & ProfilerFeature::FileIO) != 0;
+    if (wantsFileIOAll) {
+      adjusted |= ProfilerFeature::FileIO;
+    }
+    if (wantsFileIOAll || wantsFileIO) {
+      adjusted |= ProfilerFeature::MainThreadIO;
+    }
+
+    return adjusted;
+  }
+
+  // Builds the active profiler state from the given settings.
+  // - aLock: proof that gPSMutex is locked; forwarded to NewSamplerThread().
+  // - aCapacity: requested buffer capacity, in entries. It is stored as given,
+  //   but clamped to the allowed range when sizing the chunk manager.
+  // - aInterval: stored as given, and forwarded to NewSamplerThread().
+  // - aFeatures: requested features; stored after AdjustFeatures().
+  // - aFilters/aFilterCount: array of filter strings, deep-copied into
+  //   mFilters.
+  // - aDuration: stored as given.
+  ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
+           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
+           const Maybe<double>& aDuration)
+      : mGeneration(sNextGeneration++),
+        mCapacity(aCapacity),
+        mDuration(aDuration),
+        mInterval(aInterval),
+        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
+        // Total size in bytes, then size of each chunk in bytes.
+        mProfileBufferChunkManager(
+            size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
+            ChunkSizeForEntries(aCapacity.Value())),
+        // The lambda is invoked right here: it hands the chunk manager
+        // initialized just above to the core buffer, and then mProfileBuffer
+        // is initialized from that same core buffer.
+        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
+          CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
+          return CorePS::CoreBuffer();
+        }()),
+        // The new sampler thread doesn't start sampling immediately because the
+        // main loop within Run() is blocked until this function's caller
+        // unlocks gPSMutex.
+        mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval)),
+        mIsPaused(false),
+        mIsSamplingPaused(false)
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+        ,
+        mWasSamplingPaused(false)
+#endif
+  {
+    // Deep copy aFilters.
+    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
+    for (uint32_t i = 0; i < aFilterCount; ++i) {
+      mFilters[i] = aFilters[i];
+    }
+  }
+
+  // Detaches the core buffer from the chunk manager that was set on it by the
+  // constructor.
+  ~ActivePS() {
+    ProfileChunkedBuffer& coreBuffer = CorePS::CoreBuffer();
+    coreBuffer.ResetChunkManager();
+  }
+
+  // Returns true if a thread named aThreadName is selected by the filters:
+  // - no filters at all: every thread is selected;
+  // - a "*" filter selects every thread;
+  // - a filter that is a case-insensitive substring of the thread name
+  //   selects that thread;
+  // - a "pid:<number>" filter (prefix matched case-insensitively) selects
+  //   every thread when <number> is the current process id.
+  // Returns false if no filter matches.
+  bool ThreadSelected(const char* aThreadName) {
+    if (mFilters.empty()) {
+      return true;
+    }
+
+    // Lowercases a string in place. Characters must reach ::tolower() as
+    // `unsigned char` values: a plain `char` may be negative (e.g., bytes of
+    // non-ASCII names), and ::tolower() has undefined behavior for negative
+    // arguments other than EOF.
+    const auto lowercase = [](std::string& aString) {
+      std::transform(aString.begin(), aString.end(), aString.begin(),
+                     [](unsigned char aChar) {
+                       return static_cast<char>(::tolower(aChar));
+                     });
+    };
+
+    std::string name = aThreadName;
+    lowercase(name);
+
+    for (uint32_t i = 0; i < mFilters.length(); ++i) {
+      std::string filter = mFilters[i];
+
+      if (filter == "*") {
+        return true;
+      }
+
+      lowercase(filter);
+
+      // Crude, non UTF-8 compatible, case insensitive substring search
+      if (name.find(filter) != std::string::npos) {
+        return true;
+      }
+
+      // If the filter starts with pid:, check for a pid match
+      if (filter.find("pid:") == 0) {
+        std::string mypid = std::to_string(profiler_current_process_id());
+        if (filter.compare(4, std::string::npos, mypid) == 0) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
+ public:
+  // Instantiates the singleton, which must not exist yet, forwarding all the
+  // settings to the constructor.
+  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
+                     uint32_t aFeatures, const char** aFilters,
+                     uint32_t aFilterCount, const Maybe<double>& aDuration) {
+    MOZ_ASSERT(!sInstance);
+    ActivePS* const created =
+        new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
+                     aFilterCount, aDuration);
+    sInstance = created;
+  }
+
+  // Deletes the singleton, which must exist, and returns the sampler thread
+  // pointer that it was holding. The sampler thread itself is not deleted
+  // here; the result is [[nodiscard]] so callers cannot silently drop it.
+  [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    // Grab the pointer first: it is no longer reachable once the instance is
+    // gone.
+    SamplerThread* survivingSamplerThread = sInstance->mSamplerThread;
+
+    delete sInstance;
+    sInstance = nullptr;
+
+    return survivingSamplerThread;
+  }
+
+  // True while the singleton is instantiated.
+  static bool Exists(PSLockRef) { return sInstance != nullptr; }
+
+  // Returns true if the given settings are all equal to the ones stored in the
+  // singleton, which must exist. aFeatures is compared against the stored
+  // features as they are (i.e., after the constructor's adjustment), and
+  // filters are compared in order, as C strings.
+  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
+                     const Maybe<double>& aDuration, double aInterval,
+                     uint32_t aFeatures, const char** aFilters,
+                     uint32_t aFilterCount) {
+    MOZ_ASSERT(sInstance);
+    auto& storedFilters = sInstance->mFilters;
+
+    const bool scalarsOrCountDiffer =
+        sInstance->mCapacity != aCapacity ||
+        sInstance->mDuration != aDuration ||
+        sInstance->mInterval != aInterval ||
+        sInstance->mFeatures != aFeatures ||
+        storedFilters.length() != aFilterCount;
+    if (scalarsOrCountDiffer) {
+      return false;
+    }
+
+    // Same number of filters on both sides; compare them pairwise.
+    return std::equal(storedFilters.begin(), storedFilters.end(), aFilters,
+                      [](const std::string& aStored, const char* aGiven) {
+                        return strcmp(aStored.c_str(), aGiven) == 0;
+                      });
+  }
+
+  // Returns the measured size of the singleton (which must exist) plus that of
+  // the profile buffer's contents.
+  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
+    MOZ_ASSERT(sInstance);
+
+    size_t measuredBytes = aMallocSizeOf(sInstance);
+    measuredBytes += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
+
+    // Not measured for now (may be added later if DMD finds it worthwhile):
+    // - mLiveProfiledThreads (both the array itself, and the contents)
+    // - mDeadProfiledThreads (both the array itself, and the contents)
+
+    return measuredBytes;
+  }
+
+  // Returns true if the thread described by aInfo should be profiled: it must
+  // be the main thread or the Threads feature must be on, and in either case
+  // its name must be selected by the filters.
+  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
+    MOZ_ASSERT(sInstance);
+    if (!aInfo->IsMainThread() && !FeatureThreads(aLock)) {
+      return false;
+    }
+    return sInstance->ThreadSelected(aInfo->Name());
+  }
+
+  // Lock-requiring getters for the settings stored in the singleton. Each
+  // PS_GET(type_, Name) line expands to a static `Name(PSLockRef)` function
+  // returning `sInstance->mName`.
+  PS_GET(uint32_t, Generation)
+
+  PS_GET(PowerOfTwo32, Capacity)
+
+  PS_GET(Maybe<double>, Duration)
+
+  PS_GET(double, Interval)
+
+  // Note: mFeatures is initialized from AdjustFeatures() in the constructor,
+  // not directly from the features that were requested.
+  PS_GET(uint32_t, Features)
+
+  // For each feature `Name_` listed by BASE_PROFILER_FOR_EACH_FEATURE, define
+  // a static `Feature<Name_>(PSLockRef)` predicate that returns what
+  // `ProfilerFeature::Has<Name_>()` says about the stored features.
+#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
+  static bool Feature##Name_(PSLockRef) {                     \
+    MOZ_ASSERT(sInstance);                                    \
+    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
+  }
+
+  BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
+
+#undef PS_GET_FEATURE
+
+  // The filter strings, as deep-copied by the constructor.
+  PS_GET(const Vector<std::string>&, Filters)
+
+  // Forwards to the chunk manager's FulfillChunkRequests(). The singleton
+  // must exist.
+  static void FulfillChunkRequests(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    auto& chunkManager = sInstance->mProfileBufferChunkManager;
+    chunkManager.FulfillChunkRequests();
+  }
+
+  // Gives access to the singleton's profile buffer. The singleton must exist.
+  static ProfileBuffer& Buffer(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    ProfileBuffer& profileBuffer = sInstance->mProfileBuffer;
+    return profileBuffer;
+  }
+
+  // Read-only access to the list of live profiled threads. The singleton must
+  // exist.
+  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    const Vector<LiveProfiledThreadData>& liveThreads =
+        sInstance->mLiveProfiledThreads;
+    return liveThreads;
+  }
+
+  // Collects (RegisteredThread*, ProfiledThreadData*) pairs for all the
+  // threads that should be included in a profile:
+  // - threads that are still registered;
+  // - threads that have been unregistered but still have data in the buffer;
+  //   for these, the RegisteredThread pointer is null.
+  // The pairs are sorted by thread register time.
+  // The result contains raw pointers: do not hold on to it across thread
+  // registration or profiler restarts.
+  static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
+  ProfiledThreads(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    using ThreadPair = std::pair<RegisteredThread*, ProfiledThreadData*>;
+
+    auto& liveThreads = sInstance->mLiveProfiledThreads;
+    auto& deadThreads = sInstance->mDeadProfiledThreads;
+
+    Vector<ThreadPair> pairs;
+    MOZ_RELEASE_ASSERT(
+        pairs.initCapacity(liveThreads.length() + deadThreads.length()));
+
+    for (auto& live : liveThreads) {
+      MOZ_RELEASE_ASSERT(pairs.append(
+          ThreadPair(live.mRegisteredThread, live.mProfiledThreadData.get())));
+    }
+    for (auto& dead : deadThreads) {
+      MOZ_RELEASE_ASSERT(pairs.append(
+          ThreadPair(static_cast<RegisteredThread*>(nullptr), dead.get())));
+    }
+
+    const auto registeredEarlier = [](const ThreadPair& aLeft,
+                                      const ThreadPair& aRight) {
+      return aLeft.second->Info()->RegisterTime() <
+             aRight.second->Info()->RegisterTime();
+    };
+    std::sort(pairs.begin(), pairs.end(), registeredEarlier);
+    return pairs;
+  }
+
+  // Returns the pages currently registered in CorePS, followed by the dead
+  // profiled pages kept by the singleton. Contrary to threads, the pages are
+  // not sorted, since they won't be shown as a list.
+  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+    Vector<RefPtr<PageInformation>> pages;
+    for (auto& registeredPage : CorePS::RegisteredPages(aLock)) {
+      MOZ_RELEASE_ASSERT(pages.append(registeredPage));
+    }
+    for (auto& deadPage : sInstance->mDeadProfiledPages) {
+      MOZ_RELEASE_ASSERT(pages.append(deadPage));
+    }
+    return pages;
+  }
+
+  // Do a linear search through mLiveProfiledThreads to find the
+  // ProfiledThreadData object for a RegisteredThread.
+  static ProfiledThreadData* GetProfiledThreadData(
+      PSLockRef, RegisteredThread* aRegisteredThread) {
+    MOZ_ASSERT(sInstance);
+    const auto& liveThreads = sInstance->mLiveProfiledThreads;
+    // Walk the live entries in order and stop at the first one that belongs
+    // to the requested RegisteredThread.
+    for (size_t index = 0; index < liveThreads.length(); index++) {
+      const LiveProfiledThreadData& entry = liveThreads[index];
+      if (entry.mRegisteredThread != aRegisteredThread) {
+        continue;
+      }
+      return entry.mProfiledThreadData.get();
+    }
+    // No live entry matches.
+    return nullptr;
+  }
+
+  static ProfiledThreadData* AddLiveProfiledThread(
+      PSLockRef, RegisteredThread* aRegisteredThread,
+      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
+    MOZ_ASSERT(sInstance);
+    auto& liveThreads = sInstance->mLiveProfiledThreads;
+
+    // The new entry takes ownership of the ProfiledThreadData.
+    MOZ_RELEASE_ASSERT(liveThreads.append(LiveProfiledThreadData{
+        aRegisteredThread, std::move(aProfiledThreadData)}));
+
+    // Hand back a non-owning pointer to the object that was just stored.
+    ProfiledThreadData* const stored =
+        liveThreads.back().mProfiledThreadData.get();
+    return stored;
+  }
+
+  static void UnregisterThread(PSLockRef aLockRef,
+                               RegisteredThread* aRegisteredThread) {
+    MOZ_ASSERT(sInstance);
+
+    DiscardExpiredDeadProfiledThreads(aLockRef);
+
+    // Locate the live entry for this thread. Once found, record the current
+    // end of the buffer range on its ProfiledThreadData, transfer that object
+    // to mDeadProfiledThreads, and drop the now-empty live entry.
+    auto& liveThreads = sInstance->mLiveProfiledThreads;
+    for (size_t index = 0; index < liveThreads.length(); index++) {
+      LiveProfiledThreadData& entry = liveThreads[index];
+      if (entry.mRegisteredThread != aRegisteredThread) {
+        continue;
+      }
+
+      entry.mProfiledThreadData->NotifyUnregistered(
+          sInstance->mProfileBuffer.BufferRangeEnd());
+      MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
+          std::move(entry.mProfiledThreadData)));
+      liveThreads.erase(&liveThreads[index]);
+      return;
+    }
+  }
+
+  PS_GET_AND_SET(bool, IsPaused)
+
+  // True if sampling is paused (through generic `SetIsPaused()` or specific
+  // `SetIsSamplingPaused()`).
+  static bool IsSamplingPaused(PSLockRef lock) {
+    MOZ_ASSERT(sInstance);
+    // A full pause counts as a sampling pause too.
+    if (IsPaused(lock)) {
+      return true;
+    }
+    return sInstance->mIsSamplingPaused;
+  }
+
+  static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
+    MOZ_ASSERT(sInstance);
+    // Only the sampling-specific flag is written here; mIsPaused is untouched.
+    ActivePS& active = *sInstance;
+    active.mIsSamplingPaused = aIsSamplingPaused;
+  }
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+  PS_GET_AND_SET(bool, WasSamplingPaused)
+#endif
+
+  static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+    // A dead thread is expired once the buffer position recorded at its
+    // unregistration lies before the start of the buffer range.
+    const auto unregisteredBeforeRangeStart =
+        [rangeStart](const UniquePtr<ProfiledThreadData>& aThreadData) {
+          Maybe<uint64_t> unregisteredAt =
+              aThreadData->BufferPositionWhenUnregistered();
+          MOZ_RELEASE_ASSERT(unregisteredAt,
+                             "should have unregistered this thread");
+          return *unregisteredAt < rangeStart;
+        };
+    sInstance->mDeadProfiledThreads.eraseIf(unregisteredBeforeRangeStart);
+  }
+
+  static void UnregisterPage(PSLockRef aLock,
+                             uint64_t aRegisteredInnerWindowID) {
+    MOZ_ASSERT(sInstance);
+    auto& registeredPages = CorePS::RegisteredPages(aLock);
+
+    // Every page with a matching inner window ID is moved to the dead-page
+    // list. The index only advances past entries that are kept, because
+    // erasing shifts the following entries down into the current slot.
+    size_t index = 0;
+    while (index < registeredPages.length()) {
+      RefPtr<PageInformation>& page = registeredPages[index];
+      if (page->InnerWindowID() != aRegisteredInnerWindowID) {
+        index++;
+        continue;
+      }
+
+      page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
+      MOZ_RELEASE_ASSERT(
+          sInstance->mDeadProfiledPages.append(std::move(page)));
+      registeredPages.erase(&registeredPages[index]);
+    }
+  }
+
+  static void DiscardExpiredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+    // A dead page is expired once the buffer position recorded at its
+    // unregistration lies before the start of the buffer range.
+    const auto unregisteredBeforeRangeStart =
+        [rangeStart](const RefPtr<PageInformation>& aPage) {
+          Maybe<uint64_t> unregisteredAt =
+              aPage->BufferPositionWhenUnregistered();
+          MOZ_RELEASE_ASSERT(unregisteredAt,
+                             "should have unregistered this page");
+          return *unregisteredAt < rangeStart;
+        };
+    sInstance->mDeadProfiledPages.eraseIf(unregisteredBeforeRangeStart);
+  }
+
+  static void ClearUnregisteredPages(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    // Forget every page that was kept around after being unregistered.
+    auto& deadPages = sInstance->mDeadProfiledPages;
+    deadPages.clear();
+  }
+
+  static void ClearExpiredExitProfiles(PSLockRef) {
+    MOZ_ASSERT(sInstance);
+    const uint64_t rangeStart = sInstance->mProfileBuffer.BufferRangeStart();
+
+    // An exit profile is expired when it was gathered at a buffer position
+    // that lies before the start of the buffer range.
+    const auto gatheredBeforeRangeStart =
+        [rangeStart](const ExitProfile& aCandidate) {
+          return aCandidate.mBufferPositionAtGatherTime < rangeStart;
+        };
+    sInstance->mExitProfiles.eraseIf(gatheredBeforeRangeStart);
+  }
+
+  static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) {
+    MOZ_ASSERT(sInstance);
+
+    // Drop expired entries before storing a new one.
+    ClearExpiredExitProfiles(aLock);
+
+    // Tag the stored copy with the current end of the buffer range, which is
+    // what ClearExpiredExitProfiles() later compares against.
+    const uint64_t gatherPosition = sInstance->mProfileBuffer.BufferRangeEnd();
+    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
+        ExitProfile{aExitProfile, gatherPosition}));
+  }
+
+  static Vector<std::string> MoveExitProfiles(PSLockRef aLock) {
+    MOZ_ASSERT(sInstance);
+
+    // Only profiles that have not expired are handed out.
+    ClearExpiredExitProfiles(aLock);
+
+    auto& stored = sInstance->mExitProfiles;
+    Vector<std::string> moved;
+    MOZ_RELEASE_ASSERT(moved.initCapacity(stored.length()));
+
+    // Steal each stored string, then empty the stored list.
+    for (size_t index = 0; index < stored.length(); index++) {
+      MOZ_RELEASE_ASSERT(moved.append(std::move(stored[index].mJSON)));
+    }
+    stored.clear();
+    return moved;
+  }
+
+ private:
+  // The singleton instance.
+  static ActivePS* sInstance;
+
+  // We need to track activity generations. If we didn't we could have the
+  // following scenario.
+  //
+  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
+  //   gPSMutex, deletes the SamplerThread (which does a join).
+  //
+  // - profiler_start() runs on a different thread, locks gPSMutex,
+  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
+  //   completes.
+  //
+  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
+  //   and continues as if the start/stop pair didn't occur. Also
+  //   profiler_stop() is stuck, unable to finish.
+  //
+  // By checking ActivePS *and* the generation, we can avoid this scenario.
+  // sNextGeneration is used to track the next generation number; it is static
+  // because it must persist across different ActivePS instantiations.
+  const uint32_t mGeneration;
+  static uint32_t sNextGeneration;
+
+  // The maximum number of 8-byte entries in mProfileBuffer.
+  const PowerOfTwo32 mCapacity;
+
+  // The maximum duration of entries in mProfileBuffer, in seconds.
+  const Maybe<double> mDuration;
+
+  // The interval between samples, measured in milliseconds.
+  const double mInterval;
+
+  // The profile features that are enabled.
+  const uint32_t mFeatures;
+
+  // Substrings of names of threads we want to profile.
+  Vector<std::string> mFilters;
+
+  // The chunk manager used by `mProfileBuffer` below.
+  ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;
+
+  // The buffer into which all samples are recorded.
+  ProfileBuffer mProfileBuffer;
+
+  // ProfiledThreadData objects for any threads that were profiled at any point
+  // during this run of the profiler:
+  //  - mLiveProfiledThreads contains all threads that are still registered, and
+  //  - mDeadProfiledThreads contains all threads that have already been
+  //    unregistered but for which there is still data in the profile buffer.
+  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
+  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
+
+  // Info on all the dead pages.
+  // Registered pages are being moved to this array after unregistration.
+  // We are keeping them in case we need them in the profile data.
+  // We are removing them when we ensure that we won't need them anymore.
+  Vector<RefPtr<PageInformation>> mDeadProfiledPages;
+
+  // The current sampler thread. This class is not responsible for destroying
+  // the SamplerThread object; the Destroy() method returns it so the caller
+  // can destroy it.
+  SamplerThread* const mSamplerThread;
+
+  // Is the profiler fully paused?
+  bool mIsPaused;
+
+  // Is the profiler periodic sampling paused?
+  bool mIsSamplingPaused;
+
+#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
+  // Used to record whether the sampler was paused just before forking. False
+  // at all times except just before/after forking.
+  bool mWasSamplingPaused;
+#endif
+
+  // An exit profile string together with the buffer position at which it was
+  // added. ClearExpiredExitProfiles() drops entries whose position lies
+  // before the profile buffer's range start.
+  struct ExitProfile {
+    // The profile text, as passed to AddExitProfile().
+    std::string mJSON;
+    // mProfileBuffer.BufferRangeEnd() at the time AddExitProfile() ran.
+    uint64_t mBufferPositionAtGatherTime;
+  };
+  // Exit profiles added so far; emptied by MoveExitProfiles().
+  Vector<ExitProfile> mExitProfiles;
+};
+
+// Out-of-class definitions of ActivePS's static data members: there is no
+// instance initially, and generation numbering starts at zero.
+ActivePS* ActivePS::sInstance = nullptr;
+uint32_t ActivePS::sNextGeneration = 0;
+
+#undef PS_GET
+#undef PS_GET_LOCKLESS
+#undef PS_GET_AND_SET
+
+// Storage for the "active" flag, the pause flags and the feature bits, all
+// packed into a single relaxed atomic word.
+Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
+
+/* static */
+void RacyFeatures::SetActive(uint32_t aFeatures) {
+  // Overwrites the whole word, so any pause bits are dropped as well.
+  const uint32_t activeWord = Active | aFeatures;
+  sActiveAndFeatures = activeWord;
+}
+
+/* static */
+void RacyFeatures::SetInactive() {
+  // Clears the active flag together with every other bit.
+  sActiveAndFeatures = 0;
+}
+
+/* static */
+bool RacyFeatures::IsActive() {
+  const uint32_t snapshot = sActiveAndFeatures;
+  return (snapshot & Active) != 0;
+}
+
+/* static */
+void RacyFeatures::SetPaused() {
+  // Single atomic read-modify-write; the other bits are preserved.
+  sActiveAndFeatures |= Paused;
+}
+
+/* static */
+void RacyFeatures::SetUnpaused() {
+  // Single atomic read-modify-write; the other bits are preserved.
+  sActiveAndFeatures &= ~Paused;
+}
+
+/* static */
+void RacyFeatures::SetSamplingPaused() {
+  // Single atomic read-modify-write; the other bits are preserved.
+  sActiveAndFeatures |= SamplingPaused;
+}
+
+/* static */
+void RacyFeatures::SetSamplingUnpaused() {
+  // Single atomic read-modify-write; the other bits are preserved.
+  sActiveAndFeatures &= ~SamplingPaused;
+}
+
+/* static */
+bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
+  // Take one snapshot so both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  if ((snapshot & Active) == 0) {
+    return false;
+  }
+  return (snapshot & aFeature) != 0;
+}
+
+/* static */
+bool RacyFeatures::IsActiveAndUnpaused() {
+  // Take one snapshot so both tests see the same value.
+  const uint32_t snapshot = sActiveAndFeatures;
+  if ((snapshot & Active) == 0) {
+    return false;
+  }
+  return (snapshot & Paused) == 0;
+}
+
+/* static */
+bool RacyFeatures::IsActiveAndSamplingUnpaused() {
+  // Take one snapshot so both tests see the same value. Either kind of pause
+  // counts as "sampling paused".
+  const uint32_t snapshot = sActiveAndFeatures;
+  if ((snapshot & Active) == 0) {
+    return false;
+  }
+  return (snapshot & (Paused | SamplingPaused)) == 0;
+}
+
+// Each live thread has a RegisteredThread, and we store a reference to it in
+// TLS. This class encapsulates that TLS.
+class TLSRegisteredThread {
+ public:
+  // Initializes both thread-local slots. Both init() calls are always made;
+  // the result is true only if both succeeded.
+  static bool Init(PSLockRef) {
+    const bool registeredThreadOk = sRegisteredThread.init();
+    const bool profilingStackOk = AutoProfilerLabel::sProfilingStack.init();
+    if (!registeredThreadOk) {
+      return false;
+    }
+    return profilingStackOk;
+  }
+
+  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
+  static class RegisteredThread* RegisteredThread(PSLockRef) {
+    class RegisteredThread* current = sRegisteredThread.get();
+    return current;
+  }
+
+  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
+  // Returns null when no RegisteredThread is stored for this thread.
+  static class RacyRegisteredThread* RacyRegisteredThread() {
+    class RegisteredThread* current = sRegisteredThread.get();
+    if (!current) {
+      return nullptr;
+    }
+    return &current->RacyRegisteredThread();
+  }
+
+  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
+  // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
+  // is marginally slower because it requires an extra pointer indirection.
+  static ProfilingStack* Stack() {
+    return AutoProfilerLabel::sProfilingStack.get();
+  }
+
+  // Stores aRegisteredThread in TLS and keeps the direct ProfilingStack
+  // pointer in sync with it (null when aRegisteredThread is null).
+  static void SetRegisteredThread(PSLockRef,
+                                  class RegisteredThread* aRegisteredThread) {
+    sRegisteredThread.set(aRegisteredThread);
+    if (aRegisteredThread) {
+      AutoProfilerLabel::sProfilingStack.set(
+          &aRegisteredThread->RacyRegisteredThread().ProfilingStack());
+    } else {
+      AutoProfilerLabel::sProfilingStack.set(nullptr);
+    }
+  }
+
+ private:
+  // This is a non-owning reference to the RegisteredThread;
+  // CorePS::mRegisteredThreads is the owning reference. On thread
+  // deregistration, this reference is cleared and the RegisteredThread is
+  // destroyed.
+  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
+};
+
+MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;
+
+/* static */
+ProfilingStack* AutoProfilerLabel::GetProfilingStack() {
+  // Read the ProfilingStack pointer stored in thread-local storage.
+  ProfilingStack* const stack = sProfilingStack.get();
+  return stack;
+}
+
+// Although you can access a thread's ProfilingStack via
+// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
+// directly to the ProfilingStack. Here's why.
+//
+// - We need to be able to push to and pop from the ProfilingStack in
+//   AutoProfilerLabel.
+//
+// - The class functions are hot and must be defined in BaseProfiler.h so they
+//   can be inlined.
+//
+// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
+//   BaseProfiler.h.
+//
+// This second pointer isn't ideal, but does provide a way to satisfy those
+// constraints. TLSRegisteredThread is responsible for updating it.
+MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;
+
+// The name of the main thread.
+static const char* const kMainThreadName = "GeckoMain";
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN sampling/unwinding code
+
+// The registers used for stack unwinding and a few other sampling purposes.
+// The ctor only null-initializes the fields; users are responsible for filling
+// them in with real values.
+class Registers {
+ public:
+  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
+
+#if defined(HAVE_NATIVE_UNWIND)
+  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
+  void SyncPopulate();
+#endif
+
+  // Zeroes every byte of this object.
+  void Clear() { memset(this, 0, sizeof(*this)); }
+
+  // These fields are filled in by
+  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
+  // samples, and by SyncPopulate() for synchronous samples.
+  Address mPC;  // Instruction pointer.
+  Address mSP;  // Stack pointer.
+  Address mFP;  // Frame pointer.
+  Address mLR;  // ARM link register.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // This contains all the registers, which means it duplicates the four fields
+  // above. This is ok.
+  // Defaults to null so that a freshly constructed object never carries an
+  // indeterminate pointer.
+  ucontext_t* mContext = nullptr;  // The context from the signal handler.
+#endif
+};
+
+// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
+// looping on corrupted stacks.
+static const size_t MAX_NATIVE_FRAMES = 1024;
+
+struct NativeStack {
+  // Parallel arrays: entry i of each describes the same frame.
+  void* mPCs[MAX_NATIVE_FRAMES];  // Program counters.
+  void* mSPs[MAX_NATIVE_FRAMES];  // Stack pointers.
+  size_t mCount;                  // Number of frames filled.
+
+  // Starts out empty, with both arrays value-initialized.
+  NativeStack() : mPCs{}, mSPs{}, mCount{0} {}
+};
+
+// Merges the profiling stack and native stack, outputting the details to
+// aCollector.
+static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
+                        const RegisteredThread& aRegisteredThread,
+                        const Registers& aRegs, const NativeStack& aNativeStack,
+                        ProfilerStackCollector& aCollector) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Note: aFeatures and aRegs are not read anywhere in this body.
+
+  const ProfilingStack& profilingStack =
+      aRegisteredThread.RacyRegisteredThread().ProfilingStack();
+  const ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
+  uint32_t profilingStackFrameCount = profilingStack.stackSize();
+
+  // Note: samplePosInBuffer is assigned below but not read again in this
+  // function.
+  Maybe<uint64_t> samplePosInBuffer;
+  if (!aIsSynchronous) {
+    // aCollector.SamplePositionInBuffer() will return Nothing() when
+    // profiler_suspend_and_sample_thread is called from the background hang
+    // reporter.
+    samplePosInBuffer = aCollector.SamplePositionInBuffer();
+  }
+  // While the profiling stack array is ordered oldest-to-youngest, the native
+  // array is ordered youngest-to-oldest. We must add frames to aCollector
+  // oldest-to-youngest. Thus, iterate over the profiling stack forwards and
+  // the native array backwards. Note: this means the terminating condition
+  // for nativeIndex is being < 0.
+  uint32_t profilingStackIndex = 0;
+  int32_t nativeIndex = aNativeStack.mCount - 1;
+
+  // Stack address of the most recent label or sp-marker frame seen so far;
+  // frames of other kinds reuse it as their address.
+  uint8_t* lastLabelFrameStackAddr = nullptr;
+
+  // Iterate as long as there is at least one frame remaining.
+  while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) {
+    // There are 1 or 2 frames available (one per stack). Find and add the
+    // oldest.
+    uint8_t* profilingStackAddr = nullptr;
+    uint8_t* nativeStackAddr = nullptr;
+
+    if (profilingStackIndex != profilingStackFrameCount) {
+      const ProfilingStackFrame& profilingStackFrame =
+          profilingStackFrames[profilingStackIndex];
+
+      if (profilingStackFrame.isLabelFrame() ||
+          profilingStackFrame.isSpMarkerFrame()) {
+        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
+      }
+
+      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
+      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
+      // To avoid both the profiling stack frame and jit frame being recorded
+      // (and showing up twice), the interpreter marks the interpreter
+      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
+      if (profilingStackFrame.isOSRFrame()) {
+        profilingStackIndex++;
+        continue;
+      }
+
+      MOZ_ASSERT(lastLabelFrameStackAddr);
+      profilingStackAddr = lastLabelFrameStackAddr;
+    }
+
+    if (nativeIndex >= 0) {
+      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
+    }
+
+    // If there's a native stack frame which has the same SP as a profiling
+    // stack frame, pretend we didn't see the native stack frame.  In effect
+    // this means profiling stack frames trump conflicting native frames.
+    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
+      nativeStackAddr = nullptr;
+      nativeIndex--;
+      MOZ_ASSERT(profilingStackAddr);
+    }
+
+    // Sanity checks.
+    MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr);
+    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr);
+
+    // Check to see if profiling stack frame is top-most, i.e. has the greater
+    // stack address of the two candidates.
+    if (profilingStackAddr > nativeStackAddr) {
+      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
+      const ProfilingStackFrame& profilingStackFrame =
+          profilingStackFrames[profilingStackIndex];
+
+      // Sp marker frames are just annotations and should not be recorded in
+      // the profile.
+      if (!profilingStackFrame.isSpMarkerFrame()) {
+        aCollector.CollectProfilingStackFrame(profilingStackFrame);
+      }
+      profilingStackIndex++;
+      continue;
+    }
+
+    // If we reach here, there must be a native stack frame and it must be the
+    // greatest frame.
+    if (nativeStackAddr) {
+      MOZ_ASSERT(nativeIndex >= 0);
+      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
+      aCollector.CollectNativeLeafAddr(addr);
+    }
+    // Always consume the native frame here, even when its stack pointer was
+    // null and nothing was collected for it.
+    if (nativeIndex >= 0) {
+      nativeIndex--;
+    }
+  }
+}
+
+#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
+static HANDLE GetThreadHandle(PlatformData* aData);
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
+static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
+                              void* aClosure) {
+  // aClosure is the NativeStack being filled; aFrameNumber is not used.
+  auto* stack = static_cast<NativeStack*>(aClosure);
+  MOZ_ASSERT(stack->mCount < MAX_NATIVE_FRAMES);
+
+  // Claim the next free slot and record this frame's SP and PC in it.
+  const size_t slot = stack->mCount++;
+  stack->mSPs[slot] = aSP;
+  stack->mPCs[slot] = aPC;
+}
+#endif
+
+#if defined(USE_FRAME_POINTER_STACK_WALK)
+static void DoFramePointerBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Record the sampled PC/SP as the first frame. Frame number 0 is passed
+  // because FramePointerStackWalk() below numbers its frames 1..N; the value
+  // is irrelevant anyway since StackWalkCallback() ignores it.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  // Whatever room is left after the frame recorded above.
+  const uint32_t remainingFrames =
+      static_cast<uint32_t>(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+
+  // Only walk when the frame pointer lies between the stack pointer and the
+  // registered top of the stack.
+  const void* stackEnd = aRegisteredThread.StackTop();
+  const bool framePointerInRange =
+      aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd;
+  if (!framePointerInRange) {
+    return;
+  }
+
+  FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, remainingFrames,
+                        &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
+                        const_cast<void*>(stackEnd));
+}
+#endif
+
+#if defined(USE_MOZ_STACK_WALK)
+static void DoMozStackWalkBacktrace(PSLockRef aLock,
+                                    const RegisteredThread& aRegisteredThread,
+                                    const Registers& aRegs,
+                                    NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Record the sampled PC/SP as the first frame. Frame number 0 is passed
+  // because MozStackWalkThread() below numbers its frames 1..N; the value is
+  // irrelevant anyway since StackWalkCallback() ignores it.
+  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
+
+  // Whatever room is left after the frame recorded above.
+  const uint32_t remainingFrames =
+      static_cast<uint32_t>(MAX_NATIVE_FRAMES - aNativeStack.mCount);
+
+  HANDLE targetThread = GetThreadHandle(aRegisteredThread.GetPlatformData());
+  MOZ_ASSERT(targetThread);
+  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, remainingFrames,
+                     &aNativeStack, targetThread, /* context */ nullptr);
+}
+#endif
+
+#ifdef USE_EHABI_STACKWALK
+static void DoEHABIBacktrace(PSLockRef aLock,
+                             const RegisteredThread& aRegisteredThread,
+                             const Registers& aRegs,
+                             NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Walk from the sampled machine context up to the registered stack top,
+  // writing SPs and PCs straight into aNativeStack, then store the value the
+  // walker returns as the frame count.
+  void* const stackTop = const_cast<void*>(aRegisteredThread.StackTop());
+  const size_t frameCount =
+      EHABIStackWalk(aRegs.mContext->uc_mcontext, stackTop, aNativeStack.mSPs,
+                     aNativeStack.mPCs, MAX_NATIVE_FRAMES);
+  aNativeStack.mCount = frameCount;
+}
+#endif
+
+#ifdef USE_LUL_STACKWALK
+
+// See the comment at the callsite for why this function is necessary.
+#  if defined(MOZ_HAVE_ASAN_BLACKLIST)
+MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
+                                           size_t aLen) {
+  // Calling memcpy() would be the natural choice, but memcpy() itself is
+  // still instrumented by ASAN even though this function is not, so the
+  // false positive would still show up. The bytes are therefore copied by
+  // hand, one at a time.
+  auto* out = static_cast<char*>(aDst);
+  const auto* in = static_cast<const char*>(aSrc);
+
+  for (size_t remaining = aLen; remaining > 0; remaining--) {
+    *out++ = *in++;
+  }
+}
+#  endif
+
+static void DoLULBacktrace(PSLockRef aLock,
+                           const RegisteredThread& aRegisteredThread,
+                           const Registers& aRegs, NativeStack& aNativeStack) {
+  // WARNING: this function runs within the profiler's "critical section".
+  // WARNING: this function might be called while the profiler is inactive, and
+  //          cannot rely on ActivePS.
+
+  // Overview: (1) seed LUL's start registers from the sampled machine context,
+  // (2) copy a bounded image of the sampled stack, (3) have LUL unwind from
+  // that image into aNativeStack, and (4) update LUL's statistics.
+
+  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
+
+  lul::UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+
+  // Per-platform extraction of the registers LUL starts unwinding from.
+#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
+#  elif defined(GP_PLAT_amd64_freebsd)
+  startRegs.xip = lul::TaggedUWord(mc->mc_rip);
+  startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
+  startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
+#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
+  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
+  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
+  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
+  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
+  startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
+#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  startRegs.pc = lul::TaggedUWord(mc->pc);
+  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+  startRegs.sp = lul::TaggedUWord(mc->sp);
+#  elif defined(GP_PLAT_arm64_freebsd)
+  startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
+  startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
+  startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
+#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
+#  elif defined(GP_PLAT_mips64_linux)
+  startRegs.pc = lul::TaggedUWord(mc->pc);
+  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
+  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
+#  else
+#    error "Unknown plat"
+#  endif
+
+  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
+  // stack's registered top point.  Do some basic sanity checks too.  This
+  // assumes that the TaggedUWord holding the stack pointer value is valid, but
+  // it should be, since it was constructed that way in the code just above.
+
+  // We could construct |stackImg| so that LUL reads directly from the stack in
+  // question, rather than from a copy of it.  That would reduce overhead and
+  // space use a bit.  However, it gives a problem with dynamic analysis tools
+  // (ASan, TSan, Valgrind) which is that such tools will report invalid or
+  // racing memory accesses, and such accesses will be reported deep inside LUL.
+  // By taking a copy here, we can either sanitise the copy (for Valgrind) or
+  // copy it using an unchecked memcpy (for ASan, TSan).  That way we don't have
+  // to try and suppress errors inside LUL.
+  //
+  // N_STACK_BYTES is set to 160KB.  This is big enough to hold all stacks
+  // observed in some minutes of testing, whilst keeping the size of this
+  // function (DoLULBacktrace)'s frame reasonable.  Most stacks observed in
+  // practice are small, 4KB or less, and so the copy costs are insignificant
+  // compared to other profiler overhead.
+  //
+  // |stackImg| is allocated on this (the sampling thread's) stack.  That
+  // implies that the frame for this function is at least N_STACK_BYTES large.
+  // In general it would be considered unacceptable to have such a large frame
+  // on a stack, but it only exists for the unwinder thread, and so is not
+  // expected to be a problem.  Allocating it on the heap is troublesome because
+  // this function runs whilst the sampled thread is suspended, so any heap
+  // allocation risks deadlock.  Allocating it as a global variable is not
+  // thread safe, which would be a problem if we ever allow multiple sampler
+  // threads.  Hence allocating it on the stack seems to be the least-worst
+  // option.
+
+  lul::StackImage stackImg;
+
+  {
+    // Only the amd64 variants use a non-zero red zone (128 bytes) below the
+    // stack pointer; every other platform here starts the copy at the stack
+    // pointer itself.
+#  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
+      defined(GP_PLAT_amd64_freebsd)
+    uintptr_t rEDZONE_SIZE = 128;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
+      defined(GP_PLAT_arm64_freebsd)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#  elif defined(GP_PLAT_mips64_linux)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
+#  else
+#    error "Unknown plat"
+#  endif
+    uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
+    // Round both bounds down to a multiple of the pointer size.
+    uintptr_t ws = sizeof(void*);
+    start &= ~(ws - 1);
+    end &= ~(ws - 1);
+    // Copy [start, end), capped at N_STACK_BYTES; nothing is copied when
+    // start is not below end.
+    uintptr_t nToCopy = 0;
+    if (start < end) {
+      nToCopy = end - start;
+      if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
+    }
+    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+    stackImg.mLen = nToCopy;
+    stackImg.mStartAvma = start;
+    if (nToCopy > 0) {
+      // If this is a vanilla memcpy(), ASAN makes the following complaint:
+      //
+      //   ERROR: AddressSanitizer: stack-buffer-underflow ...
+      //   ...
+      //   HINT: this may be a false positive if your program uses some custom
+      //   stack unwind mechanism or swapcontext
+      //
+      // This code is very much a custom stack unwind mechanism! So we use an
+      // alternative memcpy() implementation that is ignored by ASAN.
+#  if defined(MOZ_HAVE_ASAN_BLACKLIST)
+      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+#  else
+      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+#  endif
+      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
+    }
+  }
+
+  // Unwind from the copied image; the PCs, SPs and frame count are written
+  // into aNativeStack.
+  size_t framePointerFramesAcquired = 0;
+  lul::LUL* lul = CorePS::Lul(aLock);
+  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
+              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
+              &aNativeStack.mCount, &framePointerFramesAcquired,
+              MAX_NATIVE_FRAMES, &startRegs, &stackImg);
+
+  // Update stats in the LUL stats object.  Unfortunately this requires
+  // three global memory operations.
+  // NOTE(review): `mCount - 1` is unsigned arithmetic and would wrap if
+  // Unwind() ever reported zero frames -- confirm that it always yields at
+  // least one.
+  lul->mStats.mContext += 1;
+  lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
+  lul->mStats.mFP += framePointerFramesAcquired;
+}
+
+#endif
+
+#ifdef HAVE_NATIVE_UNWIND
+static void DoNativeBacktrace(PSLockRef aLock,
+                              const RegisteredThread& aRegisteredThread,
+                              const Registers& aRegs,
+                              NativeStack& aNativeStack) {
+  // This method determines which stackwalker is used for periodic and
+  // synchronous samples. (Backtrace samples are treated differently, see
+  // profiler_suspend_and_sample_thread() for details). The only part of the
+  // ordering that matters is that LUL must precede FRAME_POINTER, because on
+  // Linux they can both be present.
+  //
+  // Exactly one walker is selected at compile time and receives all four
+  // arguments unchanged; a build in which none of the USE_* macros below is
+  // defined fails with "Invalid configuration".
+#  if defined(USE_LUL_STACKWALK)
+  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_EHABI_STACKWALK)
+  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_FRAME_POINTER_STACK_WALK)
+  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  elif defined(USE_MOZ_STACK_WALK)
+  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
+#  else
+#    error "Invalid configuration"
+#  endif
+}
+#endif
+
+// Writes some components shared by periodic and synchronous profiles to
+// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
+// and DoPeriodicSample().)
+//
+// The grammar for entry sequences is in a comment above
+// ProfileBuffer::StreamSamplesToJSON.
+static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
+                                  RegisteredThread& aRegisteredThread,
+                                  const Registers& aRegs, uint64_t aSamplePos,
+                                  ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  MOZ_ASSERT(!aBuffer.IsThreadSafe(),
+             "Mutexes cannot be used inside this critical section");
+
+  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
+
+  ProfileBufferCollector stackCollector(aBuffer, aSamplePos);
+  NativeStack unwoundStack;
+
+  // Remains false when native unwinding is compiled out, or when the
+  // stack-walk feature is not active.
+  bool walkedNativeStack = false;
+#if defined(HAVE_NATIVE_UNWIND)
+  if (ActivePS::FeatureStackWalk(aLock)) {
+    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, unwoundStack);
+    walkedNativeStack = true;
+  }
+#endif
+
+  // The stacks are merged in both cases; without a native walk,
+  // `unwoundStack` is simply left as default-constructed.
+  MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
+              aRegs, unwoundStack, stackCollector);
+
+  // We can't walk the whole native stack, but we can record the top frame.
+  if (!walkedNativeStack && ActivePS::FeatureLeaf(aLock)) {
+    aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
+  }
+}
+
+// Writes a complete synchronous sample to `aBuffer`: the thread ID entry, the
+// sample time, and then the components shared with periodic samples.
+static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
+                         const TimeStamp& aNow, const Registers& aRegs,
+                         ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  // The sample starts with its thread ID entry; remember where it landed.
+  const uint64_t threadIdEntryPos =
+      aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
+
+  // Then comes the sample time in milliseconds, relative to process start.
+  TimeDuration sinceProcessStart = aNow - CorePS::ProcessStartTime();
+  aBuffer.AddEntry(
+      ProfileBufferEntry::Time(sinceProcessStart.ToMilliseconds()));
+
+  DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
+                 threadIdEntryPos, aBuffer);
+}
+
+// Writes the components of a periodic sample to `aBuffer`.
+// The ThreadId entry has already been written to the main ProfileBuffer, at
+// position `aSamplePos`; the rest of the sample is written to `aBuffer`
+// (which may be a different buffer).
+// `aProfiledThreadData` is not used by the current implementation.
+static void DoPeriodicSample(PSLockRef aLock,
+                             RegisteredThread& aRegisteredThread,
+                             ProfiledThreadData& aProfiledThreadData,
+                             const Registers& aRegs, uint64_t aSamplePos,
+                             ProfileBuffer& aBuffer) {
+  // WARNING: this function runs within the profiler's "critical section".
+
+  // Everything a periodic sample records is shared with synchronous samples.
+  DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
+                 aSamplePos, aBuffer);
+}
+
+// END sampling/unwinding code
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN saving/streaming code
+
+// Largest integer value accepted by SafeJSInteger(): 2^53 - 1.
+static const uint64_t kJS_MAX_SAFE_UINTEGER = 9007199254740991ULL;
+
+// Returns aValue as a signed 64-bit integer when it does not exceed
+// kJS_MAX_SAFE_UINTEGER, and -1 otherwise.
+static int64_t SafeJSInteger(uint64_t aValue) {
+  if (aValue > kJS_MAX_SAFE_UINTEGER) {
+    return -1;
+  }
+  return static_cast<int64_t>(aValue);
+}
+
+// Writes one shared library as a JSON object element of the array that is
+// currently open in aWriter.
+static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
+                                         const SharedLibrary& aLib) {
+  aWriter.StartObjectElement();
+  // Numeric fields go through SafeJSInteger(), which substitutes -1 for
+  // values above kJS_MAX_SAFE_UINTEGER.
+  aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
+  aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
+  aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
+  // The remaining fields are written as strings, unmodified.
+  aWriter.StringProperty("name", aLib.GetModuleName());
+  aWriter.StringProperty("path", aLib.GetModulePath());
+  aWriter.StringProperty("debugName", aLib.GetDebugName());
+  aWriter.StringProperty("debugPath", aLib.GetDebugPath());
+  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
+  aWriter.StringProperty("arch", aLib.GetArch());
+  aWriter.EndObject();
+}
+
+// Streams every shared library reported by SharedLibraryInfo::GetInfoForSelf()
+// to aWriter, in address-sorted order, one JSON object element per library.
+void AppendSharedLibraries(JSONWriter& aWriter) {
+  SharedLibraryInfo libraries = SharedLibraryInfo::GetInfoForSelf();
+  libraries.SortByAddress();
+
+  size_t index = 0;
+  while (index < libraries.GetSize()) {
+    AddSharedLibraryInfoToStream(aWriter, libraries.GetEntry(index));
+    ++index;
+  }
+}
+
+// Streams one JSON object per profiling category into aWriter. The objects are
+// generated by expanding MOZ_PROFILING_CATEGORY_LIST with the three local
+// macros defined below.
+static void StreamCategories(SpliceableJSONWriter& aWriter) {
+  // Same order as ProfilingCategory. Format:
+  // [
+  //   {
+  //     name: "Idle",
+  //     color: "transparent",
+  //     subcategories: ["Other"],
+  //   },
+  //   {
+  //     name: "Other",
+  //     color: "grey",
+  //     subcategories: [
+  //       "JSM loading",
+  //       "Subprocess launching",
+  //       "DLL loading"
+  //     ]
+  //   },
+  //   ...
+  // ]
+
+  // Opens a category object, writes its "name" and "color", and opens its
+  // "subcategories" array.
+#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
+  aWriter.Start();                                               \
+  aWriter.StringProperty("name", labelAsString);                 \
+  aWriter.StringProperty("color", color);                        \
+  aWriter.StartArrayProperty("subcategories");
+  // Appends one subcategory label to the currently open array.
+#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
+  aWriter.StringElement(labelAsString);
+  // Closes the "subcategories" array, then the category object.
+#define CATEGORY_JSON_END_CATEGORY \
+  aWriter.EndArray();              \
+  aWriter.EndObject();
+
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
+                              CATEGORY_JSON_SUBCATEGORY,
+                              CATEGORY_JSON_END_CATEGORY)
+
+  // The helper macros are only meaningful inside this function.
+#undef CATEGORY_JSON_BEGIN_CATEGORY
+#undef CATEGORY_JSON_SUBCATEGORY
+#undef CATEGORY_JSON_END_CATEGORY
+}
+
+// Streams the display schema of every registered marker type to aWriter,
+// emitting each distinct marker type name only once.
+static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
+  // View over all registered marker-type-specific functions.
+  Span<const base_profiler_markers_detail::Streaming::MarkerTypeFunctions>
+      registeredFunctions =
+          base_profiler_markers_detail::Streaming::MarkerTypeFunctionsArray();
+
+  // Marker type names already streamed. Duplicates may come from the same
+  // code potentially living in different libraries.
+  std::set<std::string> streamedNames;
+
+  for (const auto& functions : registeredFunctions) {
+    auto typeName = functions.mMarkerTypeNameFunction();
+    // emplace() reports through `second` whether the name is new to the set.
+    if (!streamedNames.emplace(typeName.data(), typeName.size()).second) {
+      // This schema was already streamed.
+      continue;
+    }
+    functions.mMarkerSchemaFunction().Stream(aWriter, typeName);
+  }
+}
+
+static int64_t MicrosecondsSince1970();
+
+// Writes the properties of the profile's "meta" object to aWriter. The caller
+// is expected to have opened the object. When not called on the main thread,
+// only the leading properties (up to "markerSchema") are written.
+static void StreamMetaJSCustomObject(PSLockRef aLock,
+                                     SpliceableJSONWriter& aWriter,
+                                     bool aIsShuttingDown) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  aWriter.IntProperty("version", 19);
+
+  // "startTime" is the process start time in milliseconds since midnight
+  // January 1, 1970 GMT. It is obtained by subtracting the age of the process
+  // from the current wall-clock time.
+  TimeDuration processAge =
+      TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
+  const double nowMsSince1970 = MicrosecondsSince1970() / 1000.0;
+  aWriter.DoubleProperty("startTime",
+                         nowMsSince1970 - processAge.ToMilliseconds());
+
+  // Unlike "startTime", "shutdownTime" is relative to startTime, like every
+  // other time in the profile JSON. It is null unless we are shutting down.
+  if (!aIsShuttingDown) {
+    aWriter.NullProperty("shutdownTime");
+  } else {
+    aWriter.DoubleProperty("shutdownTime", profiler_time());
+  }
+
+  aWriter.StartArrayProperty("categories");
+  StreamCategories(aWriter);
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("markerSchema");
+  StreamMarkerSchema(aWriter);
+  aWriter.EndArray();
+
+  // Off the main thread, stop here. At the moment this only happens in a
+  // content process whose profile is going to be sent to the parent process;
+  // the parent process profile's "meta" object already has the remaining
+  // properties, and is dumped on that process's main thread.
+  const bool onMainThread = profiler_is_main_thread();
+  if (!onMainThread) {
+    return;
+  }
+
+  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
+  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
+
+#ifdef DEBUG
+  const int isDebugBuild = 1;
+#else
+  const int isDebugBuild = 0;
+#endif
+  aWriter.IntProperty("debug", isDebugBuild);
+
+  // These three properties are always written as 0 here.
+  aWriter.IntProperty("gcpoison", 0);
+  aWriter.IntProperty("asyncstack", 0);
+  aWriter.IntProperty("processType", 0);
+}
+
+// Discards expired pages, then streams every remaining profiled page to
+// aWriter through the page's own StreamJSON().
+// NOTE(review): only CorePS::Exists() is asserted here although ActivePS
+// accessors are used; the caller visible in this file asserts
+// ActivePS::Exists() before calling -- confirm that holds for all callers.
+static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  ActivePS::DiscardExpiredPages(aLock);
+  for (const auto& page : ActivePS::ProfiledPages(aLock)) {
+    page->StreamJSON(aWriter);
+  }
+}
+
+// Streams this process's profile to aWriter; the profiler lock must be held.
+// With aOnlyThreads set, only the items of the "threads" array are written
+// (without the enclosing array); otherwise "libs", "meta", "pages", the
+// overhead and counter data, "threads" and "pausedRanges" are all written.
+static void locked_profiler_stream_json_for_this_process(
+    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
+    bool aIsShuttingDown, bool aOnlyThreads = false) {
+  LOG("locked_profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process);
+
+  const double collectionStartMs = profiler_time();
+
+  ProfileBuffer& profileBuffer = ActivePS::Buffer(aLock);
+
+  // If a "window length" is set, discard the data that is older than it.
+  Maybe<double> windowLengthS = ActivePS::Duration(aLock);
+  if (windowLengthS.isSome()) {
+    const double windowStartMs = collectionStartMs - *windowLengthS * 1000;
+    profileBuffer.DiscardSamplesBeforeTime(windowStartMs);
+  }
+
+  const bool wholeProfile = !aOnlyThreads;
+
+  if (wholeProfile) {
+    // Shared library info.
+    aWriter.StartArrayProperty("libs");
+    AppendSharedLibraries(aWriter);
+    aWriter.EndArray();
+
+    // Meta data.
+    aWriter.StartObjectProperty("meta");
+    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown);
+    aWriter.EndObject();
+
+    // Page data.
+    aWriter.StartArrayProperty("pages");
+    StreamPages(aLock, aWriter);
+    aWriter.EndArray();
+
+    profileBuffer.StreamProfilerOverheadToJSON(
+        aWriter, CorePS::ProcessStartTime(), aSinceTime);
+    profileBuffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(),
+                                       aSinceTime);
+
+    // Opens the array that lists the samples of each thread profile; it is
+    // closed after the threads have been streamed.
+    aWriter.StartArrayProperty("threads");
+  }
+
+  // The thread items themselves are streamed in every mode.
+  ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
+  {
+    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> profiledThreads =
+        ActivePS::ProfiledThreads(aLock);
+    for (auto& entry : profiledThreads) {
+      entry.second->StreamJSON(profileBuffer, aWriter,
+                               CorePS::ProcessName(aLock),
+                               CorePS::ETLDplus1(aLock),
+                               CorePS::ProcessStartTime(), aSinceTime);
+    }
+  }
+
+  if (wholeProfile) {
+    // Closes "threads".
+    aWriter.EndArray();
+
+    aWriter.StartArrayProperty("pausedRanges");
+    profileBuffer.StreamPausedRangesToJSON(aWriter, aSinceTime);
+    aWriter.EndArray();
+  }
+
+  const double collectionEndMs = profiler_time();
+
+  // Record when this collection started and ended, so that consumers know why
+  // no samples were collected for its duration. These entries go into the
+  // buffer after the profile has been streamed, so they will only be visible
+  // to the *next* profile collection (unless buffer wraparound has
+  // overwritten them by then).
+  profileBuffer.AddEntry(
+      ProfileBufferEntry::CollectionStart(collectionStartMs));
+  profileBuffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
+}
+
+// Takes the profiler lock and, if the profiler is active, streams this
+// process's profile to aWriter. Returns whether anything was streamed.
+bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
+                                           double aSinceTime,
+                                           bool aIsShuttingDown,
+                                           bool aOnlyThreads) {
+  LOG("profiler_stream_json_for_this_process");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  const bool profilerIsActive = ActivePS::Exists(lock);
+  if (profilerIsActive) {
+    locked_profiler_stream_json_for_this_process(
+        lock, aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads);
+  }
+  return profilerIsActive;
+}
+
+// END saving/streaming code
+////////////////////////////////////////////////////////////////////////
+
+// Returns the one-character marker shown next to a feature in the usage text:
+//   'D' / 'd': default feature, available / unavailable
+//   'S' / 's': startup-only extra default feature, available / unavailable
+//   '-' / 'x': any other feature, available / unavailable
+static char FeatureCategory(uint32_t aFeature) {
+  // Selects between the two markers of a category based on availability.
+  const auto byAvailability = [aFeature](char aAvailable, char aUnavailable) {
+    return (aFeature & AvailableFeatures()) ? aAvailable : aUnavailable;
+  };
+
+  if (aFeature & DefaultFeatures()) {
+    return byAvailability('D', 'd');
+  }
+
+  if (aFeature & StartupExtraDefaultFeatures()) {
+    return byAvailability('S', 's');
+  }
+
+  return byAvailability('-', 'x');
+}
+
+// Prints the Base Profiler environment-variable help through PrintToConsole():
+// the general usage text, one line per feature listed in
+// BASE_PROFILER_FOR_EACH_FEATURE, and the closing notes. It then terminates
+// the process with exit(aExitCode), so this function does not return.
+static void PrintUsageThenExit(int aExitCode) {
+  PrintToConsole(
+      "\n"
+      "Profiler environment variable usage:\n"
+      "\n"
+      "  MOZ_BASE_PROFILER_HELP\n"
+      "  If set to any value, prints this message.\n"
+      "  (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n"
+      "  for Gecko Profiler help, with more features).\n"
+      "\n"
+      "  MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n"
+      "  Enables BaseProfiler logging to stdout. The levels of logging\n"
+      "  available are 'MOZ_BASE_PROFILER_LOGGING' (least verbose),\n"
+      "  '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP\n"
+      "  If set to any value other than '' or '0'/'N'/'n', starts the\n"
+      "  profiler immediately on start-up.\n"
+      "  Useful if you want to profile code that runs very early.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n"
+      "  per process in the profiler's circular buffer when the profiler is\n"
+      "  first started.\n"
+      "  If unset, the platform default is used:\n"
+      "  %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
+      "  (%u bytes per entry -> %u or %u total bytes per process)\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n"
+      "  of entries in the profiler's circular buffer when the profiler\n"
+      "  is first started, in seconds.\n"
+      "  If unset, the life time of the entries will only be restricted by\n"
+      "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
+      "  additional time duration restriction will be applied.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
+      "  measured in milliseconds, when the profiler is first started.\n"
+      "  If unset, the platform default is used.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+      "  features, as the integer value of the features bitfield.\n"
+      "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
+      "  features, as a comma-separated list of strings.\n"
+      "  Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
+      "  If unset, the platform default is used.\n"
+      "\n"
+      "    Features: (x=unavailable, D/d=default/unavailable,\n"
+      "               S/s=MOZ_PROFILER_STARTUP extra "
+      "default/unavailable)\n",
+      // The seven arguments below fill the seven %u placeholders, in order:
+      // minimum and maximum entries, default and startup-default entries,
+      // bytes per entry, and the two matching total byte counts.
+      unsigned(ActivePS::scMinimumBufferEntries),
+      unsigned(ActivePS::scMaximumBufferEntries),
+      unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()),
+      unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
+      unsigned(scBytesPerEntry),
+      unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
+      unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() *
+               scBytesPerEntry));
+
+  // Prints one line per feature: category marker (see FeatureCategory()),
+  // feature bit value, feature name and description.
+#define PRINT_FEATURE(n_, str_, Name_, desc_)             \
+  PrintToConsole("    %c %7u: \"%s\" (%s)\n",             \
+                 FeatureCategory(ProfilerFeature::Name_), \
+                 ProfilerFeature::Name_, str_, desc_);
+
+  BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
+
+#undef PRINT_FEATURE
+
+  PrintToConsole(
+      "    -        \"default\" (All above D+S defaults)\n"
+      "\n"
+      "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
+      "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as "
+      "a\n"
+      "  comma-separated list of strings. A given thread will be sampled if\n"
+      "  any of the filters is a case-insensitive substring of the thread\n"
+      "  name. If unset, a default is used.\n"
+      "\n"
+      "  MOZ_PROFILER_SHUTDOWN\n"
+      "  If set, the profiler saves a profile to the named file on shutdown.\n"
+      "\n"
+      "  MOZ_PROFILER_SYMBOLICATE\n"
+      "  If set, the profiler will pre-symbolicate profiles.\n"
+      "  *Note* This will add a significant pause when gathering data, and\n"
+      "  is intended mainly for local development.\n"
+      "\n"
+      "  MOZ_PROFILER_LUL_TEST\n"
+      "  If set to any value, runs LUL unit tests at startup.\n"
+      "\n"
+      "  This platform %s native unwinding.\n"
+      "\n",
+#if defined(HAVE_NATIVE_UNWIND)
+      "supports"
+#else
+      "does not support"
+#endif
+  );
+
+  exit(aExitCode);
+}
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN Sampler
+
+// Forward declaration for Sampler::sSigHandlerCoordinator. The guard matches
+// the platforms on which Sampler declares that member.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+struct SigHandlerCoordinator;
+#endif
+
+// Sampler performs setup and teardown of the state required to sample with the
+// profiler. Sampler may exist when ActivePS is not present.
+//
+// SuspendAndSampleAndResumeThread must only be called from a single thread,
+// and must not sample the thread it is being called from. A separate Sampler
+// instance must be used for each thread which wants to capture samples.
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// With the exception of SamplerThread, all Sampler objects must be Disable-d
+// before releasing the lock which was used to create them. This avoids races
+// on linux with the SIGPROF signal handler.
+
+class Sampler {
+ public:
+  // Sets up the profiler such that it can begin sampling.
+  explicit Sampler(PSLockRef aLock);
+
+  // Disable the sampler, restoring it to its previous state. This must be
+  // called once, and only once, before the Sampler is destroyed.
+  void Disable(PSLockRef aLock);
+
+  // This method suspends and resumes the samplee thread. It calls the passed-in
+  // function-like object aProcessRegs (passing it a populated |const
+  // Registers&| arg) while the samplee thread is suspended.
+  //
+  // Func must be a function-like object that can be called with
+  // `(const Registers&, const TimeStamp&)`, as the callback passed by
+  // SamplerThread::Run() is; its return value is not documented here.
+  template <typename Func>
+  void SuspendAndSampleAndResumeThread(
+      PSLockRef aLock, const RegisteredThread& aRegisteredThread,
+      const TimeStamp& aNow, const Func& aProcessRegs);
+
+ private:
+  // The members below only exist on the signal-based platforms.
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  // Used to restore the SIGPROF handler when ours is removed.
+  struct sigaction mOldSigprofHandler;
+
+  // This process' ID. Needed as an argument for tgkill in
+  // SuspendAndSampleAndResumeThread.
+  int mMyPid;
+
+  // The sampler thread's ID.  Used to assert that it is not sampling itself,
+  // which would lead to deadlock.
+  int mSamplerTid;
+
+ public:
+  // This is the one-and-only variable used to communicate between the sampler
+  // thread and the samplee thread's signal handler. It's static because the
+  // samplee thread's signal handler is static.
+  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+#endif
+};
+
+// END Sampler
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN SamplerThread
+
+// The sampler thread controls sampling and runs whenever the profiler is
+// active. It periodically runs through all registered threads, finds those
+// that should be sampled, then pauses and samples them.
+
+class SamplerThread {
+ public:
+  // Creates a sampler thread, but doesn't start it.
+  // NOTE(review): the constructor is defined in a platform-specific file;
+  // presumably aIntervalMilliseconds is converted into mIntervalMicroseconds
+  // there -- confirm.
+  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                double aIntervalMilliseconds);
+  ~SamplerThread();
+
+  // This runs on (is!) the sampler thread.
+  void Run();
+
+  // This runs on the main thread.
+  void Stop(PSLockRef aLock);
+
+ private:
+  // This suspends the calling thread for the given number of microseconds.
+  // Best effort timing.
+  void SleepMicro(uint32_t aMicroseconds);
+
+  // The sampler used to suspend and sample threads.
+  Sampler mSampler;
+
+  // The activity generation, for detecting when the sampler thread must stop.
+  // Run() returns as soon as ActivePS::Generation() differs from this value.
+  const uint32_t mActivityGeneration;
+
+  // The interval between samples, measured in microseconds.
+  const int mIntervalMicroseconds;
+
+  // The OS-specific handle for the sampler thread.
+#if defined(GP_OS_windows)
+  HANDLE mThread;
+#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
+    defined(GP_OS_android) || defined(GP_OS_freebsd)
+  pthread_t mThread;
+#endif
+
+  // Not copyable.
+  SamplerThread(const SamplerThread&) = delete;
+  void operator=(const SamplerThread&) = delete;
+};
+
+// This function is required because we need to create a SamplerThread within
+// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
+// could probably be removed by moving some code around.
+//
+// Returns a SamplerThread allocated with `new`; all arguments are forwarded to
+// the SamplerThread constructor. NOTE(review): the caller presumably takes
+// ownership of the returned pointer -- confirm at the call site.
+static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
+                                       double aInterval) {
+  return new SamplerThread(aLock, aGeneration, aInterval);
+}
+
+// This function is the sampler thread.  This implementation is used for all
+// targets.
+//
+// Each loop iteration takes the profiler lock, returns if the profiler is no
+// longer active or if its generation no longer matches mActivityGeneration,
+// and otherwise (unless sampling is paused) samples the counters and the live
+// profiled threads. The lock is then released and the thread sleeps until the
+// next sample is due.
+void SamplerThread::Run() {
+  // TODO: If possible, name this thread later on, after NSPR becomes available.
+  // PR_SetCurrentThreadName("SamplerThread");
+
+  // Features won't change during this SamplerThread's lifetime, so we can read
+  // them once and store them locally.
+  const uint32_t features = []() -> uint32_t {
+    PSAutoLock lock;
+    if (!ActivePS::Exists(lock)) {
+      // If there is no active profiler, it doesn't matter what we return,
+      // because this thread will exit before any feature is used.
+      return 0;
+    }
+    return ActivePS::Features(lock);
+  }();
+
+  // Not *no*-stack-sampling means we do want stack sampling.
+  const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
+
+  // Use a local ProfileChunkedBuffer & ProfileBuffer to capture the stack.
+  // (This is to avoid touching the CorePS::CoreBuffer lock while
+  // a thread is suspended, because that thread could be working with
+  // the CorePS::CoreBuffer as well.)
+  ProfileBufferChunkManagerSingle localChunkManager(
+      ProfileBufferChunkManager::scExpectedMaximumStackSize);
+  ProfileChunkedBuffer localBuffer(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
+  ProfileBuffer localProfileBuffer(localBuffer);
+
+  // Will be kept between collections, to know what each collection does.
+  auto previousState = localBuffer.GetState();
+
+  // This will be positive if we are running behind schedule (sampling less
+  // frequently than desired) and negative if we are ahead of schedule.
+  TimeDuration lastSleepOvershoot = 0;
+  TimeStamp sampleStart = TimeStamp::NowUnfuzzed();
+
+  while (true) {
+    // This scope is for |lock|. It ends before we sleep below.
+    {
+      PSAutoLock lock;
+      TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();
+
+      if (!ActivePS::Exists(lock)) {
+        return;
+      }
+
+      // At this point profiler_stop() might have been called, and
+      // profiler_start() might have been called on another thread. If this
+      // happens the generation won't match.
+      if (ActivePS::Generation(lock) != mActivityGeneration) {
+        return;
+      }
+
+      ActivePS::ClearExpiredExitProfiles(lock);
+
+      TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();
+
+      if (!ActivePS::IsSamplingPaused(lock)) {
+        // Time of this sampling round; used for the counter entries and for
+        // the overhead statistics recorded at the end of the round.
+        TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
+        ProfileBuffer& buffer = ActivePS::Buffer(lock);
+
+        // handle per-process generic counters
+        const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
+        for (auto& counter : counters) {
+          // create Buffer entries for each counter
+          buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
+          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+          // XXX support keyed maps of counts
+          // In the future, we'll support keyed counters - for example, counters
+          // with a key which is a thread ID. For "simple" counters we'll just
+          // use a key of 0.
+          int64_t count;
+          uint64_t number;
+          counter->Sample(count, number);
+          buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
+          buffer.AddEntry(ProfileBufferEntry::Count(count));
+          if (number) {
+            buffer.AddEntry(ProfileBufferEntry::Number(number));
+          }
+        }
+        TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
+
+        if (stackSampling) {
+          const Vector<LiveProfiledThreadData>& liveThreads =
+              ActivePS::LiveProfiledThreads(lock);
+
+          for (auto& thread : liveThreads) {
+            RegisteredThread* registeredThread = thread.mRegisteredThread;
+            ProfiledThreadData* profiledThreadData =
+                thread.mProfiledThreadData.get();
+            RefPtr<ThreadInfo> info = registeredThread->Info();
+
+            // If the thread is asleep and has been sampled before in the same
+            // sleep episode, find and copy the previous sample, as that's
+            // cheaper than taking a new sample.
+            if (registeredThread->RacyRegisteredThread()
+                    .CanDuplicateLastSampleDueToSleep()) {
+              bool dup_ok = buffer.DuplicateLastSample(
+                  info->ThreadId(), CorePS::ProcessStartTime(),
+                  profiledThreadData->LastSample());
+              if (dup_ok) {
+                continue;
+              }
+            }
+
+            AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
+
+            TimeStamp now = TimeStamp::NowUnfuzzed();
+
+            // Add the thread ID now, so we know its position in the main
+            // buffer, which is used by some JS data. (DoPeriodicSample only
+            // knows about the temporary local buffer.)
+            uint64_t samplePos = buffer.AddThreadIdEntry(info->ThreadId());
+            profiledThreadData->LastSample() = Some(samplePos);
+
+            // Also add the time, so it's always there after the thread ID, as
+            // expected by the parser. (Other stack data is optional.)
+            // This is deliberately a separate variable from the round-wide
+            // `delta`, which must stay intact for CollectOverheadStats().
+            TimeDuration threadSampleDelta = now - CorePS::ProcessStartTime();
+            buffer.AddEntry(
+                ProfileBufferEntry::Time(threadSampleDelta.ToMilliseconds()));
+
+            mSampler.SuspendAndSampleAndResumeThread(
+                lock, *registeredThread, now,
+                [&](const Registers& aRegs, const TimeStamp& /* aNow */) {
+                  DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
+                                   aRegs, samplePos, localProfileBuffer);
+                });
+
+            // If data is complete, copy it into the global buffer.
+            auto state = localBuffer.GetState();
+            if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
+              LOG("Stack sample too big for local storage, needed %u bytes",
+                  unsigned(state.mRangeEnd - previousState.mRangeEnd));
+            } else if (state.mRangeEnd - previousState.mRangeEnd >=
+                       *CorePS::CoreBuffer().BufferLength()) {
+              LOG("Stack sample too big for profiler storage, needed %u bytes",
+                  unsigned(state.mRangeEnd - previousState.mRangeEnd));
+            } else {
+              CorePS::CoreBuffer().AppendContents(localBuffer);
+            }
+
+            // Clean up for the next run.
+            localBuffer.Clear();
+            previousState = localBuffer.GetState();
+          }
+        }
+
+#if defined(USE_LUL_STACKWALK)
+        // The LUL unwind object accumulates frame statistics. Periodically we
+        // should poke it to give it a chance to print those statistics.  This
+        // involves doing I/O (fprintf, __android_log_print, etc.) and so
+        // can't safely be done from the critical section inside
+        // SuspendAndSampleAndResumeThread, which is why it is done here.
+        CorePS::Lul(lock)->MaybeShowStats();
+#endif
+        TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();
+
+        {
+          AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
+          ActivePS::FulfillChunkRequests(lock);
+        }
+
+        buffer.CollectOverheadStats(delta, lockAcquired - sampleStart,
+                                    expiredMarkersCleaned - lockAcquired,
+                                    countersSampled - expiredMarkersCleaned,
+                                    threadsSampled - countersSampled);
+      }
+    }
+    // gPSMutex is not held after this point.
+
+    // Calculate how long a sleep to request.  After the sleep, measure how
+    // long we actually slept and take the difference into account when
+    // calculating the sleep interval for the next iteration.  This is an
+    // attempt to keep "to schedule" in the presence of inaccuracy of the
+    // actual sleep intervals.
+    TimeStamp targetSleepEndTime =
+        sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
+    TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
+    TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+    double sleepTime = std::max(
+        0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds());
+    SleepMicro(static_cast<uint32_t>(sleepTime));
+    sampleStart = TimeStamp::NowUnfuzzed();
+    lastSleepOvershoot =
+        sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+  }
+}
+
+// Temporary closing namespaces from enclosing platform.cpp.
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// We #include these files directly because it means those files can use
+// declarations from this file trivially.  These provide target-specific
+// implementations of all SamplerThread methods except Run().
+#if defined(GP_OS_windows)
+#  include "platform-win32.cpp"
+#elif defined(GP_OS_darwin)
+#  include "platform-macos.cpp"
+#elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include "platform-linux-android.cpp"
+#else
+#  error "bad platform"
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Allocates a PlatformData for the thread with ID aThreadId using `new`, and
+// returns it wrapped in a UniquePlatformData.
+UniquePlatformData AllocPlatformData(int aThreadId) {
+  return UniquePlatformData(new PlatformData(aThreadId));
+}
+
+// Deleter functor for PlatformData pointers: deletes `aData`.
+void PlatformDataDestructor::operator()(PlatformData* aData) {
+  delete aData;
+}
+
+// END SamplerThread
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN externally visible functions
+
+// Translates a single feature name into its feature bit(s).
+//
+// "default" expands to DefaultFeatures() (plus StartupExtraDefaultFeatures()
+// when `aIsStartup` is true), masked by AvailableFeatures(). Any other name is
+// compared against the names enumerated by BASE_PROFILER_FOR_EACH_FEATURE.
+// An unknown name is reported on the console and yields 0.
+static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
+  if (strcmp(aFeature, "default") == 0) {
+    uint32_t defaults = DefaultFeatures();
+    if (aIsStartup) {
+      defaults |= StartupExtraDefaultFeatures();
+    }
+    return defaults & AvailableFeatures();
+  }
+
+  // Expands to one name comparison (and early return) per known feature.
+#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
+  if (!strcmp(aFeature, str_)) {                  \
+    return ProfilerFeature::Name_;                \
+  }
+
+  BASE_PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)
+
+#undef PARSE_FEATURE_BIT
+
+  PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature);
+  // The name may belong to an old feature that is not implemented anymore, so
+  // this is not meant to be fatal.
+  // NOTE(review): whether PrintUsageThenExit(0) actually returns is not
+  // visible from here -- confirm against its definition.
+  PrintUsageThenExit(0);
+  return 0;
+}
+
+// ORs together the feature bits of the `aFeatureCount` names in `aFeatures`,
+// each one translated by ParseFeature() with the given `aIsStartup` flag.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup /* = false */) {
+  uint32_t combined = 0;
+  const char** const end = aFeatures + aFeatureCount;
+  for (const char** it = aFeatures; it != end; ++it) {
+    combined |= ParseFeature(*it, aIsStartup);
+  }
+  return combined;
+}
+
+// Looks up the RegisteredThread whose ThreadInfo carries the current thread's
+// id by scanning CorePS's list of registered threads; returns nullptr when
+// there is none. This should only be called in places where
+// TLSRegisteredThread can't be used.
+static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) {
+  const int currentId = profiler_current_thread_id();
+  for (const auto& candidate : CorePS::RegisteredThreads(aLock)) {
+    if (candidate->Info()->ThreadId() != currentId) {
+      continue;
+    }
+    return candidate.get();
+  }
+
+  return nullptr;
+}
+
+// Registers the calling thread under `aName` and returns a pointer to its
+// ProfilingStack, or nullptr if TLSRegisteredThread::Init() fails.
+//
+// A RegisteredThread is created for the thread (with `aStackTop`), stored in
+// TLS and appended to CorePS. If the profiler is active and the thread should
+// be profiled, it is also added to ActivePS's live profiled threads.
+static ProfilingStack* locked_register_thread(PSLockRef aLock,
+                                              const char* aName,
+                                              void* aStackTop) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // The thread must not be registered yet.
+  MOZ_ASSERT(!FindCurrentThreadRegisteredThread(aLock));
+
+  VTUNE_REGISTER_THREAD(aName);
+
+  if (!TLSRegisteredThread::Init(aLock)) {
+    return nullptr;
+  }
+
+  RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
+      aName, profiler_current_thread_id(), profiler_is_main_thread());
+  UniquePtr<RegisteredThread> owned =
+      MakeUnique<RegisteredThread>(threadInfo, aStackTop);
+  // Raw pointer used below; it remains valid after ownership moves to CorePS.
+  RegisteredThread* const thread = owned.get();
+
+  TLSRegisteredThread::SetRegisteredThread(aLock, thread);
+
+  if (ActivePS::Exists(aLock) &&
+      ActivePS::ShouldProfileThread(aLock, threadInfo)) {
+    thread->RacyRegisteredThread().SetIsBeingProfiled(true);
+    ActivePS::AddLiveProfiledThread(
+        aLock, thread, MakeUnique<ProfiledThreadData>(threadInfo));
+  }
+
+  ProfilingStack* const stack =
+      &thread->RacyRegisteredThread().ProfilingStack();
+
+  CorePS::AppendRegisteredThread(aLock, std::move(owned));
+
+  return stack;
+}
+
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+                                  double aInterval, uint32_t aFeatures,
+                                  const char** aFilters, uint32_t aFilterCount,
+                                  const Maybe<double>& aDuration);
+
+// Splits `aString` at every comma.
+//
+// The string is copied into `aStorage` (which is replaced by a fresh buffer),
+// each comma in the copy is overwritten with '\0', and the returned vector
+// holds a pointer to the start of every resulting piece. The pointers refer
+// into `aStorage`, so they are only usable while `aStorage` keeps that buffer.
+// Empty pieces are kept: an empty input yields one empty piece.
+static Vector<const char*> SplitAtCommas(const char* aString,
+                                         UniquePtr<char[]>& aStorage) {
+  const size_t length = strlen(aString);
+  aStorage = MakeUnique<char[]>(length + 1);
+  // Copy the characters and the terminating null char.
+  PodCopy(aStorage.get(), aString, length + 1);
+
+  Vector<const char*> pieces;
+  char* const buffer = aStorage.get();
+  char* const last = buffer + length;  // Position of the terminating '\0'.
+  char* pieceStart = buffer;
+  for (char* cursor = buffer; cursor <= last; ++cursor) {
+    if (*cursor == ',') {
+      *cursor = '\0';
+    }
+    if (*cursor != '\0') {
+      continue;
+    }
+    // End of a piece (either a former comma or the end of the string).
+    MOZ_RELEASE_ASSERT(pieces.append(pieceStart));
+    pieceStart = cursor + 1;
+  }
+  return pieces;
+}
+
+// Initializes the Base Profiler.
+//
+// Records the calling thread as the profiler's main thread, creates CorePS,
+// registers the calling thread under kMainThreadName (with `aStackTop`), and
+// runs the platform-specific initialization. CorePS must not exist yet.
+//
+// If MOZ_PROFILER_STARTUP is set to something other than "", "0", "N" or "n"
+// (and MOZ_PROFILER_STARTUP_NO_BASE is not set), profiling is then started
+// right away, configured from these environment variables:
+//   MOZ_PROFILER_STARTUP_ENTRIES           buffer capacity
+//   MOZ_PROFILER_STARTUP_DURATION          duration (0 means none)
+//   MOZ_PROFILER_STARTUP_INTERVAL          interval, in the range (0, 1000]
+//   MOZ_PROFILER_STARTUP_FEATURES_BITFIELD features as a decimal bitfield
+//   MOZ_PROFILER_STARTUP_FEATURES          comma-separated feature names
+//                                          (only read if no bitfield is given)
+//   MOZ_PROFILER_STARTUP_FILTERS           comma-separated thread filters
+// An invalid value prints a message and calls PrintUsageThenExit(1).
+void profiler_init(void* aStackTop) {
+  LOG("profiler_init");
+
+  scProfilerMainThreadId = profiler_current_thread_id();
+
+  VTUNE_INIT();
+
+  MOZ_RELEASE_ASSERT(!CorePS::Exists());
+
+  if (getenv("MOZ_BASE_PROFILER_HELP")) {
+    PrintUsageThenExit(0);  // terminates execution
+  }
+
+  SharedLibraryInfo::Initialize();
+
+  // Defaults, used unless overridden by the environment variables read below.
+  uint32_t features = DefaultFeatures() & AvailableFeatures();
+
+  // Backing storage for the strings that `filters` points to when the filters
+  // come from MOZ_PROFILER_STARTUP_FILTERS.
+  UniquePtr<char[]> filterStorage;
+
+  Vector<const char*> filters;
+  MOZ_RELEASE_ASSERT(filters.append(kMainThreadName));
+
+  PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES;
+  Maybe<double> duration = Nothing();
+  double interval = BASE_PROFILER_DEFAULT_INTERVAL;
+
+  {
+    PSAutoLock lock;
+
+    // We've passed the possible failure point. Instantiate CorePS, which
+    // indicates that the profiler has initialized successfully.
+    CorePS::Create(lock);
+
+    Unused << locked_register_thread(lock, kMainThreadName, aStackTop);
+
+    // Platform-specific initialization.
+    PlatformInit(lock);
+
+    // (Linux-only) We could create CorePS::mLul and read unwind info into it
+    // at this point. That would match the lifetime implied by destruction of
+    // it in profiler_shutdown() just below. However, that gives a big delay on
+    // startup, even if no profiling is actually to be done. So, instead, it is
+    // created on demand at the first call to PlatformStart().
+
+    // Startup profiling is only requested when MOZ_PROFILER_STARTUP is set to
+    // something other than "", "0", "N" or "n". Otherwise initialization is
+    // complete and we return without starting the profiler.
+    const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
+    if (!startupEnv || startupEnv[0] == '\0' ||
+        ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
+          startupEnv[0] == 'n') &&
+         startupEnv[1] == '\0')) {
+      return;
+    }
+
+    // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
+    // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
+    // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
+    if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
+      return;
+    }
+
+    LOG("- MOZ_PROFILER_STARTUP is set");
+
+    // Startup default capacity may be different.
+    capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;
+
+    const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
+    if (startupCapacity && startupCapacity[0] != '\0') {
+      errno = 0;
+      long capacityLong = strtol(startupCapacity, nullptr, 10);
+      // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
+      // the maximum 32-bit signed number (as more than that is clamped down to
+      // 2^31 anyway).
+      if (errno == 0 && capacityLong > 0 &&
+          static_cast<uint64_t>(capacityLong) <=
+              static_cast<uint64_t>(INT32_MAX)) {
+        capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
+            static_cast<uint32_t>(capacityLong)));
+        LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
+                       startupCapacity);
+        PrintUsageThenExit(1);
+      }
+    }
+
+    const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
+    if (startupDuration && startupDuration[0] != '\0') {
+      // The duration is a floating point number. Use StringToDouble rather than
+      // strtod, so that "." is used as the decimal separator regardless of OS
+      // locale.
+      auto durationVal = StringToDouble(std::string(startupDuration));
+      if (durationVal && *durationVal >= 0.0) {
+        // A duration of exactly 0 is accepted but leaves `duration` as
+        // Nothing().
+        if (*durationVal > 0.0) {
+          duration = Some(*durationVal);
+        }
+        LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal);
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
+                       startupDuration);
+        PrintUsageThenExit(1);
+      }
+    }
+
+    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
+    if (startupInterval && startupInterval[0] != '\0') {
+      // The interval is a floating point number. Use StringToDouble rather than
+      // strtod, so that "." is used as the decimal separator regardless of OS
+      // locale.
+      auto intervalValue = StringToDouble(MakeStringSpan(startupInterval));
+      // Values outside (0, 1000] are rejected with the same message as
+      // unparsable ones.
+      if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) {
+        interval = *intervalValue;
+        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
+      } else {
+        PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
+                       startupInterval);
+        PrintUsageThenExit(1);
+      }
+    }
+
+    features |= StartupExtraDefaultFeatures() & AvailableFeatures();
+
+    // An explicit bitfield replaces the defaults computed so far and takes
+    // precedence over the list of feature names.
+    const char* startupFeaturesBitfield =
+        getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
+    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
+      errno = 0;
+      features = strtol(startupFeaturesBitfield, nullptr, 10);
+      // A result of 0 is treated as invalid (it is also what strtol returns
+      // when no number could be parsed).
+      if (errno == 0 && features != 0) {
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
+      } else {
+        PrintToConsole(
+            "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
+            startupFeaturesBitfield);
+        PrintUsageThenExit(1);
+      }
+    } else {
+      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
+      if (startupFeatures && startupFeatures[0] != '\0') {
+        // Interpret startupFeatures as a list of feature strings, separated by
+        // commas.
+        UniquePtr<char[]> featureStringStorage;
+        Vector<const char*> featureStringArray =
+            SplitAtCommas(startupFeatures, featureStringStorage);
+        features = ParseFeaturesFromStringArray(featureStringArray.begin(),
+                                                featureStringArray.length(),
+                                                /* aIsStartup */ true);
+        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
+      }
+    }
+
+    // Filters from the environment replace the default main-thread filter.
+    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
+    if (startupFilters && startupFilters[0] != '\0') {
+      filters = SplitAtCommas(startupFilters, filterStorage);
+      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
+    }
+
+    locked_profiler_start(lock, capacity, interval, features, filters.begin(),
+                          filters.length(), duration);
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+}
+
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+                                                 const char* aFilename,
+                                                 bool aIsShuttingDown);
+
+static SamplerThread* locked_profiler_stop(PSLockRef aLock);
+
+// Shuts the profiler down. Must be called on the main thread while CorePS
+// exists.
+//
+// If the profiler is active, the profile is first saved to the file named by
+// MOZ_PROFILER_SHUTDOWN (when that variable is set) and profiling is stopped.
+// CorePS is then destroyed and this thread's TLSRegisteredThread cleared.
+void profiler_shutdown() {
+  LOG("profiler_shutdown");
+
+  VTUNE_SHUTDOWN();
+
+  MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // When the profiler is active, the SamplerThread handle has to be obtained
+  // before ActivePS is destroyed, so that it can be deleted below.
+  SamplerThread* toDelete = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (ActivePS::Exists(lock)) {
+      // Save the profile on shutdown if requested.
+      if (const char* shutdownFile = getenv("MOZ_PROFILER_SHUTDOWN")) {
+        locked_profiler_save_profile_to_file(lock, shutdownFile,
+                                             /* aIsShuttingDown */ true);
+      }
+
+      toDelete = locked_profiler_stop(lock);
+    }
+
+    CorePS::Destroy(lock);
+
+    // CorePS and the ThreadInfos it contained are gone now, so this thread's
+    // TLSRegisteredThread can be cleared.
+    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+  }
+
+  // This happens with gPSMutex unlocked; the comments in profiler_stop()
+  // explain why. Deleting a null pointer is a no-op, so no check is needed.
+  delete toDelete;
+}
+
+// Streams this process's profile into `aWriter`.
+//
+// With `aOnlyThreads` the output is a bare list; otherwise it is a complete
+// document that also carries an empty "processes" array. Returns false as
+// soon as profiler_stream_json_for_this_process() fails, in which case the
+// writer is left without its closing End()/EndBareList() call.
+static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
+                                     double aSinceTime, bool aIsShuttingDown,
+                                     bool aOnlyThreads = false) {
+  LOG("WriteProfileToJSONWriter");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  if (aOnlyThreads) {
+    aWriter.StartBareList();
+    if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                               aIsShuttingDown, aOnlyThreads)) {
+      return false;
+    }
+    aWriter.EndBareList();
+    return true;
+  }
+
+  aWriter.Start();
+  if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
+                                             aIsShuttingDown, aOnlyThreads)) {
+    return false;
+  }
+
+  // Profiles from other processes are not included because this is a
+  // synchronous function.
+  aWriter.StartArrayProperty("processes");
+  aWriter.EndArray();
+
+  aWriter.End();
+  return true;
+}
+
+// Stores `aProcessName` in CorePS and, when `aETLDplus1` is non-null, the
+// string it points to as well.
+void profiler_set_process_name(const std::string& aProcessName,
+                               const std::string* aETLDplus1) {
+  LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.c_str(),
+      aETLDplus1 ? aETLDplus1->c_str() : "<none>");
+
+  PSAutoLock lock;
+  CorePS::SetProcessName(lock, aProcessName);
+  if (!aETLDplus1) {
+    return;
+  }
+  CorePS::SetETLDplus1(lock, *aETLDplus1);
+}
+
+// Writes this process's profile as JSON and returns a copy of the data, or
+// nullptr if writing the profile failed.
+UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown,
+                                       bool aOnlyThreads) {
+  LOG("profiler_get_profile");
+
+  SpliceableChunkedJSONWriter writer;
+  if (WriteProfileToJSONWriter(writer, aSinceTime, aIsShuttingDown,
+                               aOnlyThreads)) {
+    return writer.ChunkedWriteFunc().CopyData();
+  }
+  return nullptr;
+}
+
+// Writes this process's profile as JSON and copies it into a buffer obtained
+// through `aAllocator`. Nothing is copied if writing the profile failed.
+void profiler_get_profile_json_into_lazily_allocated_buffer(
+    const std::function<char*(size_t)>& aAllocator, double aSinceTime,
+    bool aIsShuttingDown) {
+  LOG("profiler_get_profile_json_into_lazily_allocated_buffer");
+
+  SpliceableChunkedJSONWriter writer;
+  if (WriteProfileToJSONWriter(writer, aSinceTime, aIsShuttingDown)) {
+    writer.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(aAllocator);
+  }
+}
+
+// Reports the parameters the profiler is currently running with.
+//
+// Does nothing if any output pointer is null. When the profiler is not
+// active, the outputs are set to 0 / Nothing() / an empty vector. Otherwise
+// they receive ActivePS's capacity, duration, interval, features and filters;
+// the filter entries are the c_str() pointers of ActivePS's filter strings.
+// NOTE(review): those pointers presumably stay valid only as long as ActivePS
+// keeps its filters -- confirm before holding on to them.
+void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
+                               double* aInterval, uint32_t* aFeatures,
+                               Vector<const char*>* aFilters) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  const bool haveAllOutputs =
+      aCapacity && aDuration && aInterval && aFeatures && aFilters;
+  if (!haveAllOutputs) {
+    return;
+  }
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    *aCapacity = ActivePS::Capacity(lock).Value();
+    *aDuration = ActivePS::Duration(lock);
+    *aInterval = ActivePS::Interval(lock);
+    *aFeatures = ActivePS::Features(lock);
+
+    const Vector<std::string>& activeFilters = ActivePS::Filters(lock);
+    const size_t count = activeFilters.length();
+    MOZ_ALWAYS_TRUE(aFilters->resize(count));
+    for (size_t index = 0; index != count; ++index) {
+      (*aFilters)[index] = activeFilters[index].c_str();
+    }
+    return;
+  }
+
+  // Profiler inactive: report "empty" parameters.
+  *aCapacity = 0;
+  *aDuration = Nothing();
+  *aInterval = 0;
+  *aFeatures = 0;
+  aFilters->clear();
+}
+
+// Reports, through `aSetEnv(key, value)`, the MOZ_PROFILER_STARTUP*
+// environment variables describing the current profiler settings, in the form
+// that profiler_init() reads.
+//
+// When the profiler is not active only MOZ_PROFILER_STARTUP="" is reported.
+// Otherwise MOZ_PROFILER_STARTUP="1" is reported along with the capacity,
+// interval, features bitfield and comma-joined filters of ActivePS.
+void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    aSetEnv("MOZ_PROFILER_STARTUP", "");
+    return;
+  }
+
+  aSetEnv("MOZ_PROFILER_STARTUP", "1");
+  auto capacityString =
+      Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
+  aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
+
+  // std::to_string(double) formats like printf's %f, so the decimal separator
+  // it emits follows the current C locale. profiler_init() parses the value
+  // with StringToDouble, which expects "." regardless of locale. Such output
+  // only consists of an optional sign, ASCII digits and one decimal separator
+  // (or letters for inf/nan), so any other run of characters is the separator
+  // and is rewritten as a single ".". In the default locale this leaves the
+  // string unchanged.
+  const std::string localizedInterval =
+      std::to_string(ActivePS::Interval(lock));
+  std::string intervalString;
+  intervalString.reserve(localizedInterval.size());
+  bool inSeparator = false;
+  for (const char c : localizedInterval) {
+    const bool isPlain = (c >= '0' && c <= '9') || c == '-' || c == '+' ||
+                         (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+    if (isPlain) {
+      intervalString += c;
+      inSeparator = false;
+    } else if (!inSeparator) {
+      intervalString += '.';
+      inSeparator = true;
+    }
+  }
+  aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str());
+
+  // Written as a signed decimal; profiler_init() reads it back with strtol().
+  auto featuresString = Smprintf("%d", ActivePS::Features(lock));
+  aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
+
+  // Join the filters with commas, the format SplitAtCommas() takes apart.
+  std::string filtersString;
+  const Vector<std::string>& filters = ActivePS::Filters(lock);
+  for (uint32_t i = 0; i < filters.length(); ++i) {
+    if (i != 0) {
+      filtersString += ",";
+    }
+    filtersString += filters[i];
+  }
+  aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
+}
+
+// Hands `aExitProfile` to ActivePS if the profiler is active; otherwise the
+// profile is dropped.
+void profiler_received_exit_profile(const std::string& aExitProfile) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (ActivePS::Exists(lock)) {
+    ActivePS::AddExitProfile(lock, aExitProfile);
+  }
+}
+
+// Returns the exit profiles moved out of ActivePS, or an empty vector if the
+// profiler is not active.
+Vector<std::string> profiler_move_exit_profiles() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  PSAutoLock lock;
+  if (!ActivePS::Exists(lock)) {
+    return Vector<std::string>();
+  }
+  return ActivePS::MoveExitProfiles(lock);
+}
+
+// Writes the current profile as JSON to the file `aFilename`.
+//
+// Requires both CorePS and ActivePS to exist. The output contains this
+// process's profile followed by a "processes" array made of the non-empty
+// exit profiles, which are moved out of ActivePS in the process. If the file
+// cannot be opened, this is logged and nothing is written (in that case the
+// exit profiles are left in ActivePS).
+static void locked_profiler_save_profile_to_file(PSLockRef aLock,
+                                                 const char* aFilename,
+                                                 bool aIsShuttingDown = false) {
+  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  std::ofstream stream;
+  stream.open(aFilename);
+  if (!stream.is_open()) {
+    // Saving is best-effort, but leave a trace instead of failing silently.
+    LOG("locked_profiler_save_profile_to_file: could not open %s for writing",
+        aFilename);
+    return;
+  }
+
+  SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
+  w.Start();
+  {
+    locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
+                                                 aIsShuttingDown);
+
+    w.StartArrayProperty("processes");
+    Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock);
+    for (auto& exitProfile : exitProfiles) {
+      if (!exitProfile.empty()) {
+        w.Splice(exitProfile);
+      }
+    }
+    w.EndArray();
+  }
+  w.End();
+
+  stream.close();
+}
+
+// Saves the current profile to `aFilename` if the profiler is active; does
+// nothing otherwise.
+void profiler_save_profile_to_file(const char* aFilename) {
+  LOG("profiler_save_profile_to_file(%s)", aFilename);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    locked_profiler_save_profile_to_file(lock, aFilename);
+  }
+}
+
+// Returns AvailableFeatures(). CorePS must exist.
+uint32_t profiler_get_available_features() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  const uint32_t available = AvailableFeatures();
+  return available;
+}
+
+// Returns information about ActivePS's buffer, or Nothing() if the profiler
+// is not active.
+Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
+  }
+
+  return Nothing();
+}
+
+// This basically duplicates AutoProfilerLabel's constructor: pushes a label
+// frame (category OTHER) on the ProfilingStack found in
+// AutoProfilerLabel::sProfilingStack, if there is one. Returns that stack (or
+// null), which is the value to pass to MozGlueBaseLabelExit().
+static void* MozGlueBaseLabelEnter(const char* aLabel,
+                                   const char* aDynamicString, void* aSp) {
+  ProfilingStack* const stack = AutoProfilerLabel::sProfilingStack.get();
+  if (!stack) {
+    return nullptr;
+  }
+  stack->pushLabelFrame(aLabel, aDynamicString, aSp,
+                        ProfilingCategoryPair::OTHER);
+  return stack;
+}
+
+// This basically duplicates AutoProfilerLabel's destructor: pops the top
+// frame of the ProfilingStack that MozGlueBaseLabelEnter() returned. A null
+// `sProfilingStack` is ignored.
+static void MozGlueBaseLabelExit(void* sProfilingStack) {
+  if (!sProfilingStack) {
+    return;
+  }
+  static_cast<ProfilingStack*>(sProfilingStack)->pop();
+}
+
+// Starts profiling with the given settings. CorePS must exist and ActivePS
+// must not.
+//
+// Unreasonable values are replaced: a capacity too small to hold one full
+// stack falls back to BASE_PROFILER_DEFAULT_ENTRIES, a non-positive duration
+// becomes Nothing(), and a non-positive interval falls back to
+// BASE_PROFILER_DEFAULT_INTERVAL. ActivePS is then created, every registered
+// thread that should be profiled is added to it, the mozglue label hooks are
+// registered and, last of all, RacyFeatures is activated.
+static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
+                                  double aInterval, uint32_t aFeatures,
+                                  const char** aFilters, uint32_t aFilterCount,
+                                  const Maybe<double>& aDuration) {
+  if (LOG_TEST) {
+    LOG("locked_profiler_start");
+    LOG("- capacity  = %d", int(aCapacity.Value()));
+    LOG("- duration  = %.2f", aDuration ? *aDuration : -1);
+    LOG("- interval = %.2f", aInterval);
+
+    // Logs the name of each feature present in aFeatures.
+#define LOG_FEATURE(n_, str_, Name_, desc_)     \
+  if (ProfilerFeature::Has##Name_(aFeatures)) { \
+    LOG("- feature  = %s", str_);               \
+  }
+
+    BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
+
+#undef LOG_FEATURE
+
+    for (uint32_t filterIndex = 0; filterIndex != aFilterCount;
+         ++filterIndex) {
+      LOG("- threads  = %s", aFilters[filterIndex]);
+    }
+  }
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
+
+#if defined(GP_PLAT_amd64_windows)
+  InitializeWin64ProfilerHooks();
+#endif
+
+  // Fall back to the default values if the passed-in values are unreasonable.
+  // We want to be able to store at least one full stack.
+  // TODO: Review magic numbers.
+  PowerOfTwo32 capacity = aCapacity;
+  if (aCapacity.Value() <
+      ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry) {
+    capacity = BASE_PROFILER_DEFAULT_ENTRIES;
+  }
+
+  Maybe<double> duration = aDuration;
+  if (aDuration && *aDuration <= 0) {
+    duration = Nothing();
+  }
+
+  double interval = BASE_PROFILER_DEFAULT_INTERVAL;
+  if (aInterval > 0) {
+    interval = aInterval;
+  }
+
+  ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
+                   duration);
+
+  // Set up profiling for each registered thread, if appropriate.
+  for (const auto& registered : CorePS::RegisteredThreads(aLock)) {
+    RefPtr<ThreadInfo> threadInfo = registered->Info();
+    if (!ActivePS::ShouldProfileThread(aLock, threadInfo)) {
+      continue;
+    }
+
+    registered->RacyRegisteredThread().SetIsBeingProfiled(true);
+    ActivePS::AddLiveProfiledThread(
+        aLock, registered.get(), MakeUnique<ProfiledThreadData>(threadInfo));
+    registered->RacyRegisteredThread().ReinitializeOnResume();
+  }
+
+  // Setup support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit);
+
+  // At the very end, set up RacyFeatures.
+  RacyFeatures::SetActive(ActivePS::Features(aLock));
+}
+
+// Starts the profiler with the given settings, initializing it first if
+// necessary. If the profiler is already running it is stopped and restarted
+// with the new settings.
+void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                    uint32_t aFeatures, const char** aFilters,
+                    uint32_t aFilterCount, const Maybe<double>& aDuration) {
+  LOG("profiler_start");
+
+  // Initialize if necessary. This has to happen before gPSMutex is taken
+  // below, because profiler_init() acquires that lock itself; calling it with
+  // the lock already held would try to lock it a second time on this thread.
+  if (!CorePS::Exists()) {
+    profiler_init(nullptr);
+  }
+
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    // Reset the current state if the profiler is running. (It may be running
+    // because profiler_init() just started it from the environment.)
+    if (ActivePS::Exists(lock)) {
+      samplerThread = locked_profiler_stop(lock);
+    }
+
+    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                          aFilterCount, aDuration);
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why. Deleting a null pointer is a no-op.
+  delete samplerThread;
+}
+
+// Makes sure the profiler is running with exactly the given settings,
+// initializing it first if necessary. A running profiler whose settings
+// already match (per ActivePS::Equals) is left untouched; otherwise the
+// profiler is (re)started.
+void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount,
+                             const Maybe<double>& aDuration) {
+  LOG("profiler_ensure_started");
+
+  // Initialize if necessary. This has to happen before gPSMutex is taken
+  // below, because profiler_init() acquires that lock itself; calling it with
+  // the lock already held would try to lock it a second time on this thread.
+  if (!CorePS::Exists()) {
+    profiler_init(nullptr);
+  }
+
+  // bool startedProfiler = false; (See TODO below)
+  SamplerThread* samplerThread = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (!ActivePS::Exists(lock)) {
+      // The profiler is stopped.
+      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                            aFilterCount, aDuration);
+      // startedProfiler = true; (See TODO below)
+    } else if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval,
+                                 aFeatures, aFilters, aFilterCount)) {
+      // The profiler is active with different settings: stop and restart.
+      samplerThread = locked_profiler_stop(lock);
+      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
+                            aFilterCount, aDuration);
+      // startedProfiler = true; (See TODO below)
+    }
+  }
+
+  // TODO: Install memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   // start counting memory allocations (outside of lock because this may
+  //   // call profiler_add_sampled_counter which would attempt to take the
+  //   // lock.)
+  //   mozilla::profiler::install_memory_counter(true);
+  // #endif
+
+  // We do these operations with gPSMutex unlocked. The comments in
+  // profiler_stop() explain why. Deleting a null pointer is a no-op.
+  delete samplerThread;
+}
+
+// Stops profiling and returns the SamplerThread, which the caller must delete
+// once gPSMutex has been released (see profiler_stop()). Requires both CorePS
+// and ActivePS to exist.
+[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
+  LOG("locked_profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
+
+  // RacyFeatures is cleared before anything else.
+  RacyFeatures::SetInactive();
+
+  // TODO: Uninstall memory counter if it is possible from mozglue.
+  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
+  //   mozilla::profiler::install_memory_counter(false);
+  // #endif
+
+  // Remove support for pushing/popping labels in mozglue.
+  RegisterProfilerLabelEnterExit(nullptr, nullptr);
+
+  // Stop sampling live threads.
+  for (const auto& liveThread : ActivePS::LiveProfiledThreads(aLock)) {
+    liveThread.mRegisteredThread->RacyRegisteredThread().SetIsBeingProfiled(
+        false);
+  }
+
+  // Stop() does not actually end Run(); that happens in this function's
+  // caller, when the sampler thread is destroyed. Stop() merely gives the
+  // SamplerThread a chance to do some cleanup while gPSMutex is locked.
+  SamplerThread* const sampler = ActivePS::Destroy(aLock);
+  sampler->Stop(aLock);
+
+  return sampler;
+}
+
+// Stops the profiler if it is active; does nothing otherwise.
+void profiler_stop() {
+  LOG("profiler_stop");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  SamplerThread* stoppedSampler = nullptr;
+  {
+    PSAutoLock lock;
+
+    if (!ActivePS::Exists(lock)) {
+      return;
+    }
+
+    stoppedSampler = locked_profiler_stop(lock);
+  }
+
+  // The deletion happens with gPSMutex unlocked. Otherwise we would get a
+  // deadlock: we would be waiting here with gPSMutex locked for
+  // SamplerThread::Run() to return so the join operation within the destructor
+  // can complete, but Run() needs to lock gPSMutex to return.
+  //
+  // Because this call occurs with gPSMutex unlocked, it -- including the final
+  // iteration of Run()'s loop -- must be able to detect deactivation and
+  // return in a way that's safe with respect to other gPSMutex-locking
+  // operations that may have occurred in the meantime.
+  delete stoppedSampler;
+}
+
+// Returns whether the profiler is active and paused.
+bool profiler_is_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    return ActivePS::IsPaused(lock);
+  }
+
+  return false;
+}
+
+// Pauses the profiler if it is active: marks RacyFeatures and ActivePS as
+// paused, then records a Pause entry in the buffer.
+void profiler_pause() {
+  LOG("profiler_pause");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  RacyFeatures::SetPaused();
+  ActivePS::SetIsPaused(lock, true);
+  ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
+}
+
+// Resumes the profiler if it is active: records a Resume entry in the buffer,
+// then clears the paused state in ActivePS and RacyFeatures.
+void profiler_resume() {
+  LOG("profiler_resume");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Resume(profiler_time()));
+  ActivePS::SetIsPaused(lock, false);
+  RacyFeatures::SetUnpaused();
+}
+
+// Returns whether the profiler is active with sampling paused.
+bool profiler_is_sampling_paused() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    return ActivePS::IsSamplingPaused(lock);
+  }
+
+  return false;
+}
+
+// Pauses sampling if the profiler is active: marks RacyFeatures and ActivePS
+// as sampling-paused, then records a PauseSampling entry in the buffer.
+void profiler_pause_sampling() {
+  LOG("profiler_pause_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  RacyFeatures::SetSamplingPaused();
+  ActivePS::SetIsSamplingPaused(lock, true);
+  ActivePS::Buffer(lock).AddEntry(
+      ProfileBufferEntry::PauseSampling(profiler_time()));
+}
+
+// Resumes sampling if the profiler is active: records a ResumeSampling entry
+// in the buffer, then clears the sampling-paused state in ActivePS and
+// RacyFeatures.
+void profiler_resume_sampling() {
+  LOG("profiler_resume_sampling");
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  if (!ActivePS::Exists(lock)) {
+    return;
+  }
+
+  ActivePS::Buffer(lock).AddEntry(
+      ProfileBufferEntry::ResumeSampling(profiler_time()));
+  ActivePS::SetIsSamplingPaused(lock, false);
+  RacyFeatures::SetSamplingUnpaused();
+}
+
+// Returns whether the profiler is active with `aFeature` enabled, according
+// to RacyFeatures. Runs both on and off the main thread.
+bool profiler_feature_active(uint32_t aFeature) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // This function is hot enough that RacyFeatures is consulted instead of
+  // ActivePS.
+  const bool active = RacyFeatures::IsActiveWithFeature(aFeature);
+  return active;
+}
+
+// Appends `aCounter` to CorePS's counters.
+void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  CorePS::AppendCounter(psLock, aCounter);
+}
+
+// Removes `aCounter` from CorePS's counters.
+void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
+  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
+
+  PSAutoLock psLock;
+  // Note: a final sample is not enforced here, though that could be done if
+  // the profiler was active.
+  CorePS::RemoveCounter(psLock, aCounter);
+}
+
+// Registers the calling thread under `aName` and returns its ProfilingStack.
+//
+// If the thread is already registered, it keeps its existing registration
+// (and name): the attempt is logged, a text marker describing it is recorded,
+// and the existing ProfilingStack is returned. Otherwise the stack top is
+// derived from `aGuessStackTop` and the thread is registered; the result may
+// then be nullptr, see locked_register_thread().
+ProfilingStack* profiler_register_thread(const char* aName,
+                                         void* aGuessStackTop) {
+  DEBUG_LOG("profiler_register_thread(%s)", aName);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  RegisteredThread* const existing = FindCurrentThreadRegisteredThread(lock);
+  if (!existing) {
+    void* stackTop = GetStackTop(aGuessStackTop);
+    return locked_register_thread(lock, aName, stackTop);
+  }
+
+  LOG("profiler_register_thread(%s) - thread %d already registered as %s",
+      aName, profiler_current_thread_id(), existing->Info()->Name());
+  // TODO: Use new name. This is currently not possible because the
+  // RegisteredThread's ThreadInfo cannot be changed.
+  // In the meantime, we record a marker that could be used in the frontend.
+  std::string text = "Thread " + std::to_string(profiler_current_thread_id()) +
+                     " \"" + existing->Info()->Name() +
+                     "\" attempted to re-register as \"" + aName + "\"";
+  BASE_PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling,
+                            MarkerThreadId::MainThread(), text);
+
+  return &existing->RacyRegisteredThread().ProfilingStack();
+}
+
+// Unregisters the calling thread.
+//
+// Does nothing if CorePS no longer exists. If the thread is not registered,
+// this is logged and (unless this is the main thread) a text marker is sent
+// to the main thread. Otherwise the thread is removed from ActivePS (when the
+// profiler is active), from TLS and from CorePS.
+void profiler_unregister_thread() {
+  if (!CorePS::Exists()) {
+    // This function can be called after the main thread has already shut down.
+    return;
+  }
+
+  PSAutoLock lock;
+
+  RegisteredThread* const current = FindCurrentThreadRegisteredThread(lock);
+  MOZ_RELEASE_ASSERT(current == TLSRegisteredThread::RegisteredThread(lock));
+
+  if (!current) {
+    LOG("profiler_unregister_thread() - thread %d already unregistered",
+        profiler_current_thread_id());
+    // We cannot record a marker on this thread because it was already
+    // unregistered. Send it to the main thread (unless this *is* already the
+    // main thread, which has been unregistered); this may be useful to catch
+    // mismatched register/unregister pairs in Firefox.
+    const int tid = profiler_current_thread_id();
+    if (tid != profiler_main_thread_id()) {
+      BASE_PROFILER_MARKER_TEXT("profiler_unregister_thread again",
+                                OTHER_Profiling, MarkerThreadId::MainThread(),
+                                std::to_string(profiler_current_thread_id()));
+    }
+    // There are two ways FindCurrentThreadRegisteredThread() might have failed.
+    //
+    // - TLSRegisteredThread::Init() failed in locked_register_thread().
+    //
+    // - We've already called profiler_unregister_thread() for this thread.
+    //   (Whether or not it should, this does happen in practice.)
+    //
+    // Either way, TLSRegisteredThread should be empty.
+    MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
+    return;
+  }
+
+  RefPtr<ThreadInfo> info = current->Info();
+
+  DEBUG_LOG("profiler_unregister_thread: %s", info->Name());
+
+  if (ActivePS::Exists(lock)) {
+    ActivePS::UnregisterThread(lock, current);
+  }
+
+  // Clear the pointer to the RegisteredThread object that is about to be
+  // destroyed.
+  TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
+
+  // Remove the thread from the list of registered threads. This deletes the
+  // RegisteredThread object that `current` points to.
+  CorePS::RemoveRegisteredThread(lock, current);
+}
+
+// Records a page in CorePS, then lets an active profiler drop expired pages.
+void profiler_register_page(uint64_t aBrowsingContextID,
+                            uint64_t aInnerWindowID, const std::string& aUrl,
+                            uint64_t aEmbedderInnerWindowID) {
+  DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
+            aBrowsingContextID, aInnerWindowID, aUrl.c_str(),
+            aEmbedderInnerWindowID);
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  PSAutoLock lock;
+
+  // The first url loaded in a freshly created Browsing context is about:blank.
+  // Because of that, this call keeps the first non-about:blank registration of
+  // a window and discards the previous one.
+  CorePS::AppendRegisteredPage(
+      lock, RefPtr<PageInformation>(new PageInformation(
+                aBrowsingContextID, aInnerWindowID, aUrl, aEmbedderInnerWindowID)));
+
+  // With the page appended, have an active profiler discard expired pages.
+  if (ActivePS::Exists(lock)) {
+    ActivePS::DiscardExpiredPages(lock);
+  }
+}
+
+// Unregisters the page with the given inner window ID.
+void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
+  // This function can be called after the main thread has already shut down.
+  if (!CorePS::Exists()) {
+    return;
+  }
+
+  PSAutoLock lock;
+
+  if (ActivePS::Exists(lock)) {
+    // Profiler active: keep the page information, markers may refer to it.
+    ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
+    return;
+  }
+  // Profiler not active: no marker can be associated with the page, so there
+  // is no reason to keep its information.
+  CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
+}
+
+// Clears CorePS's registered pages; when the profiler is active, also calls
+// ActivePS::ClearUnregisteredPages().
+void profiler_clear_all_pages() {
+  // This function can be called after the main thread has already shut down.
+  if (!CorePS::Exists()) {
+    return;
+  }
+
+  PSAutoLock lock;
+  CorePS::ClearRegisteredPages(lock);
+  if (ActivePS::Exists(lock)) {
+    ActivePS::ClearUnregisteredPages(lock);
+  }
+}
+
+// Marks the calling thread as sleeping in its RacyRegisteredThread.
+void profiler_thread_sleep() {
+  // This function runs both on and off the main thread.
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Nothing to do when TLS holds no RacyRegisteredThread for this thread.
+  RacyRegisteredThread* const current =
+      TLSRegisteredThread::RacyRegisteredThread();
+  if (current) {
+    current->SetSleeping();
+  }
+}
+
+// Marks the calling thread as awake in its RacyRegisteredThread.
+void profiler_thread_wake() {
+  // This function runs both on and off the main thread.
+
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+
+  // Nothing to do when TLS holds no RacyRegisteredThread for this thread.
+  RacyRegisteredThread* const current =
+      TLSRegisteredThread::RacyRegisteredThread();
+  if (current) {
+    current->SetAwake();
+  }
+}
+
+bool detail::IsThreadBeingProfiled() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  // False when TLS holds no RacyRegisteredThread for the calling thread.
+  const RacyRegisteredThread* current =
+      TLSRegisteredThread::RacyRegisteredThread();
+  return current ? current->IsBeingProfiled() : false;
+}
+
+bool profiler_thread_is_sleeping() {
+  // Main-thread only, and the profiler core state must exist.
+  MOZ_RELEASE_ASSERT(profiler_is_main_thread());
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  // Reports "not sleeping" when TLS holds no RacyRegisteredThread.
+  if (RacyRegisteredThread* current =
+          TLSRegisteredThread::RacyRegisteredThread()) {
+    return current->IsSleeping();
+  }
+  return false;
+}
+
+// Returns the milliseconds elapsed since CorePS::ProcessStartTime().
+double profiler_time() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  const TimeStamp now = TimeStamp::NowUnfuzzed();
+  return (now - CorePS::ProcessStartTime()).ToMilliseconds();
+}
+
+bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer) {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  // Synchronously samples the calling thread into aChunkedBuffer.
+  PSAutoLock lock;
+  // Nothing is captured (false is returned) unless the profiler is active.
+  if (!ActivePS::Exists(lock)) {
+    return false;
+  }
+  // The calling thread must have a RegisteredThread in TLS.
+  RegisteredThread* registeredThread =
+      TLSRegisteredThread::RegisteredThread(lock);
+  if (!registeredThread) {
+    MOZ_ASSERT(registeredThread);  // Flags the unexpected missing registration.
+    return false;
+  }
+
+  ProfileBuffer profileBuffer(aChunkedBuffer);
+
+  Registers regs;
+#if defined(HAVE_NATIVE_UNWIND)
+  regs.SyncPopulate();
+#else
+  regs.Clear();
+#endif
+
+  DoSyncSample(lock, *registeredThread, TimeStamp::NowUnfuzzed(), regs,
+               profileBuffer);
+
+  return true;
+}
+
+// Captures a backtrace into a newly allocated buffer; returns nullptr when
+// the profiler is inactive or the capture fails.
+UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+  MOZ_RELEASE_ASSERT(CorePS::Exists());
+  // Cheap early-out so that no buffer is allocated while inactive.
+  if (!profiler_is_active()) {
+    return nullptr;
+  }
+
+  auto chunkManager = MakeUnique<ProfileBufferChunkManagerSingle>(
+      ProfileBufferChunkManager::scExpectedMaximumStackSize);
+  auto backtraceBuffer = MakeUnique<ProfileChunkedBuffer>(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      std::move(chunkManager));
+  if (profiler_capture_backtrace_into(*backtraceBuffer)) {
+    return backtraceBuffer;
+  }
+  return nullptr;
+}
+
+// Wraps a freshly captured backtrace in a "SyncProfile" ProfilerBacktrace;
+// returns nullptr if nothing could be captured.
+UniqueProfilerBacktrace profiler_get_backtrace() {
+  if (UniquePtr<ProfileChunkedBuffer> captured = profiler_capture_backtrace()) {
+    return UniqueProfilerBacktrace(
+        new ProfilerBacktrace("SyncProfile", std::move(captured)));
+  }
+
+  return nullptr;
+}
+
+void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
+  delete aBacktrace;  // Deleter body: destroys the given ProfilerBacktrace.
+}
+
+bool profiler_is_locked_on_current_thread() {
+  // Lets callers avoid `profiler_...` functions while this thread may already
+  // hold a profiler lock: a second, recursive lock would crash or wait forever.
+  // Two mutexes matter here, and holding either one must yield `true`.
+  if (PSAutoLock::IsLockedOnCurrentThread()) {
+    return true;  // The main profiler mutex, used by most functions.
+  }
+  // The buffer mutex, which some functions (e.g., marker-related ones) take
+  // directly without locking the main mutex.
+  return CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread();
+}
+
+// Simplified version of profiler_add_marker that can be easily passed into the
+// JS engine: records a JS-category text marker from null-terminated strings.
+void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) {
+  BASE_PROFILER_MARKER_TEXT(
+      ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {},
+      ProfilerString8View::WrapNullTerminatedString(aMarkerText));
+}
+
+// Suspends the registered thread with ID aThreadId, hands its stack to
+// aCollector, and resumes it. NOTE: aCollector's methods run while the target
+// thread is paused; allocating there (which may claim locks) can deadlock.
+void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
+                                        ProfilerStackCollector& aCollector,
+                                        bool aSampleNative /* = true */) {
+  // Lock the profiler mutex
+  PSAutoLock lock;
+
+  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
+      CorePS::RegisteredThreads(lock);
+  for (auto& thread : registeredThreads) {  // Linear search for aThreadId.
+    RefPtr<ThreadInfo> info = thread->Info();
+    RegisteredThread& registeredThread = *thread.get();
+
+    if (info->ThreadId() == aThreadId) {
+      if (info->IsMainThread()) {
+        aCollector.SetIsMainThread();
+      }
+
+      // Allocate the space for the native stack
+      NativeStack nativeStack;
+
+      // Suspend, sample, and then resume the target thread.
+      Sampler sampler(lock);
+      TimeStamp now = TimeStamp::NowUnfuzzed();
+      sampler.SuspendAndSampleAndResumeThread(
+          lock, registeredThread, now,
+          [&](const Registers& aRegs, const TimeStamp& aNow) {
+            // The target thread is now suspended. Collect a native
+            // backtrace, and call the callback.
+            bool isSynchronous = false;
+#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
+            if (aSampleNative) {
+          // We can only use FramePointerStackWalk or MozStackWalk from
+          // suspend_and_sample_thread as other stackwalking methods may not be
+          // initialized.
+#  if defined(USE_FRAME_POINTER_STACK_WALK)
+              DoFramePointerBacktrace(lock, registeredThread, aRegs,
+                                      nativeStack);
+#  elif defined(USE_MOZ_STACK_WALK)
+              DoMozStackWalkBacktrace(lock, registeredThread, aRegs,
+                                      nativeStack);
+#  else
+#    error "Invalid configuration"
+#  endif
+
+              MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+                          nativeStack, aCollector);
+            } else
+#endif
+            {  // Also the only branch without HAVE_FASTINIT_NATIVE_UNWIND.
+              MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
+                          nativeStack, aCollector);
+
+              if (ProfilerFeature::HasLeaf(aFeatures)) {
+                aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
+              }
+            }
+          });
+
+      // NOTE: Make sure to disable the sampler before it is destroyed, in case
+      // the profiler is running at the same time.
+      sampler.Disable(lock);
+      break;  // Only the first thread matching aThreadId is sampled.
+    }
+  }
+}
+
+// END externally visible functions
+////////////////////////////////////////////////////////////////////////
+
+}  // namespace baseprofiler
+}  // namespace mozilla
diff --git a/mozglue/baseprofiler/core/platform.h b/mozglue/baseprofiler/core/platform.h
new file mode 100644
index 0000000000..1913a0def6
--- /dev/null
+++ b/mozglue/baseprofiler/core/platform.h
@@ -0,0 +1,132 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#ifndef TOOLS_PLATFORM_H_
+#define TOOLS_PLATFORM_H_
+
+#include "PlatformMacros.h"
+
+#include "BaseProfiler.h"
+
+#include "mozilla/Logging.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+#include <functional>
+#include <stdint.h>
+#include <string>
+
+namespace mozilla {
+namespace baseprofiler {
+bool LogTest(int aLevelToTest);  // Used by the *_LOG_TEST macros below.
+void PrintToConsole(const char* aFmt, ...) MOZ_FORMAT_PRINTF(1, 2);  // printf-style.
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// For MOZ_BASE_PROFILER_LOGGING and above (LogTest(3)): the profiler's default
+// logging level; use sparingly. `arg` must be a printf-style string literal.
+#define LOG_TEST ::mozilla::baseprofiler::LogTest(3)
+#define LOG(arg, ...)                                           \
+  do {                                                          \
+    if (LOG_TEST) {                                             \
+      ::mozilla::baseprofiler::PrintToConsole(                  \
+          "[I %d/%d] " arg "\n", profiler_current_process_id(), \
+          profiler_current_thread_id(), ##__VA_ARGS__);         \
+    }                                                           \
+  } while (0)
+
+// For MOZ_BASE_PROFILER_DEBUG_LOGGING (LogTest(4)): logging that is somewhat
+// more verbose than LOG. `arg` must be a printf-style string literal.
+#define DEBUG_LOG_TEST ::mozilla::baseprofiler::LogTest(4)
+#define DEBUG_LOG(arg, ...)                                     \
+  do {                                                          \
+    if (DEBUG_LOG_TEST) {                                       \
+      ::mozilla::baseprofiler::PrintToConsole(                  \
+          "[D %d/%d] " arg "\n", profiler_current_process_id(), \
+          profiler_current_thread_id(), ##__VA_ARGS__);         \
+    }                                                           \
+  } while (0)
+
+// For MOZ_BASE_PROFILER_VERBOSE_LOGGING (LogTest(5)): logging that is somewhat
+// more verbose than DEBUG_LOG. `arg` must be a printf-style string literal.
+#define VERBOSE_LOG_TEST ::mozilla::baseprofiler::LogTest(5)
+#define VERBOSE_LOG(arg, ...)                                   \
+  do {                                                          \
+    if (VERBOSE_LOG_TEST) {                                     \
+      ::mozilla::baseprofiler::PrintToConsole(                  \
+          "[V %d/%d] " arg "\n", profiler_current_process_id(), \
+          profiler_current_thread_id(), ##__VA_ARGS__);         \
+    }                                                           \
+  } while (0)
+
+namespace mozilla {
+
+class JSONWriter;  // Forward declaration only.
+
+namespace baseprofiler {
+
+typedef uint8_t* Address;  // Byte-granular address.
+
+class PlatformData;  // Deliberately left incomplete here, see below.
+
+// We can't new/delete the type safely without defining it
+// (-Wdelete-incomplete).  Use these to hide the details from clients.
+struct PlatformDataDestructor {
+  void operator()(PlatformData*);  // Defined where PlatformData is complete.
+};
+
+typedef UniquePtr<PlatformData, PlatformDataDestructor> UniquePlatformData;
+UniquePlatformData AllocPlatformData(int aThreadId);
+
+// Convert the array of aFeatureCount feature strings to a bitfield.
+uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
+                                      uint32_t aFeatureCount,
+                                      bool aIsStartup = false);
+
+void profiler_get_profile_json_into_lazily_allocated_buffer(
+    const std::function<char*(size_t)>& aAllocator, double aSinceTime,
+    bool aIsShuttingDown);  // NOTE(review): presumably aAllocator(size) returns the output buffer - confirm.
+
+// Flags to conveniently track various JS instrumentations. The values are
+// distinct single bits.
+enum class JSInstrumentationFlags {
+  StackSampling = 0x1,
+  TraceLogging = 0x2,
+  Allocations = 0x4,
+};
+
+// Record an exit profile from a child process.
+void profiler_received_exit_profile(const std::string& aExitProfile);
+
+// Extract all received exit profiles that have not yet expired (i.e., they
+// still intersect with this process' buffer range).
+Vector<std::string> profiler_move_exit_profiles();
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* ndef TOOLS_PLATFORM_H_ */
diff --git a/mozglue/baseprofiler/core/shared-libraries-linux.cc b/mozglue/baseprofiler/core/shared-libraries-linux.cc
new file mode 100644
index 0000000000..c38e72378a
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-linux.cc
@@ -0,0 +1,835 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#define PATH_MAX_TOSTRING(x) #x  // Stringifies x without macro-expanding it.
+#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x)  // Expands x, then stringifies.
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fstream>
+#include "platform.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/Unused.h"
+
+#include <algorithm>
+#include <arpa/inet.h>
+#include <dlfcn.h>
+#include <elf.h>
+#include <fcntl.h>
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+#  include <features.h>
+#endif
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <vector>
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+#  include <link.h>  // dl_phdr_info, ElfW()
+#else
+#  error "Unexpected configuration"
+#endif
+
+#if defined(GP_OS_android)
+extern "C" MOZ_EXPORT __attribute__((weak)) int dl_iterate_phdr(
+    int (*callback)(struct dl_phdr_info* info, size_t size, void* data),
+    void* data);  // Declared weak, so the symbol may be null when absent.
+#endif
+
+#if defined(GP_OS_freebsd) && !defined(ElfW)
+#  define ElfW(type) Elf_##type  // Fallback: maps ElfW(X) to Elf_X.
+#endif
+
+// ----------------------------------------------------------------------------
+// Starting imports from toolkit/crashreporter/google-breakpad/, as needed by
+// this file when moved to mozglue.
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/memory_range.h.
+// Lightweight, non-owning view of a contiguous range of memory (a pointer
+// plus a length) with helpers for bounds-checked access to subranges. The
+// implementation neither allocates memory nor calls into libc functions,
+// which makes it safer to use in a crashed environment.
+class MemoryRange {
+ public:
+  MemoryRange() : data_(nullptr), length_(0) {}
+
+  MemoryRange(const void* data, size_t length) { Set(data, length); }
+
+  // True when this range holds no bytes.
+  bool IsEmpty() const {
+    // Set() keeps |length_| at zero whenever |data_| is null.
+    return !length_;
+  }
+
+  // Turns this back into an empty range.
+  void Reset() {
+    length_ = 0;
+    data_ = nullptr;
+  }
+
+  // Points this range at |length| bytes starting at |data|.
+  void Set(const void* data, size_t length) {
+    // A null |data| always results in a zero |length_|.
+    length_ = (data != nullptr) ? length : 0;
+    data_ = static_cast<const uint8_t*>(data);
+  }
+
+  // Tells whether the |sub_length| bytes found |sub_offset| bytes into this
+  // range all lie inside it.
+  bool Covers(size_t sub_offset, size_t sub_length) const {
+    // The offset itself has to be within [ 0 .. length_ - 1 ].
+    if (sub_offset >= length_) {
+      return false;
+    }
+    // The end must neither wrap around nor exceed |length_|.
+    const size_t sub_end = sub_offset + sub_length;
+    return sub_end >= sub_offset && sub_end <= length_;
+  }
+
+  // Raw pointer to the |sub_length| bytes at |sub_offset|, or null when that
+  // subrange is out of bounds.
+  const void* GetData(size_t sub_offset, size_t sub_length) const {
+    if (!Covers(sub_offset, sub_length)) return nullptr;
+    return data_ + sub_offset;
+  }
+
+  // Typed flavour of GetData(): the subrange length is sizeof(DataType) and
+  // the result is a |DataType| pointer.
+  template <typename DataType>
+  const DataType* GetData(size_t sub_offset) const {
+    const void* raw = GetData(sub_offset, sizeof(DataType));
+    return static_cast<const DataType*>(raw);
+  }
+
+  // Raw pointer to element number |element_index| of an array whose elements
+  // are |element_size| bytes long and which begins |element_offset| bytes
+  // into this range, or null when that element is out of bounds.
+  const void* GetArrayElement(size_t element_offset, size_t element_size,
+                              unsigned element_index) const {
+    const size_t sub_offset = element_offset + element_index * element_size;
+    return GetData(sub_offset, element_size);
+  }
+
+  // Typed flavour of GetArrayElement(): the element size is
+  // sizeof(ElementType) and the result is an |ElementType| pointer.
+  template <typename ElementType>
+  const ElementType* GetArrayElement(size_t element_offset,
+                                     unsigned element_index) const {
+    const void* raw =
+        GetArrayElement(element_offset, sizeof(ElementType), element_index);
+    return static_cast<const ElementType*>(raw);
+  }
+
+  // The |sub_length| bytes at |sub_offset| as a range of their own, or an
+  // empty range when they are out of bounds.
+  MemoryRange Subrange(size_t sub_offset, size_t sub_length) const {
+    if (!Covers(sub_offset, sub_length)) return MemoryRange();
+    return MemoryRange(data_ + sub_offset, sub_length);
+  }
+
+  // Start of the range.
+  const uint8_t* data() const { return data_; }
+
+  // Number of bytes in the range.
+  size_t length() const { return length_; }
+
+ private:
+  // First byte of the range (null after Reset()).
+  const uint8_t* data_;
+
+  // Size of the range in bytes.
+  size_t length_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.h
+// and inlined .cc.
+// A utility class for mapping a file into memory for read-only access of the
+// file content. The object owns its mapping and releases it in Unmap() or on
+// destruction. This copy uses the plain open/fstat/mmap/munmap/close calls.
+class MemoryMappedFile {
+ public:
+  MemoryMappedFile() {}
+
+  // Constructor that calls Map() to map a file at |path| into memory.
+  // If Map() fails, the object behaves as if it is default constructed.
+  MemoryMappedFile(const char* path, size_t offset) { Map(path, offset); }
+
+  MemoryMappedFile(const MemoryMappedFile&) = delete;
+  MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;
+
+  ~MemoryMappedFile() { Unmap(); }  // Do not leak the mapping.
+
+  // Maps the file at |path|, starting at byte |offset| (handed to mmap()
+  // unchanged), into memory, which can then be accessed via content() as a
+  // MemoryRange object or via data(), and returns true on success. Mapping an
+  // empty file succeeds, with data() and size() returning NULL and 0. An
+  // existing mapping is unmapped before a new mapping is created.
+  bool Map(const char* path, size_t offset) {
+    Unmap();
+
+    int fd = open(path, O_RDONLY, 0);
+    if (fd == -1) {
+      return false;
+    }
+
+#if defined(__x86_64__) || defined(__aarch64__) || \
+    (defined(__mips__) && _MIPS_SIM == _ABI64) ||  \
+    !(defined(GP_OS_linux) || defined(GP_OS_android))
+
+    struct stat st;
+    if (fstat(fd, &st) == -1 || st.st_size < 0) {
+#else
+    struct stat64 st;
+    if (fstat64(fd, &st) == -1 || st.st_size < 0) {
+#endif
+      close(fd);
+      return false;
+    }
+
+    // Strangely file size can be negative, but we check above that it is not.
+    size_t file_len = static_cast<size_t>(st.st_size);
+    // If the file does not extend beyond the offset, use an empty MemoryRange
+    // and return true, without calling mmap() at all.
+    if (offset >= file_len) {
+      close(fd);
+      return true;
+    }
+    // Map only the bytes from |offset| to the end of the file, so that the
+    // mapped length is the one recorded in |content_| and passed to munmap().
+    const size_t map_len = file_len - offset;
+    void* data = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, offset);
+    close(fd);
+    if (data == MAP_FAILED) {
+      return false;
+    }
+    content_.Set(data, map_len);
+    return true;
+  }
+
+  // Unmaps the memory for the mapped file. It's a no-op if no file is
+  // mapped.
+  void Unmap() {
+    if (content_.data()) {
+      munmap(const_cast<uint8_t*>(content_.data()), content_.length());
+      content_.Set(NULL, 0);
+    }
+  }
+
+  // Returns a MemoryRange object that covers the memory for the mapped
+  // file. The MemoryRange object is empty if no file is mapped.
+  const MemoryRange& content() const { return content_; }
+
+  // Returns a pointer to the beginning of the memory for the mapped file.
+  // or NULL if no file is mapped or the mapped file is empty.
+  const void* data() const { return content_.data(); }
+
+  // Returns the size in bytes of the mapped content, i.e. the file size minus
+  // the offset given to Map(), or zero if no file is mapped.
+  size_t size() const { return content_.length(); }
+
+ private:
+  // Mapped file content as a MemoryRange object.
+  MemoryRange content_;
+};
+
+// Imported from
+// toolkit/crashreporter/google-breakpad/src/common/linux/file_id.h and inlined
+// .cc.
+// GNU binutils' ld defaults to 'sha1', which is 160 bits == 20 bytes,
+// so this is enough to fit that, which most binaries will use.
+// This is just a sensible default for vectors so most callers can get away with
+// stack allocation.
+static const size_t kDefaultBuildIdSize = 20;
+
+// GUID layout, used in a few places for backwards-compatibility.
+typedef struct {
+  uint32_t data1;
+  uint16_t data2;
+  uint16_t data3;
+  uint8_t data4[8];
+} MDGUID; /* GUID */
+
+const size_t kMDGUIDSize = sizeof(MDGUID);  // 4 + 2 + 2 + 8 member bytes.
+
+class FileID {
+ public:
+  explicit FileID(const char* path) : path_(path) {}  // Only stores |path|.
+  ~FileID() {}
+
+  // Load the identifier for the elf file path specified in the constructor into
+  // |identifier|. Returns false if the file cannot be mapped or is empty.
+  //
+  // The current implementation will look for a .note.gnu.build-id
+  // section and use that as the file id, otherwise it falls back to
+  // XORing the first 4096 bytes of the .text section to generate an identifier.
+  bool ElfFileIdentifier(std::vector<uint8_t>& identifier) {
+    MemoryMappedFile mapped_file(path_.c_str(), 0);  // Whole file, offset 0.
+    if (!mapped_file.data())  // Should probably check if size >= ElfW(Ehdr)?
+      return false;
+
+    return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
+  }
+
+  // Traits classes so consumers can write templatized code to deal
+  // with specific ELF bits.
+  struct ElfClass32 {
+    using Addr = Elf32_Addr;
+    using Ehdr = Elf32_Ehdr;
+    using Nhdr = Elf32_Nhdr;
+    using Phdr = Elf32_Phdr;
+    using Shdr = Elf32_Shdr;
+    using Half = Elf32_Half;
+    using Off = Elf32_Off;
+    using Sym = Elf32_Sym;
+    using Word = Elf32_Word;
+    // Constants that go with the 32-bit ELF types above.
+    static const int kClass = ELFCLASS32;
+    static const uint16_t kMachine = EM_386;
+    static const size_t kAddrSize = sizeof(Elf32_Addr);
+    static constexpr const char* kMachineName = "x86";
+  };
+
+  struct ElfClass64 {
+    using Addr = Elf64_Addr;
+    using Ehdr = Elf64_Ehdr;
+    using Nhdr = Elf64_Nhdr;
+    using Phdr = Elf64_Phdr;
+    using Shdr = Elf64_Shdr;
+    using Half = Elf64_Half;
+    using Off = Elf64_Off;
+    using Sym = Elf64_Sym;
+    using Word = Elf64_Word;
+    // Constants that go with the 64-bit ELF types above.
+    static const int kClass = ELFCLASS64;
+    static const uint16_t kMachine = EM_X86_64;
+    static const size_t kAddrSize = sizeof(Elf64_Addr);
+    static constexpr const char* kMachineName = "x86_64";
+  };
+
+  // Returns the first of the |nsection| headers in |sections| that has type
+  // |section_type| and whose name, looked up in the string table
+  // [|section_names|, |names_end|), equals |name|; NULL when there is none.
+  template <typename ElfClass>
+  static const typename ElfClass::Shdr* FindElfSectionByName(
+      const char* name, typename ElfClass::Word section_type,
+      const typename ElfClass::Shdr* sections, const char* section_names,
+      const char* names_end, int nsection) {
+    if (!name || !sections || nsection == 0) return nullptr;
+
+    const int name_len = strlen(name);
+    if (name_len == 0) return nullptr;
+
+    for (int index = 0; index < nsection; ++index) {
+      const typename ElfClass::Shdr& candidate = sections[index];
+      if (candidate.sh_type != section_type) continue;
+      // The candidate name must fit, terminator included, before |names_end|.
+      const char* candidate_name = section_names + candidate.sh_name;
+      if (names_end - candidate_name < name_len + 1) continue;
+      if (strcmp(name, candidate_name) == 0) return &candidate;
+    }
+
+    return nullptr;
+  }
+
+  struct ElfSegment {
+    const void* start;  // Address of the segment's first byte.
+    size_t size;        // Segment size in bytes.
+  };
+
+  // Translates |offset|, relative to the start of the ELF header, into a
+  // pointer inside the current process. The pointee type is given by the
+  // extra template parameter T so that callers do not have to cast the
+  // result themselves.
+  template <typename ElfClass, typename T>
+  static const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                            typename ElfClass::Off offset) {
+    const uintptr_t base = reinterpret_cast<uintptr_t>(elf_header);
+    return reinterpret_cast<const T*>(base + offset);
+  }
+
+// ELF note name and desc are 32-bit word padded: rounds a up to 4 bytes.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+  // Appends the descriptor of the first NT_GNU_BUILD_ID note in |section|
+  // (|length| bytes) to |identifier|. Returns false if there is no such note,
+  // its descriptor is empty, or a note does not fit inside the section.
+  static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
+                                            std::vector<uint8_t>& identifier) {
+    static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
+                  "Elf32_Nhdr and Elf64_Nhdr should be the same");
+    typedef typename ElfClass32::Nhdr Nhdr;
+    const uint8_t* cursor = reinterpret_cast<const uint8_t*>(section);
+    // Sizes come from the file: bounds-check in 64 bits before using them.
+    for (uint64_t left = cursor ? length : 0; left >= sizeof(Nhdr);) {
+      const Nhdr* note_header = reinterpret_cast<const Nhdr*>(cursor);
+      const uint64_t desc_size = note_header->n_descsz;
+      const uint64_t desc_offset =
+          sizeof(Nhdr) + NOTE_PADDING(uint64_t(note_header->n_namesz));
+      if (note_header->n_type == NT_GNU_BUILD_ID) {
+        if (desc_size == 0 || desc_offset + desc_size > left) return false;
+        const uint8_t* build_id = cursor + desc_offset;
+        identifier.insert(identifier.end(), build_id, build_id + desc_size);
+        return true;
+      }
+      const uint64_t note_size = desc_offset + NOTE_PADDING(desc_size);
+      if (note_size >= left) return false;  // No further note can follow.
+      cursor += note_size;
+      left -= note_size;
+    }
+    return false;
+  }
+
+  template <typename ElfClass>
+  static bool FindElfClassSection(const char* elf_base,
+                                  const char* section_name,
+                                  typename ElfClass::Word section_type,
+                                  const void** section_start,
+                                  size_t* section_size) {
+    typedef typename ElfClass::Ehdr Ehdr;
+    typedef typename ElfClass::Shdr Shdr;
+    // Looks up |section_name| of type |section_type| in the image at |elf_base|.
+    if (!elf_base || !section_start || !section_size) {
+      return false;
+    }
+    // The image must start with the ELF magic...
+    if (strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+      return false;
+    }
+    // ...and be of the ELF class this instantiation handles.
+    const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
+    if (elf_header->e_ident[EI_CLASS] != ElfClass::kClass) {
+      return false;
+    }
+    // Locate the section headers and the string table holding their names.
+    const Shdr* sections =
+        GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+    const Shdr* section_names = sections + elf_header->e_shstrndx;
+    const char* names =
+        GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
+    const char* names_end = names + section_names->sh_size;
+
+    const Shdr* section =
+        FindElfSectionByName<ElfClass>(section_name, section_type, sections,
+                                       names, names_end, elf_header->e_shnum);
+    // Outputs are only written for a non-empty match, untouched otherwise.
+    if (section != NULL && section->sh_size > 0) {
+      *section_start = elf_base + section->sh_offset;
+      *section_size = section->sh_size;
+    }
+    // True means "valid ELF of this class", not "section found".
+    return true;
+  }
+
+  // Appends to |*segments| one ElfSegment (file start pointer and file size)
+  // for every program header of type |segment_type| in the ELF image mapped
+  // at |elf_base|, reading headers with the layout of |ElfClass|.
+  //
+  // Returns false if an argument is null, the ELF magic is absent or the
+  // image's EI_CLASS differs from ElfClass::kClass; returns true otherwise,
+  // including when no program header matched.
+  template <typename ElfClass>
+  static bool FindElfClassSegment(const char* elf_base,
+                                  typename ElfClass::Word segment_type,
+                                  std::vector<ElfSegment>* segments) {
+    typedef typename ElfClass::Ehdr Ehdr;
+    typedef typename ElfClass::Phdr Phdr;
+
+    const bool args_ok = elf_base && segments;
+    if (!args_ok || strncmp(elf_base, ELFMAG, SELFMAG) != 0) {
+      return false;
+    }
+
+    const Ehdr* header = reinterpret_cast<const Ehdr*>(elf_base);
+    if (header->e_ident[EI_CLASS] != ElfClass::kClass) {
+      return false;
+    }
+
+    const Phdr* const first =
+        GetOffset<ElfClass, Phdr>(header, header->e_phoff);
+    const Phdr* const last = first + header->e_phnum;
+    for (const Phdr* phdr = first; phdr != last; ++phdr) {
+      if (phdr->p_type != segment_type) {
+        continue;
+      }
+      ElfSegment found = {};
+      found.start = elf_base + phdr->p_offset;
+      found.size = phdr->p_filesz;
+      segments->push_back(found);
+    }
+
+    return true;
+  }
+
+  // True when the bytes at |elf_base| begin with the ELF magic (ELFMAG).
+  // |elf_base| is not checked for null here.
+  static bool IsValidElf(const void* elf_base) {
+    const char* magic = reinterpret_cast<const char*>(elf_base);
+    return 0 == strncmp(magic, ELFMAG, SELFMAG);
+  }
+
+  // Returns the EI_CLASS byte from the identification array of the ELF
+  // header at |elf_base|. Performs no validation of the header.
+  static int ElfClass(const void* elf_base) {
+    return reinterpret_cast<const ElfW(Ehdr)*>(elf_base)->e_ident[EI_CLASS];
+  }
+
+  // Finds the section named |section_name| of type |section_type| in the ELF
+  // image mapped at |elf_mapped_base|, dispatching on the image's ELF class.
+  //
+  // Returns true only when the section was found, in which case
+  // |*section_start| / |*section_size| describe it. On any failure after the
+  // null-argument check the outputs are left as NULL / 0.
+  static bool FindElfSection(const void* elf_mapped_base,
+                             const char* section_name, uint32_t section_type,
+                             const void** section_start, size_t* section_size) {
+    if (!elf_mapped_base || !section_start || !section_size) {
+      return false;
+    }
+
+    // Reset the outputs first so that a non-null |*section_start| afterwards
+    // can only mean the class-specific lookup actually found the section.
+    *section_size = 0;
+    *section_start = NULL;
+
+    if (!IsValidElf(elf_mapped_base)) {
+      return false;
+    }
+
+    const char* image = static_cast<const char*>(elf_mapped_base);
+    bool parsed;
+    switch (ElfClass(elf_mapped_base)) {
+      case ELFCLASS32:
+        parsed = FindElfClassSection<ElfClass32>(
+            image, section_name, section_type, section_start, section_size);
+        break;
+      case ELFCLASS64:
+        parsed = FindElfClassSection<ElfClass64>(
+            image, section_name, section_type, section_start, section_size);
+        break;
+      default:
+        // Neither a 32-bit nor a 64-bit image.
+        return false;
+    }
+
+    return parsed && *section_start != NULL;
+  }
+
+  // Collects into |*segments| every program header of type |segment_type|
+  // from the ELF image mapped at |elf_mapped_base|, dispatching on the
+  // image's ELF class. Returns false for null arguments, a missing ELF magic
+  // or an ELF class that is neither ELFCLASS32 nor ELFCLASS64.
+  static bool FindElfSegments(const void* elf_mapped_base,
+                              uint32_t segment_type,
+                              std::vector<ElfSegment>* segments) {
+    const bool args_ok = elf_mapped_base && segments;
+    if (!args_ok || !IsValidElf(elf_mapped_base)) {
+      return false;
+    }
+
+    const char* image = static_cast<const char*>(elf_mapped_base);
+    switch (ElfClass(elf_mapped_base)) {
+      case ELFCLASS32:
+        return FindElfClassSegment<ElfClass32>(image, segment_type, segments);
+      case ELFCLASS64:
+        return FindElfClassSegment<ElfClass64>(image, segment_type, segments);
+      default:
+        return false;
+    }
+  }
+
+  // Tries to extract the GNU build id of the ELF image mapped at
+  // |elf_mapped_base| into |identifier|. PT_NOTE segments are tried first, in
+  // program header order; if none of them yields an id, the section named
+  // ".note.gnu.build-id" is tried. Returns true once an id has been copied.
+  static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                                 std::vector<uint8_t>& identifier) {
+    // lld normally creates 2 PT_NOTEs, gold normally creates 1.
+    std::vector<ElfSegment> note_segments;
+    if (FindElfSegments(elf_mapped_base, PT_NOTE, &note_segments)) {
+      for (size_t i = 0; i < note_segments.size(); ++i) {
+        const ElfSegment& candidate = note_segments[i];
+        if (ElfClassBuildIDNoteIdentifier(candidate.start, candidate.size,
+                                          identifier)) {
+          return true;
+        }
+      }
+    }
+
+    // Fall back to looking the note up by section name.
+    const void* note_start;
+    size_t note_length;
+    if (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                        &note_start, &note_length)) {
+      return false;
+    }
+    return ElfClassBuildIDNoteIdentifier(note_start, note_length, identifier);
+  }
+
+  // Attempt to locate the .text section of an ELF binary and generate
+  // a simple hash by XORing the first page worth of bytes into |identifier|.
+  //
+  // |identifier| is resized to kMDGUIDSize bytes up front, so it has that
+  // size even when the function fails. Returns false when no non-empty
+  // ".text" section of type SHT_PROGBITS can be found, true otherwise.
+  static bool HashElfTextSection(const void* elf_mapped_base,
+                                 std::vector<uint8_t>& identifier) {
+    identifier.resize(kMDGUIDSize);
+
+    void* text_section;
+    size_t text_size;
+    if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
+                        (const void**)&text_section, &text_size) ||
+        text_size == 0) {
+      return false;
+    }
+
+    // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
+    // function backwards-compatible.
+    memset(&identifier[0], 0, kMDGUIDSize);
+    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
+    // Hash at most the first 4096 bytes of the section.
+    const uint8_t* ptr_end =
+        ptr + std::min(text_size, static_cast<size_t>(4096));
+    // Fold the data into the identifier one kMDGUIDSize-byte chunk at a time.
+    // NOTE(review): every pass reads a full chunk, so when the hashed length
+    // is not a multiple of kMDGUIDSize the final pass reads bytes located
+    // after |ptr_end|. Bounding it would change the identifiers produced for
+    // such binaries - confirm compatibility requirements before touching it.
+    while (ptr < ptr_end) {
+      for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i];
+      ptr += kMDGUIDSize;
+    }
+    return true;
+  }
+
+  // Produces the identifier of the ELF file mapped at |base| and stores it in
+  // |identifier|. The GNU build id note is preferred; when there is none, a
+  // hash of the start of the text section is used instead. Returns false if
+  // neither approach yields an identifier.
+  static bool ElfFileIdentifierFromMappedFile(
+      const void* base, std::vector<uint8_t>& identifier) {
+    return FindElfBuildIDNote(base, identifier) ||
+           HashElfTextSection(base, identifier);
+  }
+
+  // The three string helpers below are never called from an unsafe context,
+  // so allocating memory and calling into libc is fine for them.
+  //
+  // Formats |count| bytes starting at |bytes| as uppercase hexadecimal, two
+  // digits per byte, with no separators.
+  static std::string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
+    std::string hex;
+    for (size_t i = 0; i != count; ++i) {
+      // Two hex digits plus the terminating NUL.
+      char pair[3];
+      SprintfLiteral(pair, "%02X", bytes[i]);
+      hex += pair;
+    }
+    return hex;
+  }
+
+  // Convert the |identifier| data to a string.  The string will
+  // be formatted as a UUID in all uppercase without dashes.
+  // (e.g., 22F065BBFC9C49F780FE26A7CEBD7BCE).
+  //
+  // Only the first kMDGUIDSize bytes of |identifier| are used; a shorter
+  // (or empty) identifier is padded with zero bytes.
+  static std::string ConvertIdentifierToUUIDString(
+      const std::vector<uint8_t>& identifier) {
+    uint8_t identifier_swapped[kMDGUIDSize] = {0};
+
+    // Indexing element 0 of an empty vector is undefined, so only touch the
+    // vector's storage when it actually holds bytes.
+    if (!identifier.empty()) {
+      memcpy(identifier_swapped, identifier.data(),
+             std::min(kMDGUIDSize, identifier.size()));
+    }
+
+    // Endian-ness swap to match dump processor expectation. The three
+    // leading fields (4, 2 and 2 bytes) are moved through properly typed
+    // temporaries rather than by aliasing the byte buffer.
+    uint32_t data1;
+    memcpy(&data1, identifier_swapped, sizeof(data1));
+    data1 = htonl(data1);
+    memcpy(identifier_swapped, &data1, sizeof(data1));
+
+    uint16_t data2;
+    memcpy(&data2, identifier_swapped + 4, sizeof(data2));
+    data2 = htons(data2);
+    memcpy(identifier_swapped + 4, &data2, sizeof(data2));
+
+    uint16_t data3;
+    memcpy(&data3, identifier_swapped + 6, sizeof(data3));
+    data3 = htons(data3);
+    memcpy(identifier_swapped + 6, &data3, sizeof(data3));
+
+    return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
+  }
+
+  // Convert the entire |identifier| data to a hex string. An empty
+  // identifier yields an empty string.
+  static std::string ConvertIdentifierToString(
+      const std::vector<uint8_t>& identifier) {
+    // data() is valid on an empty vector, whereas &identifier[0] is not.
+    return bytes_to_hex_string(identifier.data(), identifier.size());
+  }
+
+ private:
+  // Storage for the path specified
+  std::string path_;
+};
+
+// End of imports from toolkit/crashreporter/google-breakpad/.
+// ----------------------------------------------------------------------------
+
+// Name and address summary recorded for one loaded object: its base address
+// plus the lowest start and highest end address of its mappings.
+struct LoadedLibraryInfo {
+  std::string mName;
+  unsigned long mBaseAddress;
+  unsigned long mFirstMappingStart;
+  unsigned long mLastMappingEnd;
+
+  LoadedLibraryInfo(const char* aName, unsigned long aBaseAddress,
+                    unsigned long aFirstMappingStart,
+                    unsigned long aLastMappingEnd)
+      : mName{aName},
+        mBaseAddress{aBaseAddress},
+        mFirstMappingStart{aFirstMappingStart},
+        mLastMappingEnd{aLastMappingEnd} {}
+};
+
+// Formats |aIdentifier| as an uppercase UUID string and appends the breakpad
+// id age.
+static std::string IDtoUUIDString(const std::vector<uint8_t>& aIdentifier) {
+  // The appended character is the digit '0' (the age), not a '\0' terminator.
+  return FileID::ConvertIdentifierToUUIDString(aIdentifier) + '0';
+}
+
+// Returns the breakpad id of the binary file at |bin_name|, or an empty
+// string when no identifier could be produced for it.
+static std::string getId(const char* bin_name) {
+  std::vector<uint8_t> id_bytes;
+  id_bytes.reserve(kDefaultBuildIdSize);
+
+  FileID binary(bin_name);
+  if (!binary.ElfFileIdentifier(id_bytes)) {
+    return {};
+  }
+  return IDtoUUIDString(id_bytes);
+}
+
+// Builds the SharedLibrary entry for the object at |path| whose mappings
+// span |libStart|..|libEnd| at file offset |offset|.
+//
+// The breakpad id is computed from the file at |path| (empty when it cannot
+// be produced). Module name and debug name are the last component of |path|;
+// module path and debug path are |path| itself. Version and architecture are
+// left empty.
+static SharedLibrary SharedLibraryAtPath(const char* path,
+                                         unsigned long libStart,
+                                         unsigned long libEnd,
+                                         unsigned long offset = 0) {
+  std::string pathStr = path;
+
+  // Paths handled here come from the dynamic linker and /proc/<pid>/maps, so
+  // components are separated by '/'. Splitting on a backslash never matched
+  // and left the library name equal to the full path.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  return SharedLibrary(libStart, libEnd, offset, getId(path), nameStr, pathStr,
+                       nameStr, pathStr, std::string{}, "");
+}
+
+// dl_iterate_phdr callback. |data| points to a
+// std::vector<LoadedLibraryInfo>; for every object that has program headers
+// one entry is appended holding the object's name, its base address, and the
+// lowest start / highest end address over its PT_LOAD segments. Always
+// returns 0 so that iteration continues.
+static int dl_iterate_callback(struct dl_phdr_info* dl_info, size_t size,
+                               void* data) {
+  if (dl_info->dlpi_phnum <= 0) return 0;
+
+  auto* libraries = reinterpret_cast<std::vector<LoadedLibraryInfo>*>(data);
+
+  const unsigned long base = dl_info->dlpi_addr;
+  // Start from the widest possible "empty" range; it stays like this when
+  // the object has no PT_LOAD segment at all.
+  unsigned long lowestStart = -1;
+  unsigned long highestEnd = 0;
+
+  for (size_t idx = 0; idx < dl_info->dlpi_phnum; idx++) {
+    const auto& phdr = dl_info->dlpi_phdr[idx];
+    if (phdr.p_type == PT_LOAD) {
+      const unsigned long segStart = dl_info->dlpi_addr + phdr.p_vaddr;
+      const unsigned long segEnd = segStart + phdr.p_memsz;
+      if (segStart < lowestStart) {
+        lowestStart = segStart;
+      }
+      if (segEnd > highestEnd) {
+        highestEnd = segEnd;
+      }
+    }
+  }
+
+  libraries->emplace_back(dl_info->dlpi_name, base, lowestStart, highestEnd);
+
+  return 0;
+}
+
+// Builds the list of shared libraries loaded in the current process.
+//
+// The bulk of the entries comes from dl_iterate_phdr. /proc/<pid>/maps is
+// additionally scanned for executable mappings: on Linux to learn where the
+// main executable is mapped (so its otherwise nameless entry can be given
+// the path read from /proc/self/exe), on Android to add the dalvik JIT code
+// cache mapping. On Android an empty list is returned when dl_iterate_phdr
+// is not available.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo info;
+
+#if defined(GP_OS_linux)
+  // We need to find the name of the executable (exeName, exeNameLen) and the
+  // address of its executable section (exeExeAddr) in the running image.
+  char exeName[PATH_MAX];
+  memset(exeName, 0, sizeof(exeName));
+
+  // Passing sizeof - 1 keeps the final byte zero, so exeName stays
+  // NUL-terminated even though readlink does not terminate its output.
+  ssize_t exeNameLen = readlink("/proc/self/exe", exeName, sizeof(exeName) - 1);
+  if (exeNameLen == -1) {
+    // readlink failed for whatever reason.  Note this, but keep going.
+    exeName[0] = '\0';
+    exeNameLen = 0;
+    // LOG("SharedLibraryInfo::GetInfoForSelf(): readlink failed");
+  } else {
+    // Assert no buffer overflow.
+    MOZ_RELEASE_ASSERT(exeNameLen >= 0 &&
+                       exeNameLen < static_cast<ssize_t>(sizeof(exeName)));
+  }
+
+  // Start address of the last executable mapping whose path equals exeName;
+  // stays 0 when no such mapping is seen.
+  unsigned long exeExeAddr = 0;
+#endif
+
+#if defined(GP_OS_android)
+  // If dl_iterate_phdr doesn't exist, we give up immediately.
+  if (!dl_iterate_phdr) {
+    // On ARM Android, dl_iterate_phdr is provided by the custom linker.
+    // So if libxul was loaded by the system linker (e.g. as part of
+    // xpcshell when running tests), it won't be available and we should
+    // not call it.
+    return info;
+  }
+#endif
+
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // Read info from /proc/self/maps. We ignore most of it.
+  pid_t pid = mozilla::baseprofiler::profiler_current_process_id();
+  char path[PATH_MAX];
+  SprintfLiteral(path, "/proc/%d/maps", pid);
+  std::ifstream maps(path);
+  std::string line;
+  while (std::getline(maps, line)) {
+    int ret;
+    unsigned long start;
+    unsigned long end;
+    char perm[6 + 1] = "";
+    unsigned long offset;
+    char modulePath[PATH_MAX + 1] = "";
+    // Fields parsed: start-end, permissions, offset, then two skipped fields
+    // and the optional path (absent for anonymous mappings, hence 4 or 5
+    // successful conversions are both accepted below).
+    ret = sscanf(line.c_str(),
+                 "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n",
+                 &start, &end, perm, &offset, modulePath);
+    // |perm| was initialized to "", so this test is safe even if sscanf
+    // failed before filling it in.
+    if (!strchr(perm, 'x')) {
+      // Ignore non executable entries
+      continue;
+    }
+    if (ret != 5 && ret != 4) {
+      // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+      //     "reading /proc/self/maps failed");
+      continue;
+    }
+
+#  if defined(GP_OS_linux)
+    // Try to establish the main executable's load address.
+    if (exeNameLen > 0 && strcmp(modulePath, exeName) == 0) {
+      exeExeAddr = start;
+    }
+#  elif defined(GP_OS_android)
+    // Use /proc/pid/maps to get the dalvik-jit section since it has no
+    // associated phdrs.
+    if (0 == strcmp(modulePath, "/dev/ashmem/dalvik-jit-code-cache")) {
+      info.AddSharedLibrary(
+          SharedLibraryAtPath(modulePath, start, end, offset));
+      // Sanity limit on the number of entries collected from the maps file.
+      if (info.GetSize() > 10000) {
+        // LOG("SharedLibraryInfo::GetInfoForSelf(): "
+        //     "implausibly large number of mappings acquired");
+        break;
+      }
+    }
+#  endif
+  }
+#endif
+
+  std::vector<LoadedLibraryInfo> libInfoList;
+
+  // We collect the bulk of the library info using dl_iterate_phdr.
+  dl_iterate_phdr(dl_iterate_callback, &libInfoList);
+
+  for (const auto& libInfo : libInfoList) {
+    // The offset passed is the distance of the first mapping from the
+    // object's base address.
+    info.AddSharedLibrary(
+        SharedLibraryAtPath(libInfo.mName.c_str(), libInfo.mFirstMappingStart,
+                            libInfo.mLastMappingEnd,
+                            libInfo.mFirstMappingStart - libInfo.mBaseAddress));
+  }
+
+#if defined(GP_OS_linux)
+  // Make another pass over the information we just harvested from
+  // dl_iterate_phdr.  If we see a nameless object mapped at what we earlier
+  // established to be the main executable's load address, attach the
+  // executable's name to that entry.
+  for (size_t i = 0; i < info.GetSize(); i++) {
+    SharedLibrary& lib = info.GetMutableEntry(i);
+    if (lib.GetStart() <= exeExeAddr && exeExeAddr <= lib.GetEnd() &&
+        lib.GetDebugPath().empty()) {
+      lib = SharedLibraryAtPath(exeName, lib.GetStart(), lib.GetEnd(),
+                                lib.GetOffset());
+
+      // We only expect to see one such entry.
+      break;
+    }
+  }
+#endif
+
+  return info;
+}
+
+// Intentionally empty: this implementation gathers everything on demand in
+// GetInfoForSelf() and keeps no state that would need setting up.
+void SharedLibraryInfo::Initialize() { /* do nothing */
+}
diff --git a/mozglue/baseprofiler/core/shared-libraries-macos.cc b/mozglue/baseprofiler/core/shared-libraries-macos.cc
new file mode 100644
index 0000000000..13e66f9f26
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-macos.cc
@@ -0,0 +1,182 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "platform.h"
+
+#include "mozilla/Unused.h"
+#include <AvailabilityMacros.h>
+
+#include <dlfcn.h>
+#include <mach-o/arch.h>
+#include <mach-o/dyld_images.h>
+#include <mach-o/dyld.h>
+#include <mach-o/loader.h>
+#include <mach/mach_init.h>
+#include <mach/mach_traps.h>
+#include <mach/task_info.h>
+#include <mach/task.h>
+#include <sstream>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+
+// Architecture specific abstraction.
+// Selects which Mach-O structures, magic number and segment load command
+// the rest of this file works with: the 32-bit variants when GP_ARCH_x86 is
+// defined, the 64-bit (_64) variants for every other architecture.
+#if defined(GP_ARCH_x86)
+typedef mach_header platform_mach_header;
+typedef segment_command mach_segment_command_type;
+#  define MACHO_MAGIC_NUMBER MH_MAGIC
+#  define CMD_SEGMENT LC_SEGMENT
+#  define seg_size uint32_t
+#else
+typedef mach_header_64 platform_mach_header;
+typedef segment_command_64 mach_segment_command_type;
+#  define MACHO_MAGIC_NUMBER MH_MAGIC_64
+#  define CMD_SEGMENT LC_SEGMENT_64
+#  define seg_size uint64_t
+#endif
+
+// One loaded image as recorded by the dyld add-image callback: the address
+// of its Mach-O header and the file name dladdr reported for it.
+struct NativeSharedLibrary {
+  const platform_mach_header* header;
+  std::string path;
+};
+// Images currently known to be loaded. Null until
+// SharedLibraryInfo::Initialize() allocates it; accessed under
+// SharedLibrariesLock by the dyld callbacks and GetInfoForSelf().
+static std::vector<NativeSharedLibrary>* sSharedLibrariesList = nullptr;
+
+// Scoped guard for the single process-wide mutex protecting
+// sSharedLibrariesList: the mutex is locked when the guard is constructed
+// and unlocked when it is destroyed. The guard cannot be copied.
+class MOZ_RAII SharedLibrariesLock {
+ public:
+  SharedLibrariesLock() { sSharedLibrariesMutex.Lock(); }
+
+  ~SharedLibrariesLock() { sSharedLibrariesMutex.Unlock(); }
+
+  SharedLibrariesLock(const SharedLibrariesLock&) = delete;
+  void operator=(const SharedLibrariesLock&) = delete;
+
+ private:
+  // Shared by every guard instance.
+  static mozilla::baseprofiler::detail::BaseProfilerMutex sSharedLibrariesMutex;
+};
+
+// Out-of-class definition of the mutex declared above.
+mozilla::baseprofiler::detail::BaseProfilerMutex
+    SharedLibrariesLock::sSharedLibrariesMutex;
+
+// Add-image callback registered with dyld: records the image whose header is
+// |mh| in sSharedLibrariesList. Images for which dladdr fails are skipped,
+// and nothing is recorded while the list has not been allocated.
+// |vmaddr_slide| is unused.
+static void SharedLibraryAddImage(const struct mach_header* mh,
+                                  intptr_t vmaddr_slide) {
+  // NOTE: The callback signature uses mach_header even on 64-bit, where the
+  // object is really a mach_header_64 (presumably for backwards
+  // compatibility), so convert to the platform's header type first.
+  const auto* image = reinterpret_cast<const platform_mach_header*>(mh);
+
+  Dl_info imageInfo;
+  if (dladdr(image, &imageInfo) == 0) {
+    return;
+  }
+
+  SharedLibrariesLock lock;
+  if (sSharedLibrariesList) {
+    sSharedLibrariesList->push_back(
+        NativeSharedLibrary{image, imageInfo.dli_fname});
+  }
+}
+
+// Remove-image callback registered with dyld: drops the first entry of
+// sSharedLibrariesList whose header address equals |mh|, if there is one.
+// Does nothing while the list has not been allocated. |vmaddr_slide| is
+// unused.
+static void SharedLibraryRemoveImage(const struct mach_header* mh,
+                                     intptr_t vmaddr_slide) {
+  // NOTE: The callback signature uses mach_header even on 64-bit, where the
+  // object is really a mach_header_64 (presumably for backwards
+  // compatibility), so convert to the platform's header type first.
+  const auto* image = reinterpret_cast<const platform_mach_header*>(mh);
+
+  SharedLibrariesLock lock;
+  if (!sSharedLibrariesList) {
+    return;
+  }
+
+  for (auto entry = sSharedLibrariesList->begin();
+       entry != sSharedLibrariesList->end(); ++entry) {
+    if (entry->header == image) {
+      sSharedLibrariesList->erase(entry);
+      return;
+    }
+  }
+}
+
+// Allocates the global image list and registers the dyld callbacks that keep
+// it up to date as images are added and removed. The list is allocated
+// before the callbacks are registered because the callbacks ignore events
+// while the list pointer is still null.
+void SharedLibraryInfo::Initialize() {
+  // NOTE: We intentionally leak this memory here. We're allocating dynamically
+  // in order to avoid static initializers.
+  sSharedLibrariesList = new std::vector<NativeSharedLibrary>();
+
+  _dyld_register_func_for_add_image(SharedLibraryAddImage);
+  _dyld_register_func_for_remove_image(SharedLibraryRemoveImage);
+}
+
+// Appends to |info| a SharedLibrary describing the Mach-O image whose header
+// is at |header| and whose file path is |path|.
+//
+// The entry starts at the header address and extends by the vmsize of the
+// image's __TEXT segment (0 if that segment is not found). Its breakpad id
+// is the image's LC_UUID as 32 uppercase hex digits followed by the age
+// digit '0', or empty when the image has no LC_UUID command. Module/debug
+// name are the last component of |path|, module/debug path are |path|.
+static void addSharedLibrary(const platform_mach_header* header,
+                             const char* path, SharedLibraryInfo& info) {
+  // The load commands immediately follow the Mach-O header.
+  const struct load_command* cmd =
+      reinterpret_cast<const struct load_command*>(header + 1);
+
+  seg_size size = 0;
+  unsigned long long start = reinterpret_cast<unsigned long long>(header);
+  // Find the cmd segment in the macho image. It will contain the offset we care
+  // about. The walk stops early once both the __TEXT size and the UUID have
+  // been found.
+  const uint8_t* uuid_bytes = nullptr;
+  for (unsigned int i = 0;
+       cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0);
+       ++i) {
+    if (cmd->cmd == CMD_SEGMENT) {
+      const mach_segment_command_type* seg =
+          reinterpret_cast<const mach_segment_command_type*>(cmd);
+
+      if (!strcmp(seg->segname, "__TEXT")) {
+        size = seg->vmsize;
+      }
+    } else if (cmd->cmd == LC_UUID) {
+      const uuid_command* ucmd = reinterpret_cast<const uuid_command*>(cmd);
+      uuid_bytes = ucmd->uuid;
+    }
+
+    // Each command records its own size; use it to step to the next one.
+    cmd = reinterpret_cast<const struct load_command*>(
+        reinterpret_cast<const char*>(cmd) + cmd->cmdsize);
+  }
+
+  std::string uuid;
+  if (uuid_bytes != nullptr) {
+    static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                                        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+    // An LC_UUID payload is 16 bytes long and every one of them belongs in
+    // the id; stopping after 15 produced a truncated, unmatchable id.
+    constexpr int kUuidByteCount = 16;
+    for (int i = 0; i < kUuidByteCount; ++i) {
+      uint8_t byte = uuid_bytes[i];
+      uuid += digits[byte >> 4];
+      uuid += digits[byte & 0xFu];
+    }
+    // breakpad id age.
+    uuid += '0';
+  }
+
+  std::string pathStr = path;
+
+  // Image paths on macOS are '/'-separated; splitting on a backslash never
+  // matched and left the library name equal to the full path.
+  size_t pos = pathStr.rfind('/');
+  std::string nameStr =
+      (pos != std::string::npos) ? pathStr.substr(pos + 1) : pathStr;
+
+  const NXArchInfo* archInfo =
+      NXGetArchInfoFromCpuType(header->cputype, header->cpusubtype);
+
+  info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid, nameStr,
+                                      pathStr, nameStr, pathStr, std::string{},
+                                      archInfo ? archInfo->name : ""));
+}
+
+// Translate the statically stored sSharedLibrariesList information into a
+// SharedLibraryInfo object.
+//
+// Returns an empty SharedLibraryInfo when the list has not been allocated
+// yet, i.e. when Initialize() has not run; the dyld callbacks treat that
+// state the same way.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibrariesLock lock;
+  SharedLibraryInfo sharedLibraryInfo;
+
+  // Without this check a call made before Initialize() would dereference a
+  // null pointer.
+  if (!sSharedLibrariesList) {
+    return sharedLibraryInfo;
+  }
+
+  for (auto& info : *sSharedLibrariesList) {
+    addSharedLibrary(info.header, info.path.c_str(), sharedLibraryInfo);
+  }
+
+  return sharedLibraryInfo;
+}
diff --git a/mozglue/baseprofiler/core/shared-libraries-win32.cc b/mozglue/baseprofiler/core/shared-libraries-win32.cc
new file mode 100644
index 0000000000..5bf7408193
--- /dev/null
+++ b/mozglue/baseprofiler/core/shared-libraries-win32.cc
@@ -0,0 +1,277 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+#include <dbghelp.h>
+#include <sstream>
+#include <psapi.h>
+
+#include "BaseProfilerSharedLibraries.h"
+
+#include "mozilla/glue/WindowsUnicode.h"
+#include "mozilla/Unused.h"
+#include "mozilla/WindowsVersion.h"
+
+#include <cctype>
+#include <string>
+
+// Value expected in CodeViewRecord70::signature; GetPdbInfo rejects debug
+// records whose signature field differs from it.
+#define CV_SIGNATURE 0x53445352  // 'SDSR'
+
+// Layout of the CodeView debug record that GetPdbInfo reads from a loaded
+// module: signature, PDB GUID, PDB age, then the PDB file name.
+struct CodeViewRecord70 {
+  uint32_t signature;
+  GUID pdbSignature;
+  uint32_t pdbAge;
+  // A UTF-8 string, according to
+  // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/locator.cpp#L785
+  // Declared with one element; GetPdbInfo hands out a pointer to it as the
+  // start of the name. NOTE(review): presumably NUL-terminated and longer
+  // than one byte in the mapped image - confirm against the PDB format.
+  char pdbFileName[1];
+};
+
+// Uppercase hexadecimal digits, indexed by nibble value.
+static constexpr char digits[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
+                                    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
+// Appends the bytes in [aBegin, aEnd) to |aOut| as uppercase hex, two digits
+// per byte, in memory order.
+static void AppendHex(const unsigned char* aBegin, const unsigned char* aEnd,
+                      std::string& aOut) {
+  const unsigned char* cursor = aBegin;
+  while (cursor < aEnd) {
+    const unsigned char value = *cursor++;
+    aOut.push_back(digits[value >> 4]);
+    aOut.push_back(digits[value & 0xFu]);
+  }
+}
+
+// Readable names for the |aWithPadding| argument of the overload below.
+static constexpr bool WITH_PADDING = true;
+static constexpr bool WITHOUT_PADDING = false;
+
+// Appends |aValue| to |aOut| as uppercase hex, most significant nibble
+// first. With padding, all sizeof(T) * 2 digits are written. Without it,
+// leading zero digits are dropped, but at least one digit is always written
+// (so zero becomes "0").
+template <typename T>
+static void AppendHex(T aValue, std::string& aOut, bool aWithPadding) {
+  // Becomes true once a digit has to be written, after which no further
+  // zeroes are skipped.
+  bool emitting = aWithPadding;
+  for (int shift = (static_cast<int>(sizeof(T)) * 2 - 1) * 4; shift >= 0;
+       shift -= 4) {
+    const unsigned nibble = (aValue >> shift) & 0xFu;
+    // Skip a leading zero, except in the last position so that the output
+    // is never blank.
+    if (!emitting && shift != 0 && nibble == 0) {
+      continue;
+    }
+    emitting = true;
+    aOut += digits[nibble];
+  }
+}
+
+// Reads the CodeView debug record of the module image mapped at |aStart|.
+//
+// On success returns true after appending the PDB GUID as hex digits to
+// |aSignature|, storing the PDB age in |aAge| and pointing |*aPdbName| at
+// the PDB file name inside the mapped image (not a copy). Returns false,
+// leaving the outputs untouched, when |aStart| is null, the DOS or NT
+// signature does not match, the image has no debug directory, the debug
+// directory entry examined is not of CodeView type, or the record's
+// signature is not CV_SIGNATURE.
+static bool GetPdbInfo(uintptr_t aStart, std::string& aSignature,
+                       uint32_t& aAge, char** aPdbName) {
+  if (!aStart) {
+    return false;
+  }
+
+  PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart);
+  if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+    return false;
+  }
+
+  // e_lfanew is the offset of the NT headers from the start of the image.
+  PIMAGE_NT_HEADERS ntHeaders =
+      reinterpret_cast<PIMAGE_NT_HEADERS>(aStart + dosHeader->e_lfanew);
+  if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) {
+    return false;
+  }
+
+  uint32_t relativeVirtualAddress =
+      ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG]
+          .VirtualAddress;
+  if (!relativeVirtualAddress) {
+    return false;
+  }
+
+  // Only the entry at the start of the debug directory is examined.
+  PIMAGE_DEBUG_DIRECTORY debugDirectory =
+      reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + relativeVirtualAddress);
+  if (!debugDirectory || debugDirectory->Type != IMAGE_DEBUG_TYPE_CODEVIEW) {
+    return false;
+  }
+
+  CodeViewRecord70* debugInfo = reinterpret_cast<CodeViewRecord70*>(
+      aStart + debugDirectory->AddressOfRawData);
+  if (!debugInfo || debugInfo->signature != CV_SIGNATURE) {
+    return false;
+  }
+
+  aAge = debugInfo->pdbAge;
+  GUID& pdbSignature = debugInfo->pdbSignature;
+  // Data1..Data3 are written as zero-padded integers; Data4 is written byte
+  // by byte in memory order.
+  AppendHex(pdbSignature.Data1, aSignature, WITH_PADDING);
+  AppendHex(pdbSignature.Data2, aSignature, WITH_PADDING);
+  AppendHex(pdbSignature.Data3, aSignature, WITH_PADDING);
+  AppendHex(reinterpret_cast<const unsigned char*>(&pdbSignature.Data4),
+            reinterpret_cast<const unsigned char*>(&pdbSignature.Data4) +
+                sizeof(pdbSignature.Data4),
+            aSignature);
+
+  // The PDB file name could be different from module filename, so report both
+  // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb
+  *aPdbName = debugInfo->pdbFileName;
+
+  return true;
+}
+
+// Returns the file version of the module at |dllPath| in the form "a.b.c.d",
+// taken from the fixed file info of its version resource. Returns an empty
+// string when the version information cannot be obtained.
+static std::string GetVersion(wchar_t* dllPath) {
+  const DWORD infoSize = GetFileVersionInfoSizeW(dllPath, nullptr);
+  if (infoSize == 0) {
+    return {};
+  }
+
+  auto infoData = mozilla::MakeUnique<unsigned char[]>(infoSize);
+  if (!GetFileVersionInfoW(dllPath, 0, infoSize, infoData.get())) {
+    return {};
+  }
+
+  // The root block ("\\") of the version resource is the fixed file info.
+  VS_FIXEDFILEINFO* vInfo;
+  UINT vInfoLen;
+  if (!VerQueryValueW(infoData.get(), L"\\", (LPVOID*)&vInfo, &vInfoLen) ||
+      !vInfo) {
+    return {};
+  }
+
+  // Each DWORD packs two 16-bit version components, high word first.
+  std::string version = std::to_string(vInfo->dwFileVersionMS >> 16);
+  version += '.';
+  version += std::to_string(vInfo->dwFileVersionMS & 0xFFFF);
+  version += '.';
+  version += std::to_string(vInfo->dwFileVersionLS >> 16);
+  version += '.';
+  version += std::to_string(vInfo->dwFileVersionLS & 0xFFFF);
+  return version;
+}
+
+// Builds the list of modules loaded in the current process.
+//
+// Modules are enumerated with EnumProcessModules. For each one the path,
+// base address and image size are recorded together with, when readable,
+// the breakpad id (PDB GUID followed by the PDB age in hex), the PDB
+// name/path and the file version. Modules whose path or module information
+// cannot be queried are skipped. An empty list is returned if the module
+// enumeration itself fails.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() {
+  SharedLibraryInfo sharedLibraryInfo;
+
+  HANDLE hProcess = GetCurrentProcess();
+  mozilla::UniquePtr<HMODULE[]> hMods;
+  size_t modulesNum = 0;
+  if (hProcess != NULL) {
+    // First call only asks for the buffer size needed; the second call
+    // fills the buffer.
+    DWORD modulesSize;
+    if (!EnumProcessModules(hProcess, nullptr, 0, &modulesSize)) {
+      return sharedLibraryInfo;
+    }
+    modulesNum = modulesSize / sizeof(HMODULE);
+    hMods = mozilla::MakeUnique<HMODULE[]>(modulesNum);
+    if (!EnumProcessModules(hProcess, hMods.get(), modulesNum * sizeof(HMODULE),
+                            &modulesSize)) {
+      return sharedLibraryInfo;
+    }
+    // The list may have shrunk between calls
+    if (modulesSize / sizeof(HMODULE) < modulesNum) {
+      modulesNum = modulesSize / sizeof(HMODULE);
+    }
+  }
+
+  for (unsigned int i = 0; i < modulesNum; i++) {
+    wchar_t modulePath[MAX_PATH + 1];
+    if (!GetModuleFileNameExW(hProcess, hMods[i], modulePath,
+                              std::size(modulePath))) {
+      continue;
+    }
+    mozilla::UniquePtr<char[]> utf8ModulePath(
+        mozilla::glue::WideToUTF8(modulePath));
+    if (!utf8ModulePath) {
+      continue;
+    }
+
+    MODULEINFO module = {0};
+    if (!GetModuleInformation(hProcess, hMods[i], &module,
+                              sizeof(MODULEINFO))) {
+      continue;
+    }
+
+    // The module name is the path component after the last '\' or '/'.
+    std::string modulePathStr(utf8ModulePath.get());
+    size_t pos = modulePathStr.find_last_of("\\/");
+    std::string moduleNameStr = (pos != std::string::npos)
+                                    ? modulePathStr.substr(pos + 1)
+                                    : modulePathStr;
+
+    // Hackaround for Bug 1607574.  Nvidia's shim driver nvd3d9wrap[x].dll
+    // detours LoadLibraryExW when it's loaded and the detour function causes
+    // AV when the code tries to access data pointing to an address within
+    // unloaded nvinit[x].dll.
+    // The crashing code is executed when a given parameter is "detoured.dll"
+    // and OS version is older than 6.2.  We hit that crash at the following
+    // call to LoadLibraryEx even if we specify LOAD_LIBRARY_AS_DATAFILE.
+    // We work around it by skipping LoadLibraryEx, and add a library info with
+    // a dummy breakpad id instead.
+#if !defined(_M_ARM64)
+#  if defined(_M_AMD64)
+    LPCWSTR kNvidiaShimDriver = L"nvd3d9wrapx.dll";
+    LPCWSTR kNvidiaInitDriver = L"nvinitx.dll";
+#  elif defined(_M_IX86)
+    LPCWSTR kNvidiaShimDriver = L"nvd3d9wrap.dll";
+    LPCWSTR kNvidiaInitDriver = L"nvinit.dll";
+#  endif
+    // Case-insensitive comparison of the module name with "detoured.dll":
+    // the module name is lowercased character by character, and the
+    // four-iterator std::equal also requires both lengths to match.
+    constexpr std::string_view detoured_dll = "detoured.dll";
+    if (std::equal(moduleNameStr.cbegin(), moduleNameStr.cend(),
+                   detoured_dll.cbegin(), detoured_dll.cend(),
+                   [](char aModuleChar, char aDetouredChar) {
+                     return std::tolower(aModuleChar) == aDetouredChar;
+                   }) &&
+        !mozilla::IsWin8OrLater() && ::GetModuleHandleW(kNvidiaShimDriver) &&
+        !::GetModuleHandleW(kNvidiaInitDriver)) {
+      const std::string pdbNameStr = "detoured.pdb";
+      SharedLibrary shlib((uintptr_t)module.lpBaseOfDll,
+                          (uintptr_t)module.lpBaseOfDll + module.SizeOfImage,
+                          0,  // DLLs are always mapped at offset 0 on Windows
+                          "000000000000000000000000000000000", moduleNameStr,
+                          modulePathStr, pdbNameStr, pdbNameStr, "", "");
+      sharedLibraryInfo.AddSharedLibrary(shlib);
+      continue;
+    }
+#endif  // !defined(_M_ARM64)
+
+    std::string breakpadId;
+    // Load the module again to make sure that its handle will remain
+    // valid as we attempt to read the PDB information from it.  We load the
+    // DLL as a datafile so that if the module actually gets unloaded between
+    // the call to EnumProcessModules and the following LoadLibraryEx, we
+    // don't end up running the now newly loaded module's DllMain function. If
+    // the module is already loaded, LoadLibraryEx just increments its
+    // refcount.
+    //
+    // Note that because of the race condition above, merely loading the DLL
+    // again is not safe enough, therefore we also need to make sure that we
+    // can read the memory mapped at the base address before we can safely
+    // proceed to actually access those pages.
+    HMODULE handleLock =
+        LoadLibraryExW(modulePath, NULL, LOAD_LIBRARY_AS_DATAFILE);
+    MEMORY_BASIC_INFORMATION vmemInfo = {0};
+    std::string pdbSig;
+    // Only read after GetPdbInfo succeeded, which is what assigns it.
+    uint32_t pdbAge;
+    std::string pdbPathStr;
+    std::string pdbNameStr;
+    char* pdbName = nullptr;
+    if (handleLock &&
+        sizeof(vmemInfo) ==
+            VirtualQuery(module.lpBaseOfDll, &vmemInfo, sizeof(vmemInfo)) &&
+        vmemInfo.State == MEM_COMMIT &&
+        GetPdbInfo((uintptr_t)module.lpBaseOfDll, pdbSig, pdbAge, &pdbName)) {
+      MOZ_ASSERT(breakpadId.empty());
+      // Breakpad id = PDB GUID digits followed by the age in unpadded hex.
+      breakpadId += pdbSig;
+      AppendHex(pdbAge, breakpadId, WITHOUT_PADDING);
+
+      pdbPathStr = pdbName;
+      size_t pos = pdbPathStr.find_last_of("\\/");
+      pdbNameStr =
+          (pos != std::string::npos) ? pdbPathStr.substr(pos + 1) : pdbPathStr;
+    }
+
+    // When the PDB information could not be read, the entry is still added,
+    // with an empty breakpad id and empty PDB name/path.
+    SharedLibrary shlib((uintptr_t)module.lpBaseOfDll,
+                        (uintptr_t)module.lpBaseOfDll + module.SizeOfImage,
+                        0,  // DLLs are always mapped at offset 0 on Windows
+                        breakpadId, moduleNameStr, modulePathStr, pdbNameStr,
+                        pdbPathStr, GetVersion(modulePath), "");
+    sharedLibraryInfo.AddSharedLibrary(shlib);
+
+    FreeLibrary(handleLock);  // ok to free null handles
+  }
+
+  return sharedLibraryInfo;
+}
+
+// Intentionally empty: this implementation enumerates the loaded modules on
+// demand in GetInfoForSelf() and keeps no state that would need setting up.
+void SharedLibraryInfo::Initialize() { /* do nothing */
+}
diff --git a/mozglue/baseprofiler/core/vtune/ittnotify.h b/mozglue/baseprofiler/core/vtune/ittnotify.h
new file mode 100644
index 0000000000..04adf9eb5e
--- /dev/null
+++ b/mozglue/baseprofiler/core/vtune/ittnotify.h
@@ -0,0 +1,4127 @@
+// clang-format off
+
+/* <copyright>
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+
+  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+  The full GNU General Public License is included in this distribution
+  in the file called LICENSE.GPL.
+
+  Contact Information:
+  http://software.intel.com/en-us/articles/intel-vtune-amplifier-xe/
+
+  BSD LICENSE
+
+  Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+</copyright> */
+#ifndef _ITTNOTIFY_H_
+#  define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The ITT API is used to annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Tasks and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+#  define ITT_OS_WIN   1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+#  define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+#  define ITT_OS_MAC   3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+#  define ITT_OS_FREEBSD   4
+#endif /* ITT_OS_FREEBSD */
+
+#if !defined(ITT_OS) /* keep any definition made before this point */
+#  if defined(_WIN32) || defined(WIN32)
+#    define ITT_OS ITT_OS_WIN
+#  elif defined(__MACH__) && defined(__APPLE__)
+#    define ITT_OS ITT_OS_MAC
+#  elif defined(__FreeBSD__)
+#    define ITT_OS ITT_OS_FREEBSD
+#  else /* every other target is treated as Linux */
+#    define ITT_OS ITT_OS_LINUX
+#  endif
+#endif /* !defined(ITT_OS) */
+
+#ifndef ITT_PLATFORM_WIN
+#  define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+#  define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+#  define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+#  define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#if !defined(ITT_PLATFORM) /* keep any definition made before this point */
+#  if ITT_OS == ITT_OS_WIN
+#    define ITT_PLATFORM ITT_PLATFORM_WIN
+#  elif ITT_OS == ITT_OS_MAC
+#    define ITT_PLATFORM ITT_PLATFORM_MAC
+#  elif ITT_OS == ITT_OS_FREEBSD
+#    define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+#  else /* any other ITT_OS value maps to POSIX */
+#    define ITT_PLATFORM ITT_PLATFORM_POSIX
+#  endif
+#endif /* !defined(ITT_PLATFORM) */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define ITTAPI_CDECL __cdecl
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_IX86 || defined __i386__
+#      define ITTAPI_CDECL __attribute__ ((cdecl))
+#    else  /* _M_IX86 || __i386__ */
+#      define ITTAPI_CDECL /* actual only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    define STDCALL __stdcall
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    if defined _M_IX86 || defined __i386__
+#      define STDCALL __attribute__ ((stdcall))
+#    else  /* _M_IX86 || __i386__ */
+#      define STDCALL /* supported only on x86 platform */
+#    endif /* _M_IX86 || __i386__ */
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI    ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL    ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE           __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE           static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else  /* __STRICT_ANSI__ */
+#define ITT_INLINE           static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+#  if ITT_PLATFORM==ITT_PLATFORM_WIN
+#    pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+#  else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#    warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+#  endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#  include "vtune/legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n)     ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR     3
+#define ITT_MINOR     0
+
+/* Standard versioning of a token: x -> x_<ITT_MAJOR>_<ITT_MINOR> (x_3_0 here) */
+#define ITT_VERSIONIZE(x)    \
+    ITT_JOIN(x,              \
+    ITT_JOIN(_,              \
+    ITT_JOIN(ITT_MAJOR,      \
+    ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+#  define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+#  define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n)     ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d)       (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a)     (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d)       (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x)     (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y)   (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ?       0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a)     (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b)   (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ?       0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args)                             \
+    typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args;   \
+    extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing applies not only to the current thread but to the whole process as well as to spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ *   - Does not analyze or report errors that involve memory access.
+ *   - Other errors are reported as usual. Pausing data collection in
+ *     Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ *     only pauses tracing and analyzing memory access.
+ *     It does not pause tracing or analyzing threading APIs.
+ *   .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ *   - Does continue to record when new threads are started.
+ *   .
+ * - Other effects:
+ *   - Possible reduction of runtime overhead.
+ *   .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause,  (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause      ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr  ITTNOTIFY_NAME(pause)
+#define __itt_resume     ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach     ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr  0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr  0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_thread_set_nameA(const char    *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_thread_set_name     __itt_thread_set_nameW
+#  define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+#  define __itt_thread_set_name     __itt_thread_set_nameA
+#  define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char    *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA     ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW     ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name     ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore     ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push     ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop     ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_suppress_mode
+ * @brief Enumerator for the disable methods
+ */
+typedef enum __itt_suppress_mode {
+    __itt_unsuppress_range,
+    __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range     ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing; the stub must accept all four API arguments */
+#define __itt_suppress_mark_range(mode, mask, address, size)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ *        call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range     ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing; the stub must accept all four API arguments */
+#define __itt_suppress_clear_range(mode, mask, address, size)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex   2
+
+/**
+@brief Name a synchronization object
+@param[in] addr       Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype    null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname    null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute  one of [#__itt_attr_barrier, #__itt_attr_mutex]
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char    *objtype, const char    *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_sync_create     __itt_sync_createW
+#  define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+#  define __itt_sync_create     __itt_sync_createA
+#  define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char    *objtype, const char    *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create,  (void *addr, const char*    objtype, const char*    objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA     ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW     ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create     ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr  handle for the synchronization object.
+@param[in] name  null-terminated object name string.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char    *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_sync_rename     __itt_sync_renameW
+#  define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+#  define __itt_sync_rename     __itt_sync_renameA
+#  define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char    *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename,  (void *addr, const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA     ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW     ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename     ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy     ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name group of functions used for performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare     ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel     ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired     ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Start of the sync object releasing code. Call it before the lock release call.
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing     ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name Group of functions used by correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and does not have a corresponding
+ *   function in the dynamic library.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare     ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and does not have a corresponding
+ *   function in the dynamic library.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel     ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and does not have a corresponding
+ *   function in the dynamic library.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired     ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ *   there is no spin but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and does not have a corresponding
+ *   function in the dynamic library.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing     ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API.  Handles must be 0 initialized prior
+ * to the first call to begin, or may cause a run-time failure.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0.  The commonly expected idiom is one static handle to
+ * identify a site or task.  If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name.  These routines also take an instance variable.  Like
+ * the lexical instance, these must be 0 initialized.  Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site;             /*!< @brief handle for lexical site     */
+typedef void* __itt_model_site_instance;    /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task;             /*!< @brief handle for lexical task     */
+typedef void* __itt_model_task_instance;    /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Selects what __itt_model_disable_push() disables
+ */
+typedef enum {
+    __itt_model_disable_observation,
+    __itt_model_disable_collection
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * Site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but not yet ended site for the
+ * current thread.  The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site names of two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end  (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin,  (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW,  (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL,  (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end,    (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2,  (void))
+#define __itt_model_site_begin      ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr  ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW      ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr  ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA      ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr  ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL      ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr  ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end        ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr    ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2        ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr    ITTNOTIFY_NAME(model_site_end_2)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr macros are 0 */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr  0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr  0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr  0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_model_site_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr  0
+#endif
+#define __itt_model_site_beginA_ptr  0
+#define __itt_model_site_beginAL_ptr  0
+#define __itt_model_site_end_ptr    0
+#define __itt_model_site_end_2_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most closely enclosing
+ * dynamic site.  task_end exits the most recently started but unended task.  The handle passed
+ * to end may be used to validate structure.  It is unspecified if bad dynamic nesting is
+ * detected.  If it is, it should be encoded in the resulting data collection.  The collector
+ * should not fail due to construct nesting issues, nor attempt to directly indicate the problem.
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end  (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin,  (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW,  (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL,  (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA,  (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL,  (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end,    (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2,  (void))
+#define __itt_model_task_begin      ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr  ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW     ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW     ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA    ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL    ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA    ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL    ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end        ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr    ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2        ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr    ITTNOTIFY_NAME(model_task_end_2)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr macros are 0 */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr  0
+#define __itt_model_iteration_taskW(name)
+#define __itt_model_iteration_taskW_ptr  0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr  0
+#define __itt_model_task_beginAL(name, taskNameLen)
+#define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr  0
+#define __itt_model_iteration_taskAL(name, taskNameLen)
+#define __itt_model_iteration_taskAL_ptr  0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_model_task_begin_ptr  0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#define __itt_model_iteration_taskW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr  0
+#define __itt_model_task_beginAL_ptr  0
+#define __itt_model_iteration_taskA_ptr    0
+#define __itt_model_iteration_taskAL_ptr    0
+#define __itt_model_task_end_ptr    0
+#define __itt_model_task_end_2_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling.  Each unique address is modeled as a separate
+ * lock, and even invalid addresses are valid lock IDs.  Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification.  Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire     ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2     ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release     ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2     ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr macros are 0 */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe the behavior of a user-defined
+ * memory allocator, which correctness modeling may need in order to understand
+ * when storage is not expected to be actually reused across threads.
+ */
+void ITTAPI __itt_model_record_allocation  (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation,   (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation       ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr   ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation     ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr macros are 0 */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr   0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_model_record_allocation_ptr   0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note that particular storage is inductive through the end of the current site
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses     ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_model_induction_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note that particular storage is used for reduction through the end
+ * of the current site
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses     ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_model_reduction_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of the given
+ * storage through the end of the current site
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses     ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_model_observe_uses_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage that was set up by
+ * induction_uses, reduction_uses or observe_uses
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses     ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region.  This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations).
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored.  (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly.  This applies to BOTH correctness data
+ * collection and performance data collection.  For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations.  Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop,  (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push     ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop      ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr  ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task      ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr  ITTNOTIFY_NAME(model_aggregate_task)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr macros are 0 */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function; /*!< @brief handle returned by __itt_heap_function_create and passed as h to the __itt_heap_* calls */
+
+/**
+ * @brief Create an identifier for a heap function
+ * @return non-zero identifier or NULL
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char*    name, const char*    domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_heap_function_create     __itt_heap_function_createW
+#  define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+#  define __itt_heap_function_create     __itt_heap_function_createA
+#  define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char*    name, const char*    domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create,  (const char*    name, const char*    domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA     ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW     ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create      ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr  ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to a null handle, _ptr macros are 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain)  (__itt_heap_function)0
+#define __itt_heap_function_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macros are defined, as 0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the beginning of an allocation.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin     ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_allocate_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the end of an allocation.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end     ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_allocate_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the beginning of a free.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin     ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_free_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the end of a free.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end     ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_free_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record the beginning of a reallocation.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin     ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else  /* INTEL_NO_ITTNOTIFY_API: calls expand to nothing, _ptr is 0 */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr   0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr macro is defined, as 0 */
+#define __itt_heap_reallocate_begin_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#    define __itt_heap_reallocate_end     ITTNOTIFY_VOID(heap_reallocate_end)
+#    define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#    define __itt_heap_reallocate_end_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Mark the begin of an internal access. */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#    define __itt_heap_internal_access_begin     ITTNOTIFY_VOID(heap_internal_access_begin)
+#    define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_heap_internal_access_begin()
+#    define __itt_heap_internal_access_begin_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Mark the end of an internal access. */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#    define __itt_heap_internal_access_end     ITTNOTIFY_VOID(heap_internal_access_end)
+#    define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_heap_internal_access_end()
+#    define __itt_heap_internal_access_end_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Mark the begin of memory growth recording. */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#    define __itt_heap_record_memory_growth_begin     ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#    define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_heap_record_memory_growth_begin()
+#    define __itt_heap_record_memory_growth_begin_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Mark the end of memory growth recording. */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#    define __itt_heap_record_memory_growth_end     ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#    define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_heap_record_memory_growth_end()
+#    define __itt_heap_record_memory_growth_end_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify (presumably passed as the reset_mask / record_mask arguments below - confirm).
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks (bit 0 of the mask).
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth (bit 1 of the mask).
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief Heap reset detection; reset_mask is presumably built from __itt_heap_leaks / __itt_heap_growth (confirm). */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection,  (unsigned int reset_mask))
+#define __itt_heap_reset_detection      ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr  ITTNOTIFY_NAME(heap_reset_detection)
+#else  /* INTEL_NO_ITTNOTIFY_API: no-op that must still accept the mask argument of the real API */
+#define __itt_heap_reset_detection(reset_mask)
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr  0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Report; record_mask is presumably built from __itt_heap_leaks / __itt_heap_growth (confirm). */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record     ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else  /* INTEL_NO_ITTNOTIFY_API: no-op that must still accept the mask argument of the real API */
+#define __itt_heap_record(record_mask)
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain
+{
+    volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+    const char* nameA;  /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else  /* UNICODE || _UNICODE */
+    void* nameW; /*!< Untyped stand-in for the wide-character name when UNICODE is not defined. */
+#endif /* UNICODE || _UNICODE */
+    int   extra1; /*!< Reserved to the runtime */
+    void* extra2; /*!< Reserved to the runtime */
+    struct ___itt_domain* next; /*!< Link to another domain record (presumably the next entry in the runtime's list - confirm). */
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create a domain using some domain name: the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ * @param[in] name name of domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char    *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_domain_create     __itt_domain_createW
+#  define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+#  define __itt_domain_create     __itt_domain_createA
+#  define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA     ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW     ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create     ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: the null results are parenthesized so they stay one operand in any expression */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) ((__itt_domain*)0)
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) ((__itt_domain*)0)
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name)  ((__itt_domain*)0)
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_id
+{
+    unsigned long long d1, d2, d3;
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+/* static: a const object defined in a header has external linkage in C, so each including file needs its own copy. */
+static const __itt_id __itt_null = { 0, 0, 0 };
+
+/**
+ * @ingroup ids
+ * @brief Convenience helper that fills in an __itt_id structure without domain control.
+ * The helper only initializes the structure; it does not affect the collector
+ * runtime in any way. An ID made here must still be created with the
+ * __itt_id_create function before it is used to identify a named entity.
+ * @param[in] addr The address of the object; stored in the d1 field of the ID.
+ * @param[in] extra The extra data to uniquely identify the object; stored in the d2 field.
+ * @return The populated ID; its reserved d3 field is zero.
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+    __itt_id made;
+    made.d1 = (unsigned long long)((uintptr_t)addr);
+    made.d2 = extra;
+    made.d3 = 0; /* Reserved. Must be zero */
+    return made;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of an identifier.
+ * This call starts the lifetime of an instance of the given ID in the
+ * trace. From then on the ID can be used to tag named entity instances
+ * in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * Creating the ID does not by itself name or tag any entity.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#    define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#    define __itt_id_create_ptr  ITTNOTIFY_NAME(id_create)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_id_create(domain,id)
+#    define __itt_id_create_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of an identifier.
+ * This call ends the lifetime of the current instance of the given ID value in the trace.
+ * Relationships established after the lifetime has ended are invalid.
+ * The call must be made before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#    define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#    define __itt_id_destroy_ptr  ITTNOTIFY_NAME(id_destroy)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_id_destroy(domain,id)
+#    define __itt_id_destroy_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle
+{
+    const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+    const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else  /* UNICODE || _UNICODE */
+    void* strW; /*!< Untyped stand-in for the wide-character string when UNICODE is not defined. */
+#endif /* UNICODE || _UNICODE */
+    int   extra1; /*!< Reserved. Must be zero   */
+    void* extra2; /*!< Reserved. Must be zero   */
+    struct ___itt_string_handle* next; /*!< Link to another handle record (presumably the next entry in the runtime's list - confirm). */
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return a handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ * @param[in] name The input string
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char    *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_string_handle_create     __itt_string_handle_createW
+#  define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+#  define __itt_string_handle_create     __itt_string_handle_createA
+#  define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create,  (const char    *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA     ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW     ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create     ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: the null results are parenthesized so they stay one operand in any expression */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) ((__itt_string_handle*)0)
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) ((__itt_string_handle*)0)
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name)  ((__itt_string_handle*)0)
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+/** Sentinel with all bits set; __itt_frame_submit_v3 documents it as "take the current timestamp" for its end argument. */
+#define __itt_timestamp_none ((__itt_timestamp)-1LL)
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp      ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr  ITTNOTIFY_NAME(get_timestamp)
+#else  /* INTEL_NO_ITTNOTIFY_API: still yield a value so the call can be used in an expression */
+#define __itt_get_timestamp() ((__itt_timestamp)0)
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Open a region instance.
+ * While a region with a given ID is open, further __itt_region_begin calls
+ * with that ID are ignored until __itt_region_end is called with the same ID.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief Close a region instance.
+ * Only the first __itt_region_end call for a given ID ends the region;
+ * repeated calls with that ID, and calls that have no matching
+ * __itt_region_begin call, are ignored.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end,   (const __itt_domain *domain, __itt_id id))
+#    define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#    define __itt_region_begin_ptr      ITTNOTIFY_NAME(region_begin)
+#    define __itt_region_end(d,x)       ITTNOTIFY_VOID_D1(region_end,d,x)
+#    define __itt_region_end_ptr        ITTNOTIFY_NAME(region_end)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the calls expand to nothing */
+#    define __itt_region_begin(d,x,y,z)
+#    define __itt_region_begin_ptr 0
+#    define __itt_region_end(d,x)
+#    define __itt_region_end_ptr   0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#  define __itt_region_begin_ptr 0
+#  define __itt_region_end_ptr   0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * While a frame with a given ID is open, further __itt_frame_begin calls with
+ * that ID are ignored until __itt_frame_end is called with the same ID.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * Only the first __itt_frame_end call for a given ID ends the frame;
+ * repeated calls with that ID, and calls that have no matching
+ * __itt_frame_begin call, are ignored.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submit a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing the special __itt_timestamp_none value as the "end" argument means
+ * that the current timestamp is taken as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+    __itt_timestamp begin, __itt_timestamp end);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, frame_begin_v3,  (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3,    (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#    define __itt_frame_begin_v3(d,x)      ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#    define __itt_frame_begin_v3_ptr       ITTNOTIFY_NAME(frame_begin_v3)
+#    define __itt_frame_end_v3(d,x)        ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#    define __itt_frame_end_v3_ptr         ITTNOTIFY_NAME(frame_end_v3)
+#    define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#    define __itt_frame_submit_v3_ptr      ITTNOTIFY_NAME(frame_submit_v3)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the calls expand to nothing */
+#    define __itt_frame_begin_v3(domain,id)
+#    define __itt_frame_begin_v3_ptr 0
+#    define __itt_frame_end_v3(domain,id)
+#    define __itt_frame_end_v3_ptr 0
+#    define __itt_frame_submit_v3(domain,id,begin,end)
+#    define __itt_frame_submit_v3_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#  define __itt_frame_begin_v3_ptr 0
+#  define __itt_frame_end_v3_ptr 0
+#  define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denote a task_group instance.
+ * Repeated calls to __itt_task_group with the same ID are ignored.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#    define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#    define __itt_task_group_ptr      ITTNOTIFY_NAME(task_group)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_task_group(d,x,y,z)
+#    define __itt_task_group_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or thread
+ * switching (the user task switches to a different thread) then this breaks
+ * the notion of the current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance identified by a name.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance identified by a function pointer.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of the finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, task_begin,    (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end,      (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped,   (const __itt_domain *domain, __itt_id taskid))
+#    define __itt_task_begin(d,x,y,z)    ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#    define __itt_task_begin_ptr         ITTNOTIFY_NAME(task_begin)
+#    define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#    define __itt_task_begin_fn_ptr      ITTNOTIFY_NAME(task_begin_fn)
+#    define __itt_task_end(d)            ITTNOTIFY_VOID_D0(task_end,d)
+#    define __itt_task_end_ptr           ITTNOTIFY_NAME(task_end)
+#    define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#    define __itt_task_begin_overlapped_ptr      ITTNOTIFY_NAME(task_begin_overlapped)
+#    define __itt_task_end_overlapped(d,x)       ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#    define __itt_task_end_overlapped_ptr        ITTNOTIFY_NAME(task_end_overlapped)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the calls expand to nothing */
+#    define __itt_task_begin(domain,id,parentid,name)
+#    define __itt_task_begin_ptr 0
+#    define __itt_task_begin_fn(domain,id,parentid,fn)
+#    define __itt_task_begin_fn_ptr 0
+#    define __itt_task_end(domain)
+#    define __itt_task_end_ptr 0
+#    define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#    define __itt_task_begin_overlapped_ptr 0
+#    define __itt_task_end_overlapped(domain,taskid)
+#    define __itt_task_end_overlapped_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#  define __itt_task_begin_ptr 0
+#  define __itt_task_begin_fn_ptr 0
+#  define __itt_task_end_ptr 0
+#  define __itt_task_begin_overlapped_ptr 0
+#  define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace; __itt_marker takes one of these values as its scope argument.
+ */
+typedef enum
+{
+    __itt_scope_unknown = 0,
+    __itt_scope_global,
+    __itt_scope_track_group,
+    __itt_scope_track,
+    __itt_scope_task,
+    __itt_scope_marker
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown  __itt_scope_unknown
+#define __itt_marker_scope_global   __itt_scope_global
+#define __itt_marker_scope_process  __itt_scope_track_group /* "process" is spelled track_group in __itt_scope */
+#define __itt_marker_scope_thread   __itt_scope_track /* "thread" is spelled track in __itt_scope */
+#define __itt_marker_scope_task     __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance.
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#if !defined(INTEL_NO_MACRO_BODY)
+#  if !defined(INTEL_NO_ITTNOTIFY_API)
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#    define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#    define __itt_marker_ptr      ITTNOTIFY_NAME(marker)
+#  else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#    define __itt_marker(domain,id,name,scope)
+#    define __itt_marker_ptr 0
+#  endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#  define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum {
+    __itt_metadata_unknown = 0, /**< Metadata type is unknown (value 0) */
+    __itt_metadata_u64,     /**< Unsigned 64-bit integer */
+    __itt_metadata_s64,     /**< Signed 64-bit integer */
+    __itt_metadata_u32,     /**< Unsigned 32-bit integer */
+    __itt_metadata_s32,     /**< Signed 32-bit integer */
+    __itt_metadata_u16,     /**< Unsigned 16-bit integer */
+    __itt_metadata_s16,     /**< Signed 16-bit integer */
+    __itt_metadata_float,   /**< Signed 32-bit floating-point */
+    __itt_metadata_double   /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr          ITTNOTIFY_NAME(metadata_add)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN /* ITT_PLATFORM_WIN builds: separate char (A) and wchar_t (W) entry points */
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE) /* generic name resolves to the W variant */
+#  define __itt_metadata_str_add     __itt_metadata_str_addW
+#  define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE not defined: generic name resolves to the A variant */
+#  define __itt_metadata_str_add     __itt_metadata_str_addA
+#  define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr        ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr        ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)  ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr         ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ *            This variant takes no id argument: the target instance is selected
+ *            by scope alone.
+ *
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr          ITTNOTIFY_NAME(metadata_add_with_scope)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ *            This variant takes no id argument: the target instance is selected
+ *            by scope alone.
+ *
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN /* ITT_PLATFORM_WIN builds: separate char (A) and wchar_t (W) entry points */
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE) /* generic name resolves to the W variant */
+#  define __itt_metadata_str_add_with_scope     __itt_metadata_str_add_with_scopeW
+#  define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE not defined: generic name resolves to the A variant */
+#  define __itt_metadata_str_add_with_scope     __itt_metadata_str_add_with_scopeA
+#  define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr        ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr        ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)  ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr         ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr  0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr  0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr   0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr  0
+#define __itt_metadata_str_add_with_scopeW_ptr  0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr   0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum
+{
+    __itt_relation_is_unknown = 0,          /**< Relation kind is unknown (value 0) */
+    __itt_relation_is_dependent_on,         /**< "A is dependent on B" means that A cannot start until B completes */
+    __itt_relation_is_sibling_of,           /**< "A is sibling of B" means that A and B were created as a group */
+    __itt_relation_is_parent_of,            /**< "A is parent of B" means that A created B */
+    __itt_relation_is_continuation_of,      /**< "A is continuation of B" means that A assumes the dependencies of B */
+    __itt_relation_is_child_of,             /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+    __itt_relation_is_continued_by,         /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+    __itt_relation_is_predecessor_to        /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add,            (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr    ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z)          ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr               ITTNOTIFY_NAME(relation_add)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_info
+{
+    unsigned long long clock_freq; /*!< Clock domain frequency */
+    unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_domain
+{
+    __itt_clock_info info;      /*!< Most recent clock domain info */
+    __itt_get_clock_info_fn fn; /*!< Callback function pointer */
+    void* fn_data;              /*!< Input argument for the callback function */
+    int   extra1;               /*!< Reserved. Must be zero */
+    void* extra2;               /*!< Reserved. Must be zero */
+    struct ___itt_clock_domain* next; /*!< Pointer to another clock domain; presumably the next list node -- TODO confirm */
+} __itt_clock_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different than the CPU, for instance the instrumentation of events
+ * that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY /* call macros are only defined when macro bodies are enabled */
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: map the public names onto the ITTNOTIFY_* wrappers */
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create     ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else  /* INTEL_NO_ITTNOTIFY_API: API compiled out, the call collapses to a null clock domain */
+#define __itt_clock_domain_create(fn,fn_data) ((__itt_clock_domain*)0) /* fully parenthesized so the cast cannot re-associate with neighbouring operators */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate clock domains frequencies and clock base timestamps.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset     ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex,  (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z)  ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr       ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr      ITTNOTIFY_NAME(id_destroy_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr    0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr    0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance identified by the traced function's address instead of a name.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex,        (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex,     (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex,          (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b)      ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr               ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b)   ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr            ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y)              ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr                 ITTNOTIFY_NAME(task_end_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr          0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr       0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr            0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr          0
+#define __itt_task_begin_fn_ex_ptr       0
+#define __itt_task_end_ex_ptr            0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects whose value can be incremented or
+ * decremented. Counter values are 64-bit unsigned integers.
+ * Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @brief opaque structure for counter identification
+ */
+/** @cond exclude_from_documentation */
+
+typedef struct ___itt_counter* __itt_counter;
+
+/**
+ * @brief Create an unsigned 64 bits integer counter with given name/domain
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer
+ *
+ * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char    *name, const char    *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_counter_create     __itt_counter_createW
+#  define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+#  define __itt_counter_create     __itt_counter_createA
+#  define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef INTEL_NO_MACRO_BODY /* call macros are only defined when macro bodies are enabled */
+#ifndef INTEL_NO_ITTNOTIFY_API /* API enabled: map the public names onto the ITTNOTIFY_* wrappers */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char    *name, const char    *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create,  (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA     ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW     ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create     ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: API compiled out, creation calls collapse to a null counter */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain) ((__itt_counter)0) /* null handle keeps `c = __itt_counter_createA(...)` compilable */
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain) ((__itt_counter)0) /* null handle keeps `c = __itt_counter_createW(...)` compilable */
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain) ((__itt_counter)0) /* null handle keeps `c = __itt_counter_create(...)` compilable */
+#define __itt_counter_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the unsigned 64 bits integer counter value
+ *
+ * Calling this function to non-unsigned 64 bits integer counters has no effect
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc     ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Increment the unsigned 64 bits integer counter value with x
+ *
+ * Calling this function to non-unsigned 64 bits integer counters has no effect
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta     ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Decrement the unsigned 64 bits integer counter value
+ *
+ * Calling this function to non-unsigned 64 bits integer counters has no effect
+ */
+void ITTAPI __itt_counter_dec(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id))
+#define __itt_counter_dec     ITTNOTIFY_VOID(counter_dec)
+#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec(id)
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Decrement the unsigned 64 bits integer counter value with x
+ *
+ * Calling this function to non-unsigned 64 bits integer counters has no effect
+ */
+void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_dec_delta     ITTNOTIFY_VOID(counter_dec_delta)
+#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_delta(id, value)
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls increment the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_v3,       (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x)         ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr          ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr    ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr       0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_v3_ptr       0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls decrement the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ *            The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_v3,       (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_dec_v3(d,x)         ITTNOTIFY_VOID_D1(counter_dec_v3,d,x)
+#define __itt_counter_dec_v3_ptr          ITTNOTIFY_NAME(counter_dec_v3)
+#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y)
+#define __itt_counter_dec_delta_v3_ptr    ITTNOTIFY_NAME(counter_dec_delta_v3)
+#else  /* INTEL_NO_ITTNOTIFY_API: both calls expand to nothing */
+#define __itt_counter_dec_v3(domain,name)
+#define __itt_counter_dec_v3_ptr       0
+#define __itt_counter_dec_delta_v3(domain,name,delta)
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#define __itt_counter_dec_v3_ptr       0
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} counters group */
+
+
+/**
+ * @brief Set the value of counter @a id from the data @a value_ptr points to (NOTE(review): pointee type presumably has to match the counter's data type; confirm).
+ */
+void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr))
+#define __itt_counter_set_value     ITTNOTIFY_VOID(counter_set_value)
+#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_counter_set_value(id, value_ptr)
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the counter value; this variant additionally takes a clock domain and a timestamp for the update.
+ */
+void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr))
+#define __itt_counter_set_value_ex     ITTNOTIFY_VOID(counter_set_value_ex)
+#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with given name/domain
+ *
+ * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) can be used to change the value of the counter.
+ * @return Counter handle for the calls above; when INTEL_NO_ITTNOTIFY_API is defined the call expands to a null __itt_counter.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_create_typedA(const char    *name, const char    *domain, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_counter_create_typed     __itt_counter_create_typedW
+#  define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr
+#else /* UNICODE */
+#  define __itt_counter_create_typed     __itt_counter_create_typedA
+#  define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char    *name, const char    *domain, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed,  (const char *name, const char *domain, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA     ITTNOTIFY_DATA(counter_create_typedA)
+#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA)
+#define __itt_counter_create_typedW     ITTNOTIFY_DATA(counter_create_typedW)
+#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed     ITTNOTIFY_DATA(counter_create_typed)
+#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: stubs yield a null counter so that `c = __itt_counter_create_typed(...)` still compiles */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed(name, domain, type) (__itt_counter)0
+#define __itt_counter_create_typed_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or
+ * __itt_counter_create_typed()
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy     ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance, stamped with a user-defined timestamp.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker_ex,    (const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b)    ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr             ITTNOTIFY_NAME(marker_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr    0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_marker_ex_ptr    0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain,  __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex,            (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr        ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b)          ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr                   ITTNOTIFY_NAME(relation_add_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API: both calls expand to nothing */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+    __itt_track_group_type_normal = 0 /*!< The only track group type declared in this enum */
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+/* Track group record; member alignment is capped at 8 bytes by the surrounding pack pragma. */
+typedef struct ___itt_track_group
+{
+    __itt_string_handle* name;     /*!< Name of the track group */
+    struct ___itt_track* track;    /*!< List of child tracks    */
+    __itt_track_group_type tgtype; /*!< Type of the track group */
+    int   extra1;                  /*!< Reserved. Must be zero  */
+    void* extra2;                  /*!< Reserved. Must be zero  */
+    struct ___itt_track_group* next; /*!< NOTE(review): presumably the next group in a singly linked list; confirm */
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, "normal" custom track
+ * is the only publicly available track type.
+ */
+typedef enum ___itt_track_type
+{
+    __itt_track_type_normal = 0 /*!< Normal custom track */
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+    , __itt_track_type_queue /*!< Declared only when INTEL_ITTNOTIFY_API_PRIVATE is defined */
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+/* Track record; member alignment is capped at 8 bytes by the surrounding pack pragma. */
+typedef struct ___itt_track
+{
+    __itt_string_handle* name; /*!< Name of the track       */
+    __itt_track_group* group;  /*!< Parent group to a track */
+    __itt_track_type ttype;    /*!< Type of the track       */
+    int   extra1;              /*!< Reserved. Must be zero  */
+    void* extra2;              /*!< Reserved. Must be zero  */
+    struct ___itt_track* next; /*!< NOTE(review): presumably the next track in a singly linked list; confirm */
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create logical track group with the given name and type; the call expands to a null pointer when INTEL_NO_ITTNOTIFY_API is defined.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create     ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else  /* INTEL_NO_ITTNOTIFY_API: the stub takes the same two arguments as the real function */
+#define __itt_track_group_create(name,track_group_type)  (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create logical track inside @a track_group; the call expands to a null pointer when INTEL_NO_ITTNOTIFY_API is defined.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create     ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to a null track pointer */
+#define __itt_track_create(track_group,name,track_type)  (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track (NOTE(review): presumably the track that subsequent notifications are attributed to; confirm against the ITT API reference).
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track     ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief User event identifier: returned by __itt_event_create() and passed to __itt_event_start() / __itt_event_end(). */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Fails when name or namelen is null, when name and namelen do not match, or when the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char    *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_event_create     __itt_event_createW
+#  define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+#  define __itt_event_create     __itt_event_createA
+#  define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char    *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create,  (const char    *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA     ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW     ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create      ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr  ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to a zero event identifier */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen)  (__itt_event)0
+#define __itt_event_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled); the call expands to (int)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start     ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to (int)0 */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note Calling it is optional for events that do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled); the call expands to (int)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end     ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to (int)0 */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines the element types of array data (for C/C++ intrinsic types)
+ */
+typedef enum
+{
+    __itt_e_first = 0, /* alias of the lowest enumerator (same value as __itt_e_char) */
+    __itt_e_char = 0,  /* 1-byte integer */
+    __itt_e_uchar,     /* 1-byte unsigned integer */
+    __itt_e_int16,     /* 2-byte integer */
+    __itt_e_uint16,    /* 2-byte unsigned integer  */
+    __itt_e_int32,     /* 4-byte integer */
+    __itt_e_uint32,    /* 4-byte unsigned integer */
+    __itt_e_int64,     /* 8-byte integer */
+    __itt_e_uint64,    /* 8-byte unsigned integer */
+    __itt_e_float,     /* 4-byte floating point */
+    __itt_e_double,    /* 8-byte floating point */
+    __itt_e_last = __itt_e_double /* alias of the highest enumerator */
+} __itt_av_data_type;
+
+/**
+ * @brief Save an array data to a file.
+ * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in the linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C).
+ */
+/* When INTEL_NO_ITTNOTIFY_API is defined, the call takes the same six arguments and expands to (int)0. */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_av_save     __itt_av_saveW
+#  define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+#  define __itt_av_save     __itt_av_saveA
+#  define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save,  (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA     ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW     ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save     ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: stubs mirror the six-argument signature and yield (int)0 */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(data, rank, dimensions, type, filePath, columnOrder) (int)0
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @brief NOTE(review): undocumented here; by its name it presumably lets a collector attach to the already running process - confirm against the ITT API reference. */
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach     ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr name is defined */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+/**
+ * @brief Module load info
+ * This API is used to report necessary information in case of module relocation
+ * @param[in] start_addr - relocated module start address
+ * @param[in] end_addr - relocated module end address
+ * @param[in] path - file system path to the module
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_module_load     __itt_module_loadW
+#  define __itt_module_load_ptr __itt_module_loadW_ptr
+#else /* UNICODE */
+#  define __itt_module_load     __itt_module_loadA
+#  define __itt_module_load_ptr __itt_module_loadA_ptr
+#endif /* UNICODE */
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API /* NOTE(review): the void stubs below use ITT_STUB while other void APIs here use ITT_STUBV; confirm the two are interchangeable */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path))
+ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, void, module_load,  (void *start_addr, void *end_addr, const char *path))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA     ITTNOTIFY_VOID(module_loadA)
+#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA)
+#define __itt_module_loadW     ITTNOTIFY_VOID(module_loadW)
+#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load     ITTNOTIFY_VOID(module_load)
+#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to nothing */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA(start_addr, end_addr, path)
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW(start_addr, end_addr, path)
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load(start_addr, end_addr, path)
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of the finished task
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex,       (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex,         (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b)     ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr              ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped_ex(d,x,y,z)           ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr                ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else  /* INTEL_NO_ITTNOTIFY_API: both calls expand to nothing */
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr      0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr        0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY; NOTE(review): __itt_task_end_overlapped_ptr below lacks the _ex suffix used everywhere else in this block - possibly a copy/paste leftover, confirm */
+#define __itt_task_begin_overlapped_ex_ptr      0
+#define __itt_task_end_overlapped_ptr           0
+#define __itt_task_end_overlapped_ex_ptr        0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup makrs_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ *   - It is not shipped to outside of Intel
+ *   - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/** @brief User mark type handle, as returned by __itt_mark_create(). */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type; the call expands to (__itt_mark_type)0 when INTEL_NO_ITTNOTIFY_API is defined
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char    *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark_create     __itt_mark_createW
+#  define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+#  define __itt_mark_create     __itt_mark_createA
+#  define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char    *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create,  (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA     ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW     ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create      ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr  ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API: the call expands to a zero mark type */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name)  (__itt_mark_type)0
+#define __itt_mark_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY: only the _ptr names are defined */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - function returns after mark is actually added to results.
+ * - This function is useful, for example, to mark different phases of application
+ *   (beginning of the next mark automatically means end of current region).
+ * - Can be used together with "continuous" marks (see below) at the same collection session
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char    *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark     __itt_markW
+#  define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE  */
+#  define __itt_mark     __itt_markA
+#  define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char    *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark,  (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA     ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW     ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark      ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr  ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter)  (int)0
+#define __itt_mark_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event type (mark) for process
+ * rather than for one thread
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char    *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+#  define __itt_mark_global     __itt_mark_globalW
+#  define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE  */
+#  define __itt_mark_global     __itt_mark_globalA
+#  define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char    *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global,  (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA     ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW     ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global      ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr  ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter)  (int)0
+#define __itt_mark_global_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr  0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for "continuous" mark with specified name.
+ *
+ * - Returns zero value in case of success, non-zero value otherwise.
+ *   Also returns non-zero value when preceding "begin" point for the
+ *   mark with the same name failed to be created or not created.
+ * - The mark of "continuous" type is placed to collection results in
+ *   case of success. It appears in overtime view(s) as a special tick
+ *   sign (different from "discrete" mark) together with line from
+ *   corresponding "begin" mark to "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ * Discrete mark can be nested inside marked region
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @return Returns zero value in case of success, non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off     ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for mark of process
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off     ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief opaque structure for caller (stitch point) identification
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to.
+ * The function returns a unique identifier which is used to match the cut points with corresponding stitch points.
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create     ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy     ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. Stack from each event which occurs after this call will be cut
+ * at the same stack level the function was called and stitched to the corresponding stitch point.
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter     ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter().
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave     ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} stitch group */
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_error_code
+{
+    __itt_error_success       = 0, /*!< no error */
+    __itt_error_no_module     = 1, /*!< module can't be loaded */
+    /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+    __itt_error_no_symbol     = 2, /*!< symbol not found */
+    /* %1$s -- library name, %2$s -- symbol name. */
+    __itt_error_unknown_group = 3, /*!< unknown group specified */
+    /* %1$s -- env var name, %2$s -- group name. */
+    __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+    /* %1$s -- env var name, %2$d -- system error. */
+    __itt_error_env_too_long  = 5, /*!< variable value too long */
+    /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+    __itt_error_system        = 6  /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+    /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version     ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else  /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version()   (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else  /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+
+// clang-format on
diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.cpp b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp
new file mode 100644
index 0000000000..0037c943aa
--- /dev/null
+++ b/mozglue/baseprofiler/lul/AutoObjectMapper.cpp
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "BaseProfiler.h"
+#include "PlatformMacros.h"
+#include "AutoObjectMapper.h"
+
+// Reports a failed step of AutoObjectMapper*::Map to |aLog|, as
+// "AutoObjectMapper::Map: Failed to <aHowFailed> '<aFileName>'".
+static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed,
+                            std::string aFileName) {
+  char message[300];
+  SprintfLiteral(message, "AutoObjectMapper::Map: Failed to %s \'%s\'",
+                 aHowFailed, aFileName.c_str());
+  message[sizeof(message) - 1] = '\0';  // Force NUL termination.
+  aLog(message);
+}
+
+AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*))
+    : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {}
+
+AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() {
+  if (mIsMapped) {
+    MOZ_ASSERT(mSize > 0);
+    // This need not strictly hold, but we assume (reasonably enough) that
+    // no mmap facility would be crazy enough to map anything at page zero.
+    MOZ_ASSERT(mImage);
+    munmap(mImage, mSize);
+  } else {
+    // Nothing is mapped, so there is nothing to release; the fields
+    // should still hold their "empty" values.
+    MOZ_ASSERT(!mImage);
+    MOZ_ASSERT(mSize == 0);
+  }
+}
+
+bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length,
+                                std::string fileName) {
+  MOZ_ASSERT(!mIsMapped);
+  if (mIsMapped) {  // The assert may be compiled out: don't leak the old map.
+    failedToMessage(mLog, "map (already holding a mapping)", fileName);
+    return false;
+  }
+
+  int fd = open(fileName.c_str(), O_RDONLY);
+  if (fd == -1) {
+    failedToMessage(mLog, "open", fileName);
+    return false;
+  }
+
+  struct stat st;
+  if (fstat(fd, &st) != 0 || st.st_size == 0) {  // Empty files fail too.
+    failedToMessage(mLog, "fstat", fileName);
+    close(fd);
+    return false;
+  }
+
+  void* image = mmap(nullptr, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+  close(fd);  // Not needed any more, whether or not mmap succeeded.
+  if (image == MAP_FAILED) {
+    failedToMessage(mLog, "mmap", fileName);
+    return false;
+  }
+  mIsMapped = true;
+  mImage = *start = image;
+  mSize = *length = st.st_size;
+  return true;
+}
diff --git a/mozglue/baseprofiler/lul/AutoObjectMapper.h b/mozglue/baseprofiler/lul/AutoObjectMapper.h
new file mode 100644
index 0000000000..f63aa43e0e
--- /dev/null
+++ b/mozglue/baseprofiler/lul/AutoObjectMapper.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AutoObjectMapper_h
+#define AutoObjectMapper_h
+
+#include <string>
+
+#include "mozilla/Attributes.h"
+#include "PlatformMacros.h"
+
+// A (nearly-) RAII class that maps an object in and then unmaps it on
+// destruction.  This base class version uses the "normal" POSIX
+// functions: open, fstat, close, mmap, munmap.
+
+class MOZ_STACK_CLASS AutoObjectMapperPOSIX {
+ public:
+  // The constructor does not attempt to map the file, because that
+  // might fail.  Instead, once the object has been constructed,
+  // call Map() to attempt the mapping.  There is no corresponding
+  // Unmap() since the unmapping is done in the destructor.  Failure
+  // messages are sent to |aLog|.
+  explicit AutoObjectMapperPOSIX(void (*aLog)(const char*));
+
+  // Unmap the file on destruction of this object.
+  ~AutoObjectMapperPOSIX();
+
+  // Map |fileName| into the address space and return the mapping
+  // extents.  If the file is zero sized this will fail.  The file is
+  // mapped read-only and shared (PROT_READ, MAP_SHARED).  Returns true
+  // iff the mapping succeeded, in which case *start and *length hold
+  // its extent.  Map must not be called again once it has succeeded:
+  // it asserts that nothing is mapped yet.
+  bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName);
+
+ protected:
+  // If we are currently holding a mapped object, these record the
+  // mapped address range.
+  void* mImage;
+  size_t mSize;
+
+  // A logging sink, for complaining about mapping failures.
+  void (*mLog)(const char*);
+
+ private:
+  // Are we currently holding a mapped object?  This is private to
+  // the base class.  Derived classes need to have their own way to
+  // track whether they are holding a mapped object.
+  bool mIsMapped;
+
+  // Disable copying and assignment.
+  AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&);
+  AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&);
+  // Disable heap allocation of this class.
+  void* operator new(size_t);
+  void* operator new[](size_t);
+  void operator delete(void*);
+  void operator delete[](void*);
+};
+
+#endif  // AutoObjectMapper_h
diff --git a/mozglue/baseprofiler/lul/LulCommon.cpp b/mozglue/baseprofiler/lul/LulCommon.cpp
new file mode 100644
index 0000000000..f014892a57
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulCommon.cpp
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2011, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/module.cc
+//   src/common/unique_string.cc
+
+// There's no internal-only interface for LulCommon.  Hence include
+// the external interface directly.
+#include "LulCommonExt.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <map>
+
+#include "BaseProfiler.h"
+
+namespace lul {
+
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// Module
+// The constructor initializes four members from its four string arguments.
+Module::Module(const string& name, const string& os, const string& architecture,
+               const string& id)
+    : name_(name), os_(os), architecture_(architecture), id_(id) {}
+
+Module::~Module() {}
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+class UniqueString {  // Owns a private strdup() copy of a string's bytes.
+ public:
+  explicit UniqueString(const string& str) : str_(strdup(str.c_str())) {}
+  ~UniqueString() { free(const_cast<char*>(str_)); }
+  const char* const str_;  // Const pointer: blocks accidental reassignment.
+};
+
+const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; }
+
+bool IsEmptyUniqueString(const UniqueString* ustr) {
+  return *FromUniqueString(ustr) == '\0';  // Empty means a leading NUL.
+}
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+UniqueStringUniverse::~UniqueStringUniverse() {
+  // The map holds raw owning pointers; delete every UniqueString it refers to.
+  for (const auto& entry : map_) {
+    delete entry.second;
+  }
+}
+
+const UniqueString* UniqueStringUniverse::ToUniqueString(string str) {
+  // Hand back the existing entry when |str| has been seen before.
+  const auto found = map_.find(str);
+  if (found != map_.end()) {
+    return found->second;
+  }
+  UniqueString* fresh = new UniqueString(str);
+  map_[str] = fresh;  // First sighting: remember it under |str|.
+  return fresh;
+}
+
+}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulCommonExt.h b/mozglue/baseprofiler/lul/LulCommonExt.h
new file mode 100644
index 0000000000..b20a7321ff
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulCommonExt.h
@@ -0,0 +1,509 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2010, 2012, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// module.h: Define google_breakpad::Module. A Module holds debugging
+// information, and can write that information out as a Breakpad
+// symbol file.
+
+//  (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+//  Copyright (c) 2001, 2002 Peter Dimov
+//
+//  Permission to copy, use, modify, sell and distribute this software
+//  is granted provided this copyright notice appears in all copies.
+//  This software is provided "as is" without express or implied
+//  warranty, and with no claim as to its suitability for any purpose.
+//
+//  See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation.
+//
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/unique_string.h
+//   src/common/scoped_ptr.h
+//   src/common/module.h
+
+// External interface for the "Common" component of LUL.
+
+#ifndef LulCommonExt_h
+#define LulCommonExt_h
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+#include <vector>
+#include <cstddef>  // for std::ptrdiff_t
+
+#include "mozilla/Assertions.h"
+
+namespace lul {
+
+using std::map;
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+
+// Abstract type
+class UniqueString;
+
+// Get the contained C string (debugging only)
+const char* FromUniqueString(const UniqueString*);
+
+// Is the given string empty (that is, "") ?
+bool IsEmptyUniqueString(const UniqueString*);
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+
+// A UniqueStringUniverse owns its UniqueStrings and deletes them when it dies.
+class UniqueStringUniverse {
+ public:
+  UniqueStringUniverse() {}
+  ~UniqueStringUniverse();
+  // Return this universe's UniqueString for |str|, creating it on first use.
+  const UniqueString* ToUniqueString(string str);
+
+ private:
+  map<string, UniqueString*> map_;
+};
+
+////////////////////////////////////////////////////////////////
+// GUID
+//
+
+typedef struct {
+  uint32_t data1;
+  uint16_t data2;
+  uint16_t data3;
+  uint8_t data4[8];
+} MDGUID;  // GUID: 16 bytes of data held as 4-, 2-, 2- and 8-byte fields
+
+typedef MDGUID GUID;
+
+////////////////////////////////////////////////////////////////
+// scoped_ptr
+//
+
+//  scoped_ptr mimics a built-in pointer except that it guarantees deletion
+//  of the object pointed to, either on destruction of the scoped_ptr or via
+//  an explicit reset(). scoped_ptr is a simple solution for simple needs;
+//  use shared_ptr or std::auto_ptr if your needs are more complex.
+
+//  *** NOTE ***
+//  If your scoped_ptr is a class member of class FOO pointing to a
+//  forward declared type BAR (as shown below), then you MUST use a non-inlined
+//  version of the destructor.  The destructor of a scoped_ptr (called from
+//  FOO's destructor) must have a complete definition of BAR in order to
+//  destroy it.  Example:
+//
+//  -- foo.h --
+//  class BAR;
+//
+//  class FOO {
+//   public:
+//    FOO();
+//    ~FOO();  // Required for sources that instantiate class FOO to compile!
+//
+//   private:
+//    scoped_ptr<BAR> bar_;
+//  };
+//
+//  -- foo.cc --
+//  #include "foo.h"
+//  FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition.
+
+//  scoped_ptr_malloc added by Google
+//  When one of these goes out of scope, instead of doing a delete or
+//  delete[], it calls free().  scoped_ptr_malloc<char> is likely to see
+//  much more use than any other specializations.
+
+//  release() added by Google
+//  Use this to conditionally transfer ownership of a heap-allocated object
+//  to the caller, usually on method success.
+
+template <typename T>
+class scoped_ptr {
+ private:
+  T* ptr;  // The owned object, or null when this scoped_ptr holds nothing.
+
+  scoped_ptr(scoped_ptr const&);  // Private: scoped_ptrs cannot be copied.
+  scoped_ptr& operator=(scoped_ptr const&);
+
+ public:
+  using element_type = T;
+
+  explicit scoped_ptr(T* p = nullptr) : ptr(p) {}
+
+  ~scoped_ptr() { delete ptr; }
+
+  void reset(T* p = nullptr) {  // Adopt |p|; delete the old one if different.
+    if (ptr == p) {
+      return;
+    }
+    delete ptr;
+    ptr = p;
+  }
+
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  bool operator==(T* p) const { return ptr == p; }
+  bool operator!=(T* p) const { return ptr != p; }
+
+  T* get() const { return ptr; }
+
+  void swap(scoped_ptr& b) {  // Exchange the owned objects of *this and |b|.
+    T* mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  T* release() {  // Give up ownership; the object is returned, not deleted.
+    T* released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+  // no reason to use these: each scoped_ptr should have its own object
+  template <typename U>
+  bool operator==(scoped_ptr<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_ptr<U> const& p) const;
+};
+
+template <typename T>
+inline void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) {
+  a.swap(b);  // Non-member form, so an unqualified swap(a, b) works too.
+}
+
+template <typename T>
+inline bool operator==(T* p, const scoped_ptr<T>& b) {
+  return b.get() == p;  // Raw pointer on the left-hand side.
+}
+
+template <typename T>
+inline bool operator!=(T* p, const scoped_ptr<T>& b) {
+  return b.get() != p;  // Raw pointer on the left-hand side.
+}
+
+//  scoped_array is the array counterpart of scoped_ptr: it owns an array
+//  and releases it with delete[], either when the scoped_array is destroyed
+//  or on an explicit reset(). Use shared_array or std::vector if your needs
+//  are more complex.
+
+template <typename T>
+class scoped_array {
+ public:
+  typedef T element_type;
+
+  // Take ownership of the array |p|, which may be null.
+  explicit scoped_array(T* p = 0) : ptr(p) {}
+
+  // delete[] the owned array, if any.
+  ~scoped_array() { delete[] ptr; }
+
+  // delete[] the owned array and take ownership of |p| instead.  Passing the
+  // pointer that is already held leaves everything untouched.
+  void reset(T* p = 0) {
+    if (ptr == p) return;
+    delete[] ptr;
+    ptr = p;
+  }
+
+  // Element access.  Only a null array and a negative index are asserted
+  // against; the upper bound is not checked.
+  T& operator[](std::ptrdiff_t i) const {
+    MOZ_ASSERT(ptr != 0);
+    MOZ_ASSERT(i >= 0);
+    return ptr[i];
+  }
+
+  // Compare the held pointer against a raw pointer.
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return ptr != p; }
+
+  // Return the held pointer without giving up ownership.
+  T* get() const { return ptr; }
+
+  // Exchange the held pointers of *this and |b|.
+  void swap(scoped_array& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  // Give up ownership: return the held pointer and leave *this holding null.
+  T* release() {
+    T* const released = ptr;
+    ptr = 0;
+    return released;
+  }
+
+ private:
+  T* ptr;
+
+  // Private and only declared here, so scoped_arrays cannot be copied or
+  // assigned by outside code.
+  scoped_array(scoped_array const&);
+  scoped_array& operator=(scoped_array const&);
+
+  // no reason to use these: each scoped_array should have its own object
+  template <typename U>
+  bool operator==(scoped_array<U> const& p) const;
+  template <typename U>
+  bool operator!=(scoped_array<U> const& p) const;
+};
+
+// Free-function spelling of scoped_array::swap: exchanges the arrays held
+// by |a| and |b|.
+template <typename T>
+inline void swap(scoped_array<T>& a, scoped_array<T>& b) {
+  b.swap(a);
+}
+
+// Equality with the raw pointer on the left-hand side.
+template <typename T>
+inline bool operator==(T* p, const scoped_array<T>& b) {
+  return b.get() == p;
+}
+
+// Inequality with the raw pointer on the left-hand side.
+template <typename T>
+inline bool operator!=(T* p, const scoped_array<T>& b) {
+  return b.get() != p;
+}
+
+// Functor around the C library's free(), so that free() can be supplied as
+// the FreeProc template argument of scoped_ptr_malloc below.
+class ScopedPtrMallocFree {
+ public:
+  void operator()(void* block) const { free(block); }
+};
+
+// scoped_ptr_malloc<> mirrors scoped_ptr<>, except that the owned pointer
+// is released by calling a FreeProc functor (the second template argument)
+// rather than by delete.
+
+template <typename T, typename FreeProc = ScopedPtrMallocFree>
+class scoped_ptr_malloc {
+ public:
+  typedef T element_type;
+
+  // Take ownership of |p|, which may be null.
+  explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {}
+
+  // Hand the owned pointer (null included) to the FreeProc.
+  ~scoped_ptr_malloc() { free_((void*)ptr); }
+
+  // Release the owned pointer through the FreeProc and take ownership of
+  // |p| instead.  Passing the pointer that is already held leaves
+  // everything untouched.
+  void reset(T* p = 0) {
+    if (ptr == p) return;
+    free_((void*)ptr);
+    ptr = p;
+  }
+
+  // Access the owned object; the held pointer is asserted to be non-null.
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+
+  // Compare the held pointer against a raw pointer.
+  bool operator==(T* p) const { return ptr == p; }
+
+  bool operator!=(T* p) const { return ptr != p; }
+
+  // Return the held pointer without giving up ownership.
+  T* get() const { return ptr; }
+
+  // Exchange the held pointers of *this and |b|.
+  void swap(scoped_ptr_malloc& b) {
+    T* const mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+
+  // Give up ownership: return the held pointer and leave *this holding null.
+  T* release() {
+    T* const released = ptr;
+    ptr = 0;
+    return released;
+  }
+
+ private:
+  T* ptr;
+
+  // Private and only declared here, so scoped_ptr_mallocs cannot be copied
+  // or assigned by outside code.
+  scoped_ptr_malloc(scoped_ptr_malloc const&);
+  scoped_ptr_malloc& operator=(scoped_ptr_malloc const&);
+
+  // no reason to use these: each scoped_ptr_malloc should have its own object
+  template <typename U, typename GP>
+  bool operator==(scoped_ptr_malloc<U, GP> const& p) const;
+  template <typename U, typename GP>
+  bool operator!=(scoped_ptr_malloc<U, GP> const& p) const;
+
+  // The functor instance used to release owned pointers; one per
+  // specialization.
+  static FreeProc const free_;
+};
+
+// Definition of the static release functor declared in scoped_ptr_malloc.
+template <typename T, typename FP>
+const FP scoped_ptr_malloc<T, FP>::free_ = FP();
+
+// Free-function spelling of scoped_ptr_malloc::swap: exchanges the pointers
+// held by |a| and |b|.
+template <typename T, typename FP>
+inline void swap(scoped_ptr_malloc<T, FP>& a, scoped_ptr_malloc<T, FP>& b) {
+  b.swap(a);
+}
+
+// Equality with the raw pointer on the left-hand side.
+template <typename T, typename FP>
+inline bool operator==(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return b.get() == p;
+}
+
+// Inequality with the raw pointer on the left-hand side.
+template <typename T, typename FP>
+inline bool operator!=(T* p, const scoped_ptr_malloc<T, FP>& b) {
+  return b.get() != p;
+}
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef uint64_t Address;
+
+  // Representation of an expression.  This can either be a postfix
+  // expression, in which case it is stored as a string, or a simple
+  // expression of the form (identifier + imm) or *(identifier + imm).
+  // It can also be invalid (denoting "no value").
+  enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem };
+
+  struct Expr {
+    // Construct a simple-form expression: (IDENT + OFFSET), or
+    // *(IDENT + OFFSET) when DEREF is true.  If IDENT is the empty
+    // UniqueString, the result is an invalid expression instead.
+    //
+    // The invalid state is established by delegating to the default
+    // constructor.  Writing "Expr();" as a statement in the body would
+    // merely create and discard a temporary, leaving ident_, offset_ and
+    // how_ of this object uninitialized.
+    Expr(const UniqueString* ident, long offset, bool deref) : Expr() {
+      if (!IsEmptyUniqueString(ident)) {
+        ident_ = ident;
+        offset_ = offset;
+        how_ = deref ? kExprSimpleMem : kExprSimple;
+      }
+    }
+
+    // Construct an invalid expression
+    Expr() : ident_(nullptr), offset_(0), postfix_(""), how_(kExprInvalid) {}
+
+    // Return the postfix expression string, either directly,
+    // if this is a postfix expression, or by synthesising it
+    // for a simple expression.  Calling this on an invalid expression
+    // asserts, and otherwise returns a diagnostic string.
+    std::string getExprPostfix() const {
+      switch (how_) {
+        case kExprPostfix:
+          return postfix_;
+        case kExprSimple:
+        case kExprSimpleMem: {
+          // The magnitude and the sign operator are emitted separately.
+          // Negating in unsigned arithmetic keeps the most negative long
+          // well defined, which labs() does not.
+          const unsigned long magnitude =
+              offset_ < 0 ? 0UL - static_cast<unsigned long>(offset_)
+                          : static_cast<unsigned long>(offset_);
+          char buf[40];
+          snprintf(buf, sizeof(buf), " %lu %c%s", magnitude,
+                   offset_ < 0 ? '-' : '+', how_ == kExprSimple ? "" : " ^");
+          return std::string(FromUniqueString(ident_)) + std::string(buf);
+        }
+        case kExprInvalid:
+        default:
+          MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type");
+          return "Expr::genExprPostfix: kExprInvalid";
+      }
+    }
+
+    // The identifier that gives the starting value for simple expressions.
+    const UniqueString* ident_;
+    // The offset to add for simple expressions.
+    long offset_;
+    // The Postfix expression string to evaluate for non-simple expressions.
+    std::string postfix_;
+    // The operation expressed by this expression.
+    ExprHow how_;
+  };
+
+  // A map from register names to expressions that recover
+  // their values. This can represent a complete set of rules to
+  // follow at some address, or a set of changes to be applied to an
+  // extant set of rules.
+  // NOTE! there are two completely different types called RuleMap.  This
+  // is one of them.
+  typedef std::map<const UniqueString*, Expr> RuleMap;
+
+  // A map from addresses to RuleMaps, representing changes that take
+  // effect at given addresses.
+  typedef std::map<Address, RuleMap> RuleChangeMap;
+
+  // A range of 'STACK CFI' stack walking information. An instance of
+  // this structure corresponds to a 'STACK CFI INIT' record and the
+  // subsequent 'STACK CFI' records that fall within its range.
+  struct StackFrameEntry {
+    // The starting address and number of bytes of machine code this
+    // entry covers.
+    Address address, size;
+
+    // The initial register recovery rules, in force at the starting
+    // address.
+    RuleMap initial_rules;
+
+    // A map from addresses to rule changes. To find the rules in
+    // force at a given address, start with initial_rules, and then
+    // apply the changes given in this map for all addresses up to and
+    // including the address you're interested in.
+    RuleChangeMap rule_changes;
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const std::string& name, const std::string& os,
+         const std::string& architecture, const std::string& id);
+  ~Module();
+
+ private:
+  // Module header entries.
+  std::string name_, os_, architecture_, id_;
+};
+
+}  // namespace lul
+
+#endif  // LulCommonExt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarf.cpp b/mozglue/baseprofiler/lul/LulDwarf.cpp
new file mode 100644
index 0000000000..c83296fc62
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarf.cpp
@@ -0,0 +1,2252 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
+// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/bytereader.cc
+//   src/common/dwarf/dwarf2reader.cc
+//   src/common/dwarf_cfi_to_module.cc
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <map>
+#include <stack>
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulCommonExt.h"
+#include "LulDwarfInt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_DWARF 0
+
+namespace lul {
+
+using std::string;
+
+// Create a reader for data of the given endianness.  No offset or address
+// reader is selected yet, both sizes start at zero, and the have_*_base_
+// members are value-initialized.
+ByteReader::ByteReader(enum Endianness endian)
+    : offset_reader_(nullptr),
+      address_reader_(nullptr),
+      endian_(endian),
+      address_size_(0),
+      offset_size_(0),
+      have_section_base_(),
+      have_text_base_(),
+      have_data_base_(),
+      have_function_base_() {}
+
+// Nothing to release.
+ByteReader::~ByteReader() = default;
+
+// Record SIZE (asserted to be 4 or 8) as the offset size and select the
+// matching fixed-width reader for offsets.
+void ByteReader::SetOffsetSize(uint8 size) {
+  MOZ_ASSERT(size == 4 || size == 8);
+  offset_size_ = size;
+  if (size != 4) {
+    offset_reader_ = &ByteReader::ReadEightBytes;
+  } else {
+    offset_reader_ = &ByteReader::ReadFourBytes;
+  }
+}
+
+// Record SIZE (asserted to be 4 or 8) as the address size and select the
+// matching fixed-width reader for addresses.
+void ByteReader::SetAddressSize(uint8 size) {
+  MOZ_ASSERT(size == 4 || size == 8);
+  address_size_ = size;
+  if (size != 4) {
+    address_reader_ = &ByteReader::ReadEightBytes;
+  } else {
+    address_reader_ = &ByteReader::ReadFourBytes;
+  }
+}
+
+// Read an "initial length" field at START.  Sets this reader's offset size
+// (4 or 8) according to what is found, stores the number of bytes the field
+// occupies in *LEN, and returns the length value.
+uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
+  const uint64 first_word = ReadFourBytes(start);
+
+  if (first_word != 0xffffffff) {
+    // Ordinary case: the four bytes just read are the length itself.
+    SetOffsetSize(4);
+    *len = 4;
+    return first_word;
+  }
+
+  // In DWARF2/3, if the initial length is all 1 bits, then the offset
+  // size is 8 and the real length is held in the next 8 bytes.
+  SetOffsetSize(8);
+  *len = 12;
+  return ReadOffset(start + 4);
+}
+
+// Return true if ENCODING is a recognized pointer encoding: either one of
+// the special values DW_EH_PE_omit / DW_EH_PE_aligned, or a value whose
+// low three bits and whose 0x70 bits are both within the known range.
+bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
+  if (encoding == DW_EH_PE_omit || encoding == DW_EH_PE_aligned) {
+    return true;
+  }
+  return (encoding & 0x7) <= DW_EH_PE_udata8 &&
+         (encoding & 0x70) <= DW_EH_PE_funcrel;
+}
+
+// Return true if the base address that ENCODING's 0x70 bits call for is
+// available to this reader.  Absolute pointers need no base; unrecognized
+// base kinds are never usable.
+bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
+  const auto base_kind = encoding & 0x70;
+  if (base_kind == DW_EH_PE_absptr) return true;
+  if (base_kind == DW_EH_PE_pcrel) return have_section_base_;
+  if (base_kind == DW_EH_PE_textrel) return have_text_base_;
+  if (base_kind == DW_EH_PE_datarel) return have_data_base_;
+  if (base_kind == DW_EH_PE_funcrel) return have_function_base_;
+  return false;
+}
+
+// Decode the pointer stored at BUFFER according to ENCODING and return it.
+// *LEN receives the number of bytes consumed (for DW_EH_PE_aligned this
+// includes any alignment padding that was skipped).
+//
+// The low nybble of ENCODING selects how the raw value is stored and the
+// 0x70 bits select the base address it is relative to.  ENCODING must not
+// be DW_EH_PE_omit, and the base it calls for is asserted to be available.
+// An unrecognized value format or base kind calls abort().
+uint64 ByteReader::ReadEncodedPointer(const char* buffer,
+                                      DwarfPointerEncoding encoding,
+                                      size_t* len) const {
+  // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
+  // see it here.
+  MOZ_ASSERT(encoding != DW_EH_PE_omit);
+
+  // The Linux Standards Base 4.0 does not make this clear, but the
+  // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
+  // agree that aligned pointers are always absolute, machine-sized,
+  // machine-signed pointers.
+  if (encoding == DW_EH_PE_aligned) {
+    MOZ_ASSERT(have_section_base_);
+
+    // We don't need to align BUFFER in *our* address space. Rather, we
+    // need to find the next position in our buffer that would be aligned
+    // when the .eh_frame section the buffer contains is loaded into the
+    // program's memory. So align assuming that buffer_base_ gets loaded at
+    // address section_base_, where section_base_ itself may or may not be
+    // aligned.
+
+    // First, find the offset to START from the closest prior aligned
+    // address.
+    uint64 skew = section_base_ & (AddressSize() - 1);
+    // Now find the offset from that aligned address to buffer.
+    uint64 offset = skew + (buffer - buffer_base_);
+    // Round up to the next boundary.
+    uint64 aligned = (offset + AddressSize() - 1) & -AddressSize();
+    // Convert back to a pointer.
+    const char* aligned_buffer = buffer_base_ + (aligned - skew);
+    // Finally, store the length and actually fetch the pointer.
+    // The length covers the skipped padding plus the pointer itself.
+    *len = aligned_buffer - buffer + AddressSize();
+    return ReadAddress(aligned_buffer);
+  }
+
+  // Extract the value first, ignoring whether it's a pointer or an
+  // offset relative to some base.
+  uint64 offset;
+  switch (encoding & 0x0f) {
+    case DW_EH_PE_absptr:
+      // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
+      // both the high and low nybble of encoding bytes. When it appears in
+      // the high nybble, it means that the pointer is absolute, not an
+      // offset from some base address. When it appears in the low nybble,
+      // as here, it means that the pointer is stored as a normal
+      // machine-sized and machine-signed address. A low nybble of
+      // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
+      // correct for us to treat the value as an offset from a base address
+      // if the upper nybble is not DW_EH_PE_absptr.
+      offset = ReadAddress(buffer);
+      *len = AddressSize();
+      break;
+
+    case DW_EH_PE_uleb128:
+      // Variable-length value; the LEB128 reader is handed LEN and is
+      // presumably what fills in the consumed byte count here.
+      offset = ReadUnsignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_udata2:
+      offset = ReadTwoBytes(buffer);
+      *len = 2;
+      break;
+
+    case DW_EH_PE_udata4:
+      offset = ReadFourBytes(buffer);
+      *len = 4;
+      break;
+
+    case DW_EH_PE_udata8:
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    case DW_EH_PE_sleb128:
+      // As for uleb128 above, LEN is passed through to the LEB128 reader.
+      offset = ReadSignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_sdata2:
+      offset = ReadTwoBytes(buffer);
+      // Sign-extend from 16 bits.
+      offset = (offset ^ 0x8000) - 0x8000;
+      *len = 2;
+      break;
+
+    case DW_EH_PE_sdata4:
+      offset = ReadFourBytes(buffer);
+      // Sign-extend from 32 bits.
+      offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
+      *len = 4;
+      break;
+
+    case DW_EH_PE_sdata8:
+      // No need to sign-extend; this is the full width of our type.
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    default:
+      // Unrecognized value format.
+      abort();
+  }
+
+  // Find the appropriate base address.
+  uint64 base;
+  switch (encoding & 0x70) {
+    case DW_EH_PE_absptr:
+      base = 0;
+      break;
+
+    case DW_EH_PE_pcrel:
+      MOZ_ASSERT(have_section_base_);
+      // Relative to the address that BUFFER itself would have, taking
+      // buffer_base_ to correspond to section_base_.
+      base = section_base_ + (buffer - buffer_base_);
+      break;
+
+    case DW_EH_PE_textrel:
+      MOZ_ASSERT(have_text_base_);
+      base = text_base_;
+      break;
+
+    case DW_EH_PE_datarel:
+      MOZ_ASSERT(have_data_base_);
+      base = data_base_;
+      break;
+
+    case DW_EH_PE_funcrel:
+      MOZ_ASSERT(have_function_base_);
+      base = function_base_;
+      break;
+
+    default:
+      // Unrecognized base kind.
+      abort();
+  }
+
+  uint64 pointer = base + offset;
+
+  // Remove inappropriate upper bits.
+  if (AddressSize() == 4)
+    pointer = pointer & 0xffffffff;
+  else
+    MOZ_ASSERT(AddressSize() == sizeof(uint64));
+
+  return pointer;
+}
+
+// A DWARF rule for recovering the address or value of a register, or
+// computing the canonical frame address. There is one subclass of this for
+// each '*Rule' member function in CallFrameInfo::Handler.
+//
+// It's annoying that we have to handle Rules using pointers (because
+// the concrete instances can have an arbitrary size). They're small,
+// so it would be much nicer if we could just handle them by value
+// instead of fretting about ownership and destruction.
+//
+// It seems like all these could simply be instances of std::tr1::bind,
+// except that we need instances to be EqualityComparable, too.
+//
+// This could logically be nested within State, but then the qualified names
+// get horrendous.
+class CallFrameInfo::Rule {
+ public:
+  // Tags naming each concrete rule class.  This stands in for RTTI so that
+  // the derived classes can implement equality comparisons.
+  enum CFIRTag {
+    CFIR_UNDEFINED_RULE,
+    CFIR_SAME_VALUE_RULE,
+    CFIR_OFFSET_RULE,
+    CFIR_VAL_OFFSET_RULE,
+    CFIR_REGISTER_RULE,
+    CFIR_EXPRESSION_RULE,
+    CFIR_VAL_EXPRESSION_RULE
+  };
+
+  virtual ~Rule() {}
+
+  // Report this rule to HANDLER as the way to recover REG at ADDRESS in
+  // the program. When REG is kCFARegister, the rule describes how to
+  // compute the canonical frame address. The result is whatever the
+  // HANDLER member function returned.
+  virtual bool Handle(Handler* handler, uint64 address, int reg) const = 0;
+
+  // Rule equality, used to work out which rules have to be reported
+  // after a DW_CFA_restore_state instruction.
+  virtual bool operator==(const Rule& rhs) const = 0;
+
+  bool operator!=(const Rule& rhs) const { return !operator==(rhs); }
+
+  // Return a pointer to a newly allocated copy of this rule.
+  virtual Rule* Copy() const = 0;
+
+  // For a base+offset rule, switch its base register to REG; any other
+  // rule ignores the call. (Ugly, but required for
+  // DW_CFA_def_cfa_register.)
+  virtual void SetBaseRegister(unsigned reg) {}
+
+  // For a base+offset rule, switch its offset to OFFSET; any other rule
+  // ignores the call. (Ugly, but required for DW_CFA_def_cfa_offset.)
+  virtual void SetOffset(long long offset) {}
+
+  // Return the tag identifying the concrete class of this object.
+  virtual CFIRTag getTag() const = 0;
+};
+
+// Rule: the register's value in the caller is not recoverable.
+class CallFrameInfo::UndefinedRule : public CallFrameInfo::Rule {
+ public:
+  UndefinedRule() {}
+  ~UndefinedRule() {}
+
+  CFIRTag getTag() const override { return CFIR_UNDEFINED_RULE; }
+
+  // Forward to the handler's UndefinedRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->UndefinedRule(address, reg);
+  }
+
+  // This rule carries no data, so any two instances compare equal.
+  bool operator==(const Rule& rhs) const override {
+    return rhs.getTag() == CFIR_UNDEFINED_RULE;
+  }
+
+  Rule* Copy() const override { return new UndefinedRule(*this); }
+};
+
+// Rule: the register still holds the value it had in the caller.
+class CallFrameInfo::SameValueRule : public CallFrameInfo::Rule {
+ public:
+  SameValueRule() {}
+  ~SameValueRule() {}
+
+  CFIRTag getTag() const override { return CFIR_SAME_VALUE_RULE; }
+
+  // Forward to the handler's SameValueRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->SameValueRule(address, reg);
+  }
+
+  // This rule carries no data, so any two instances compare equal.
+  bool operator==(const Rule& rhs) const override {
+    return rhs.getTag() == CFIR_SAME_VALUE_RULE;
+  }
+
+  Rule* Copy() const override { return new SameValueRule(*this); }
+};
+
+// Rule: the register was saved in memory at OFFSET from BASE_REGISTER,
+// where BASE_REGISTER may be CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::OffsetRule : public CallFrameInfo::Rule {
+ public:
+  OffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) {}
+  ~OffsetRule() {}
+
+  CFIRTag getTag() const override { return CFIR_OFFSET_RULE; }
+
+  // Forward to the handler's OffsetRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->OffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal when RHS is also an OffsetRule with the same base and offset.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_OFFSET_RULE) return false;
+    const OffsetRule& other = static_cast<const OffsetRule&>(rhs);
+    return base_register_ == other.base_register_ &&
+           offset_ == other.offset_;
+  }
+
+  Rule* Copy() const override { return new OffsetRule(*this); }
+
+  // SetBaseRegister and SetOffset are deliberately not overridden: they
+  // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and an
+  // OffsetRule makes no sense as a CFA rule, since it yields the address
+  // at which a register is saved rather than a value.
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the register's value in the caller equals the value of
+// BASE_REGISTER plus offset, where BASE_REGISTER may be
+// CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::ValOffsetRule : public CallFrameInfo::Rule {
+ public:
+  ValOffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) {}
+  ~ValOffsetRule() {}
+
+  CFIRTag getTag() const override { return CFIR_VAL_OFFSET_RULE; }
+
+  // Forward to the handler's ValOffsetRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ValOffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal when RHS is also a ValOffsetRule with the same base and offset.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_VAL_OFFSET_RULE) return false;
+    const ValOffsetRule& other = static_cast<const ValOffsetRule&>(rhs);
+    return base_register_ == other.base_register_ &&
+           offset_ == other.offset_;
+  }
+
+  Rule* Copy() const override { return new ValOffsetRule(*this); }
+
+  // In-place updates of the two components of the rule.
+  void SetBaseRegister(unsigned reg) override { base_register_ = reg; }
+  void SetOffset(long long offset) override { offset_ = offset; }
+
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the register's value was saved in the register REGISTER_NUMBER_.
+class CallFrameInfo::RegisterRule : public CallFrameInfo::Rule {
+ public:
+  explicit RegisterRule(int register_number)
+      : register_number_(register_number) {}
+  ~RegisterRule() {}
+
+  CFIRTag getTag() const override { return CFIR_REGISTER_RULE; }
+
+  // Forward to the handler's RegisterRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->RegisterRule(address, reg, register_number_);
+  }
+
+  // Equal when RHS is also a RegisterRule naming the same register.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_REGISTER_RULE) return false;
+    const RegisterRule& other = static_cast<const RegisterRule&>(rhs);
+    return register_number_ == other.register_number_;
+  }
+
+  Rule* Copy() const override { return new RegisterRule(*this); }
+
+ private:
+  int register_number_;
+};
+
+// Rule: evaluating EXPRESSION gives the address where the register is saved.
+class CallFrameInfo::ExpressionRule : public CallFrameInfo::Rule {
+ public:
+  explicit ExpressionRule(const string& expression) : expression_(expression) {}
+  ~ExpressionRule() {}
+
+  CFIRTag getTag() const override { return CFIR_EXPRESSION_RULE; }
+
+  // Forward to the handler's ExpressionRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ExpressionRule(address, reg, expression_);
+  }
+
+  // Equal when RHS is also an ExpressionRule with the same expression.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_EXPRESSION_RULE) return false;
+    const ExpressionRule& other = static_cast<const ExpressionRule&>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  Rule* Copy() const override { return new ExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// Rule: evaluating EXPRESSION gives the register's previous value.
+class CallFrameInfo::ValExpressionRule : public CallFrameInfo::Rule {
+ public:
+  explicit ValExpressionRule(const string& expression)
+      : expression_(expression) {}
+  ~ValExpressionRule() {}
+
+  CFIRTag getTag() const override { return CFIR_VAL_EXPRESSION_RULE; }
+
+  // Forward to the handler's ValExpressionRule callback.
+  bool Handle(Handler* handler, uint64 address, int reg) const override {
+    return handler->ValExpressionRule(address, reg, expression_);
+  }
+
+  // Equal when RHS is also a ValExpressionRule with the same expression.
+  bool operator==(const Rule& rhs) const override {
+    if (rhs.getTag() != CFIR_VAL_EXPRESSION_RULE) return false;
+    const ValExpressionRule& other =
+        static_cast<const ValExpressionRule&>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  Rule* Copy() const override { return new ValExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// A map from register numbers to rules, plus a separate rule for the CFA.
+// The map owns every Rule it holds.
+class CallFrameInfo::RuleMap {
+ public:
+  RuleMap() : cfa_rule_(nullptr) {}
+  // Copy construction starts out empty and then goes through operator=.
+  RuleMap(const RuleMap& rhs) : cfa_rule_(nullptr) { operator=(rhs); }
+  ~RuleMap() { Clear(); }
+
+  RuleMap& operator=(const RuleMap& rhs);
+
+  // Make RULE the rule for computing the CFA, deleting the previous one.
+  // Takes ownership of RULE.
+  void SetCFARule(Rule* rule) {
+    delete cfa_rule_;
+    cfa_rule_ = rule;
+  }
+
+  // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
+  // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
+  // DW_CFA_def_cfa_register, and for detecting references to the CFA before
+  // a rule for it has been established.
+  Rule* CFARule() const { return cfa_rule_; }
+
+  // Return a copy of the rule for REG, or NULL if there is none. The
+  // caller owns the result.
+  Rule* RegisterRule(int reg) const;
+
+  // Make RULE the rule for computing REG. Takes ownership of RULE.
+  void SetRegisterRule(int reg, Rule* rule);
+
+  // Make all the appropriate calls to HANDLER as if we were changing from
+  // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
+  // DW_CFA_restore_state, where lots of rules can change simultaneously.
+  // Return true if all handlers returned true; otherwise, return false.
+  bool HandleTransitionTo(Handler* handler, uint64 address,
+                          const RuleMap& new_rules) const;
+
+ private:
+  // A map from register numbers to Rules.
+  typedef std::map<int, Rule*> RuleByNumber;
+
+  // Remove all register rules and clear cfa_rule_.
+  void Clear();
+
+  // The rule for computing the canonical frame address; owned by this
+  // RuleMap.
+  Rule* cfa_rule_;
+
+  // The per-register rules, keyed by register number. This RuleMap owns
+  // the Rules the map refers to.
+  RuleByNumber registers_;
+};
+
+// Replace the contents of this RuleMap with deep copies of the rules in
+// RHS, and return *this.
+CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
+  // Self-assignment must leave the map untouched: clearing first would
+  // destroy the very rules we are about to copy from.
+  if (this == &rhs) return *this;
+
+  Clear();
+  // Since each map owns the rules it refers to, assignment must copy them.
+  if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
+  for (RuleByNumber::const_iterator it = rhs.registers_.begin();
+       it != rhs.registers_.end(); ++it)
+    registers_[it->first] = it->second->Copy();
+  return *this;
+}
+
+// Return a freshly allocated copy of the rule recorded for REG, or a null
+// pointer when REG has no rule.  REG must not be the CFA pseudo-register.
+CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  const RuleByNumber::const_iterator found = registers_.find(reg);
+  if (found == registers_.end()) {
+    return nullptr;
+  }
+  return found->second->Copy();
+}
+
+// Record RULE (asserted non-null) as the rule for REG, deleting whatever
+// rule REG had before.  REG must not be the CFA pseudo-register.
+void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  MOZ_ASSERT(rule);
+  // operator[] creates a null entry when REG is new, so the delete below
+  // is harmless in that case.
+  Rule*& entry = registers_[reg];
+  delete entry;
+  entry = rule;
+}
+
+// Report to HANDLER every rule that differs between this RuleMap and
+// NEW_RULES, as if the rules in force at ADDRESS changed from *this to
+// NEW_RULES.  Return false as soon as any handler call returns false, and
+// true otherwise.
+bool CallFrameInfo::RuleMap::HandleTransitionTo(
+    Handler* handler, uint64 address, const RuleMap& new_rules) const {
+  // Transition from cfa_rule_ to new_rules.cfa_rule_.
+  if (cfa_rule_ && new_rules.cfa_rule_) {
+    if (*cfa_rule_ != *new_rules.cfa_rule_ &&
+        !new_rules.cfa_rule_->Handle(handler, address, Handler::kCFARegister))
+      return false;
+  } else if (cfa_rule_) {
+    // this RuleMap has a CFA rule but new_rules doesn't.
+    // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
+    // it's garbage input. The instruction interpreter should have
+    // detected this and warned, so take no action here.
+  } else if (new_rules.cfa_rule_) {
+    // This shouldn't be possible: NEW_RULES is some prior state, and
+    // there's no way to remove entries.
+    MOZ_ASSERT(0);
+  } else {
+    // Both CFA rules are empty.  No action needed.
+  }
+
+  // Traverse the two maps in order by register number, and report
+  // whatever differences we find.
+  RuleByNumber::const_iterator old_it = registers_.begin();
+  RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
+  while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
+    if (old_it->first < new_it->first) {
+      // This RuleMap has an entry for old_it->first, but NEW_RULES
+      // doesn't.
+      //
+      // This isn't really the right thing to do, but since CFI generally
+      // only mentions callee-saves registers, and GCC's convention for
+      // callee-saves registers is that they are unchanged, it's a good
+      // approximation.
+      if (!handler->SameValueRule(address, old_it->first)) return false;
+      ++old_it;
+    } else if (old_it->first > new_it->first) {
+      // NEW_RULES has entry for new_it->first, but this RuleMap
+      // doesn't. This shouldn't be possible: NEW_RULES is some prior
+      // state, and there's no way to remove entries.
+      MOZ_ASSERT(0);
+      // MOZ_ASSERT is compiled out of non-debug builds.  Step past the
+      // unexpected entry so that the loop still terminates there instead
+      // of spinning forever on the same pair of iterators.
+      ++new_it;
+    } else {
+      // Both maps have an entry for this register. Report the new
+      // rule if it is different.
+      if (*old_it->second != *new_it->second &&
+          !new_it->second->Handle(handler, address, new_it->first))
+        return false;
+      ++new_it;
+      ++old_it;
+    }
+  }
+  // Finish off entries from this RuleMap with no counterparts in new_rules.
+  while (old_it != registers_.end()) {
+    if (!handler->SameValueRule(address, old_it->first)) return false;
+    ++old_it;
+  }
+  // Since we only make transitions from a rule set to some previously
+  // saved rule set, and we can only add rules to the map, NEW_RULES
+  // must have no more rules than *this.
+  MOZ_ASSERT(new_it == new_rules.registers_.end());
+
+  return true;
+}
+
+// Delete every rule this map owns (the register rules and the CFA rule)
+// and leave the map empty.
+void CallFrameInfo::RuleMap::Clear() {
+  for (auto& entry : registers_) {
+    delete entry.second;
+  }
+  registers_.clear();
+
+  delete cfa_rule_;
+  cfa_rule_ = nullptr;
+}
+
+// Interpreter state used while running the call frame instructions of a
+// CIE and then of an FDE: the rules currently in force, the code address
+// the next instruction applies to, and the rule sets saved along the way.
+class CallFrameInfo::State {
+ public:
+  // Build a state that decodes bytes with READER, sends rule changes to
+  // HANDLER, sends problems to REPORTER, and starts at code address
+  // ADDRESS. No entry is selected and no state stack exists yet.
+  State(ByteReader* reader, Handler* handler, Reporter* reporter,
+        uint64 address)
+      : reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        address_(address),
+        entry_(NULL),
+        cursor_(NULL),
+        saved_rules_(NULL) {}
+
+  // Free the remember/restore stack, if one was ever allocated.
+  ~State() { delete saved_rules_; }
+
+  // Run the instructions of CIE and remember the resulting rule set so
+  // that DW_CFA_restore can refer back to it. Returns true on success;
+  // on failure the problem goes to reporter_ and the result is false.
+  bool InterpretCIE(const CIE& cie);
+
+  // Run the instructions of FDE. Returns true on success; on failure
+  // the problem goes to reporter_ and the result is false.
+  bool InterpretFDE(const FDE& fde);
+
+ private:
+  // Decoded operands of one CFI instruction, as filled in by
+  // ParseOperands.
+  struct Operands {
+    unsigned register_number;  // Register number operand.
+    uint64 offset;             // Unsigned offset, or an address.
+    long signed_offset;        // Signed offset operand.
+    string expression;         // Bytes of a DWARF expression.
+  };
+
+  // Decode instruction operands at cursor_ according to FORMAT, storing
+  // them in OPERANDS. Returns true on success; on failure the problem is
+  // reported and the result is false.
+  //
+  // FORMAT holds one character per operand:
+  //
+  //   'r'  register number, unsigned LEB128  -> OPERANDS->register_number
+  //   'o'  offset, unsigned LEB128           -> OPERANDS->offset
+  //   's'  offset, signed LEB128             -> OPERANDS->signed_offset
+  //   'a'  address, decoded with the CIE's   -> OPERANDS->offset
+  //        pointer encoding
+  //   '1'  one-byte offset                   -> OPERANDS->offset
+  //   '2'  two-byte offset                   -> OPERANDS->offset
+  //   '4'  four-byte offset                  -> OPERANDS->offset
+  //   '8'  eight-byte offset                 -> OPERANDS->offset
+  //   'e'  length-prefixed block holding a   -> OPERANDS->expression
+  //        DWARF expression
+  bool ParseOperands(const char* format, Operands* operands);
+
+  // Execute the single CFI instruction at cursor_, updating this state
+  // and telling handler_ about any rule change. Returns true on success;
+  // on failure the problem is reported and the result is false.
+  bool DoInstruction();
+
+  // The Do* helpers below carry out work shared by several encodings of
+  // the same operation; DoInstruction calls them after decoding.
+
+  // Make the CFA rule "BASE_REGISTER plus OFFSET". Returns true on
+  // success, false (after reporting) on failure. Serves DW_CFA_def_cfa
+  // and DW_CFA_def_cfa_sf.
+  bool DoDefCFA(unsigned base_register, long offset);
+
+  // Replace the offset of the existing CFA rule with OFFSET. Returns
+  // true on success, false (after reporting) on failure. Serves
+  // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.
+  bool DoDefCFAOffset(long offset);
+
+  // Record RULE as the way to recover REG. Returns true on success,
+  // false on failure.
+  bool DoRule(unsigned reg, Rule* rule);
+
+  // Record that REG is saved at OFFSET from the CFA. Returns true on
+  // success, false (after reporting) on failure. Serves DW_CFA_offset,
+  // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.
+  bool DoOffset(unsigned reg, long offset);
+
+  // Record that the caller's value of REG is the CFA plus OFFSET.
+  // Returns true on success, false (after reporting) on failure. Serves
+  // DW_CFA_val_offset and DW_CFA_val_offset_sf.
+  bool DoValOffset(unsigned reg, long offset);
+
+  // Give REG back the rule it had after the CIE's instructions. Returns
+  // true on success, false (after reporting) on failure. Serves
+  // DW_CFA_restore and DW_CFA_restore_extended.
+  bool DoRestore(unsigned reg);
+
+  // Section offset of the byte at cursor_, for use in error reports.
+  uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
+
+  // Shorthand: report entry_ as incomplete and yield false.
+  bool ReportIncomplete() {
+    reporter_->Incomplete(entry_->offset, entry_->kind);
+    return false;
+  }
+
+  // Decodes multi-byte values with the appropriate endianness.
+  ByteReader* reader_;
+
+  // Receives the rules and rule changes we discover.
+  Handler* handler_;
+
+  // Receives descriptions of problems in the data being parsed.
+  Reporter* reporter_;
+
+  // Code address that the next instruction in the stream applies to.
+  uint64 address_;
+
+  // Entry whose instructions are being run: a CIE first, an FDE after.
+  const Entry* entry_;
+
+  // Position of the next instruction to run.
+  const char* cursor_;
+
+  // Rules currently in force.
+  RuleMap rules_;
+
+  // Rules in force once the CIE's instructions have run; consulted by
+  // DW_CFA_restore and DW_CFA_restore_extended.
+  RuleMap cie_rules_;
+
+  // Stack of rule sets pushed by DW_CFA_remember_state and popped by
+  // DW_CFA_restore_state. Allocated on first use; NULL until then.
+  std::stack<RuleMap>* saved_rules_;
+};
+
+// Run every instruction of CIE in order. On success, snapshot the
+// resulting rules into cie_rules_ (the baseline that DW_CFA_restore and
+// DW_CFA_restore_extended go back to) and return true. Return false as
+// soon as any instruction fails.
+bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
+  entry_ = &cie;
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end;) {
+    if (!DoInstruction()) {
+      return false;
+    }
+  }
+  cie_rules_ = rules_;
+  return true;
+}
+
+// Run every instruction of FDE in order. Return true once the end of
+// the entry is reached, or false as soon as any instruction fails.
+bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
+  entry_ = &fde;
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end;) {
+    if (!DoInstruction()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Decode the operands of one CFI instruction, starting at cursor_, as
+// directed by FORMAT, which holds one character per operand: 'r'
+// (ULEB128 register), 'o' (ULEB128 offset), 's' (SLEB128 offset), 'a'
+// (encoded address), '1'/'2'/'4'/'8' (fixed-width offset) or 'e'
+// (length-prefixed expression). Each value lands in the matching field
+// of OPERANDS and cursor_ moves past it. If an operand would run beyond
+// the end of the current entry, the entry is reported as incomplete and
+// the result is false; otherwise the result is true.
+bool CallFrameInfo::State::ParseOperands(const char* format,
+                                         Operands* operands) {
+  for (const char* spec = format; *spec != '\0'; ++spec) {
+    // Bytes still available in this entry, recomputed for each operand.
+    const size_t remaining = entry_->end - cursor_;
+    // Number of bytes the variable-length decoders consumed.
+    size_t consumed;
+
+    switch (*spec) {
+      case 'r': {
+        operands->register_number =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+      }
+
+      case 'o': {
+        operands->offset = reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+      }
+
+      case 's': {
+        operands->signed_offset =
+            reader_->ReadSignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+      }
+
+      case 'a': {
+        // Addresses use whatever pointer encoding the entry's CIE names.
+        operands->offset = reader_->ReadEncodedPointer(
+            cursor_, entry_->cie->pointer_encoding, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+      }
+
+      case '1': {
+        if (remaining < 1) return ReportIncomplete();
+        operands->offset = static_cast<unsigned char>(*cursor_);
+        cursor_ += 1;
+        break;
+      }
+
+      case '2': {
+        if (remaining < 2) return ReportIncomplete();
+        operands->offset = reader_->ReadTwoBytes(cursor_);
+        cursor_ += 2;
+        break;
+      }
+
+      case '4': {
+        if (remaining < 4) return ReportIncomplete();
+        operands->offset = reader_->ReadFourBytes(cursor_);
+        cursor_ += 4;
+        break;
+      }
+
+      case '8': {
+        if (remaining < 8) return ReportIncomplete();
+        operands->offset = reader_->ReadEightBytes(cursor_);
+        cursor_ += 8;
+        break;
+      }
+
+      case 'e': {
+        size_t expression_length =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        // Both the length prefix and the expression bytes it announces
+        // must fit in what is left of the entry.
+        if (consumed > remaining) return ReportIncomplete();
+        if (expression_length > remaining - consumed)
+          return ReportIncomplete();
+        cursor_ += consumed;
+        operands->expression = string(cursor_, expression_length);
+        cursor_ += expression_length;
+        break;
+      }
+
+      default:
+        // FORMAT strings are supplied by this file only; any other
+        // character is a programming error.
+        MOZ_ASSERT(0);
+    }
+  }
+
+  return true;
+}
+
+// Execute the single CFI instruction at cursor_ for the current entry:
+// decode its opcode and operands, update address_ and rules_ to match,
+// and pass any rule change along to handler_.
+//
+// Returns true on success. Returns false if the operands are truncated,
+// the opcode is not recognized, the instruction is not valid in the
+// current state (these are reported through reporter_), or reporting a
+// rule change to the handler yields false.
+bool CallFrameInfo::State::DoInstruction() {
+  CIE* cie = entry_->cie;
+  Operands ops;
+
+  // Our entry's kind should have been set by now.
+  MOZ_ASSERT(entry_->kind != kUnknown);
+
+  // We shouldn't have been invoked unless there were more
+  // instructions to parse.
+  MOZ_ASSERT(cursor_ < entry_->end);
+
+  unsigned opcode = *cursor_++;
+
+  // Opcodes with either of the top two bits set carry their first
+  // operand in the low six bits; only the top two bits select the
+  // operation.
+  if ((opcode & 0xc0) != 0) {
+    switch (opcode & 0xc0) {
+      // Advance the address.
+      case DW_CFA_advance_loc: {
+        size_t code_offset = opcode & 0x3f;
+        address_ += code_offset * cie->code_alignment_factor;
+        break;
+      }
+
+      // Find a register at an offset from the CFA.
+      case DW_CFA_offset:
+        if (!ParseOperands("o", &ops) ||
+            !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
+          return false;
+        break;
+
+      // Restore the rule established for a register by the CIE.
+      case DW_CFA_restore:
+        if (!DoRestore(opcode & 0x3f)) return false;
+        break;
+
+      // The 'if' above should have excluded this possibility.
+      default:
+        MOZ_ASSERT(0);
+    }
+
+    // Return here, so the big switch below won't be indented.
+    return true;
+  }
+
+  switch (opcode) {
+    // Set the address.
+    case DW_CFA_set_loc:
+      if (!ParseOperands("a", &ops)) return false;
+      address_ = ops.offset;
+      break;
+
+    // Advance the address by a one-byte delta.
+    case DW_CFA_advance_loc1:
+      if (!ParseOperands("1", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address by a two-byte delta.
+    case DW_CFA_advance_loc2:
+      if (!ParseOperands("2", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address by a four-byte delta.
+    case DW_CFA_advance_loc4:
+      if (!ParseOperands("4", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address by an eight-byte delta.
+    case DW_CFA_MIPS_advance_loc8:
+      if (!ParseOperands("8", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Compute the CFA by adding an offset to a register.
+    case DW_CFA_def_cfa:
+      if (!ParseOperands("ro", &ops) ||
+          !DoDefCFA(ops.register_number, ops.offset))
+        return false;
+      break;
+
+    // Compute the CFA by adding a signed, factored offset to a register.
+    case DW_CFA_def_cfa_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoDefCFA(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Change the base register used to compute the CFA.
+    case DW_CFA_def_cfa_register: {
+      // There must already be a CFA rule whose register we can change.
+      Rule* cfa_rule = rules_.CFARule();
+      if (!cfa_rule) {
+        reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+        return false;
+      }
+      if (!ParseOperands("r", &ops)) return false;
+      cfa_rule->SetBaseRegister(ops.register_number);
+      if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister))
+        return false;
+      break;
+    }
+
+    // Change the offset used to compute the CFA.
+    case DW_CFA_def_cfa_offset:
+      if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset))
+        return false;
+      break;
+
+    // Change the offset used to compute the CFA (signed, factored form).
+    case DW_CFA_def_cfa_offset_sf:
+      if (!ParseOperands("s", &ops) ||
+          !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Specify an expression whose value is the CFA.
+    case DW_CFA_def_cfa_expression: {
+      if (!ParseOperands("e", &ops)) return false;
+      Rule* rule = new ValExpressionRule(ops.expression);
+      rules_.SetCFARule(rule);
+      if (!rule->Handle(handler_, address_, Handler::kCFARegister))
+        return false;
+      break;
+    }
+
+    // The register's value cannot be recovered.
+    case DW_CFA_undefined: {
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, new UndefinedRule()))
+        return false;
+      break;
+    }
+
+    // The register's value is unchanged from its value in the caller.
+    case DW_CFA_same_value: {
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, new SameValueRule()))
+        return false;
+      break;
+    }
+
+    // Find a register at an offset from the CFA.
+    case DW_CFA_offset_extended:
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at a signed, factored offset from the CFA.
+    case DW_CFA_offset_extended_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at an offset from the CFA; the unsigned
+    // operand is negated before it is factored.
+    case DW_CFA_GNU_negative_offset_extended:
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    -ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus an offset.
+    case DW_CFA_val_offset:
+      if (!ParseOperands("ro", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus a signed offset.
+    case DW_CFA_val_offset_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register has been saved in another register; the second
+    // operand is that register's number.
+    case DW_CFA_register: {
+      if (!ParseOperands("ro", &ops) ||
+          !DoRule(ops.register_number, new RegisterRule(ops.offset)))
+        return false;
+      break;
+    }
+
+    // An expression yields the address at which the register is saved.
+    case DW_CFA_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // An expression yields the caller's value for the register.
+    case DW_CFA_val_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // Restore the rule established for a register by the CIE.
+    case DW_CFA_restore_extended:
+      if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number))
+        return false;
+      break;
+
+    // Save the current set of rules on a stack. The stack itself is
+    // only allocated the first time it is needed.
+    case DW_CFA_remember_state:
+      if (!saved_rules_) {
+        saved_rules_ = new std::stack<RuleMap>();
+      }
+      saved_rules_->push(rules_);
+      break;
+
+    // Pop the current set of rules off the stack.
+    case DW_CFA_restore_state: {
+      if (!saved_rules_ || saved_rules_->empty()) {
+        reporter_->EmptyStateStack(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      const RuleMap& new_rules = saved_rules_->top();
+      // Going from "has a CFA rule" to "has none" is rejected.
+      if (rules_.CFARule() && !new_rules.CFARule()) {
+        reporter_->ClearingCFARule(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      // Report the differences between the current and the saved rules.
+      // A false result is propagated, like every other failed rule
+      // report in this function, rather than being silently dropped.
+      if (!rules_.HandleTransitionTo(handler_, address_, new_rules))
+        return false;
+      // Copy before popping: new_rules refers to the stack's top element.
+      rules_ = new_rules;
+      saved_rules_->pop();
+      break;
+    }
+
+    // No operation.  (Padding instruction.)
+    case DW_CFA_nop:
+      break;
+
+    // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
+    // are saved in registers 24 through 31 (%i0-%i7), and registers
+    // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
+    // (0-15 * the register size). The register numbers must be
+    // hard-coded. A GNU extension.
+    case DW_CFA_GNU_window_save: {
+      // Save %o0-%o7 in %i0-%i7.
+      for (int i = 8; i < 16; i++)
+        if (!DoRule(i, new RegisterRule(i + 16))) return false;
+      // Save %l0-%l7 and %i0-%i7 at the CFA.
+      for (int i = 16; i < 32; i++)
+        // This assumes that the byte reader's address size is the same
+        // as the architecture's register size.
+        if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
+                                      (i - 16) * reader_->AddressSize())))
+          return false;
+      break;
+    }
+
+    // The operand is parsed only so the cursor moves past it; its value
+    // is not used.
+    case DW_CFA_GNU_args_size:
+      if (!ParseOperands("o", &ops)) return false;
+      break;
+
+    // An opcode we don't recognize.
+    default: {
+      reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Install "BASE_REGISTER plus OFFSET" as the CFA rule and report the new
+// rule for the CFA pseudo-register. Returns what the rule's Handle call
+// returns.
+bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
+  Rule* cfa = new ValOffsetRule(base_register, offset);
+  rules_.SetCFARule(cfa);
+  const bool accepted =
+      cfa->Handle(handler_, address_, Handler::kCFARegister);
+  return accepted;
+}
+
+// Change the offset of the existing CFA rule to OFFSET and report the
+// updated rule. If there is no CFA rule to modify, report that through
+// reporter_ and return false; otherwise return what the rule's Handle
+// call returns.
+bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
+  Rule* current_cfa = rules_.CFARule();
+  if (current_cfa) {
+    current_cfa->SetOffset(offset);
+    return current_cfa->Handle(handler_, address_, Handler::kCFARegister);
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Store RULE in the current rule set as the rule for REG, then report
+// it. Returns what the rule's Handle call returns.
+bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
+  rules_.SetRegisterRule(reg, rule);
+  const bool accepted = rule->Handle(handler_, address_, reg);
+  return accepted;
+}
+
+// Record that REG is saved in memory at OFFSET from the CFA. A CFA rule
+// must already exist; if none does, report that through reporter_ and
+// return false.
+bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    Rule* saved_at = new OffsetRule(Handler::kCFARegister, offset);
+    return DoRule(reg, saved_at);
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Record that the caller's value of REG equals the CFA plus OFFSET. A
+// CFA rule must already exist; if none does, report that through
+// reporter_ and return false.
+bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    Rule* value_is = new ValOffsetRule(Handler::kCFARegister, offset);
+    return DoRule(reg, value_is);
+  }
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Give REG the rule that was in force for it after the CIE's
+// instructions ran. A restore inside a CIE itself is meaningless, so it
+// is reported through reporter_ and fails.
+bool CallFrameInfo::State::DoRestore(unsigned reg) {
+  if (entry_->kind == kCIE) {
+    reporter_->RestoreInCIE(entry_->offset, CursorOffset());
+    return false;
+  }
+
+  Rule* restored = cie_rules_.RegisterRule(reg);
+  if (restored) {
+    return DoRule(reg, restored);
+  }
+
+  // The CIE said nothing about this register. Treating it as "same
+  // value" is not strictly correct, but CFI generally only mentions
+  // callee-saves registers, and GCC's convention is that those are
+  // unchanged, so it is a good approximation.
+  return DoRule(reg, new SameValueRule());
+}
+
+// Read the part common to every entry, starting at CURSOR: the initial
+// length and the id field. Fills in ENTRY's offset, start, kind, end,
+// id, fields and cie members. Returns true on success, including when
+// the entry is an .eh_frame terminator. If the data is truncated, the
+// entry is reported as incomplete and the result is false; ENTRY->end is
+// left NULL when even the entry's extent could not be determined.
+bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) {
+  const char* const section_end = buffer_ + buffer_length_;
+
+  // Set up just enough of ENTRY for error reports to make sense.
+  entry->kind = kUnknown;
+  entry->start = cursor;
+  entry->offset = cursor - buffer_;
+  entry->end = NULL;
+
+  // The initial length comes first; reading it also establishes
+  // reader_'s offset size.
+  size_t length_size;
+  const uint64 length = reader_->ReadInitialLength(cursor, &length_size);
+  if (size_t(section_end - cursor) < length_size) {
+    return ReportIncomplete(entry);
+  }
+  cursor += length_size;
+
+  // A zero length in .eh_frame data terminates the series of entries.
+  if (eh_frame_ && length == 0) {
+    entry->kind = kTerminator;
+    entry->end = cursor;
+    return true;
+  }
+
+  // The body must fit in the rest of the section.
+  if (size_t(section_end - cursor) < length) {
+    return ReportIncomplete(entry);
+  }
+
+  // LENGTH counts the bytes that follow the initial length field, which
+  // is exactly where cursor now points. (A 64-bit length too large for
+  // a size_t on a 32-bit machine was already rejected by the check
+  // above.)
+  entry->end = cursor + length;
+
+  // The next field is either a CIE id or the offset of this FDE's CIE.
+  const size_t offset_size = reader_->OffsetSize();
+  if (size_t(entry->end - cursor) < offset_size) {
+    return ReportIncomplete(entry);
+  }
+  entry->id = reader_->ReadOffset(cursor);
+
+  // cursor still points at the id field here: .eh_frame data needs that
+  // position to turn an FDE's CIE pointer into a section offset.
+  if (!eh_frame_) {
+    // DWARF CFI: an all-ones id of the entry's offset width marks a CIE;
+    // anything else is the CIE's offset from the start of the section.
+    if (offset_size == 4) {
+      if (entry->id == 0xffffffff) {
+        entry->kind = kCIE;
+      } else {
+        entry->kind = kFDE;
+      }
+    } else {
+      MOZ_ASSERT(offset_size == 8);
+      if (entry->id == 0xffffffffffffffffULL) {
+        entry->kind = kCIE;
+      } else {
+        entry->kind = kFDE;
+      }
+    }
+  } else if (entry->id == 0) {
+    // .eh_frame: an id of zero marks a CIE.
+    entry->kind = kCIE;
+  } else {
+    // .eh_frame: any other id is the distance from the id field back to
+    // the CIE. Convert it to an offset from the start of the buffer.
+    entry->kind = kFDE;
+    entry->id = (cursor - buffer_) - entry->id;
+  }
+
+  // The kind-specific fields begin right after the id.
+  entry->fields = cursor + offset_size;
+
+  entry->cie = NULL;
+
+  return true;
+}
+
+// Parse the CIE-specific fields of CIE, starting at cie->fields:
+// version, augmentation string, (version 4 and later) address and
+// segment sizes, code and data alignment factors, return address
+// register and, for 'z' augmentations, the augmentation data. On
+// success, cie->instructions points at the first instruction and the
+// result is true. On truncated, unrecognized or unusable data the
+// problem is reported through reporter_ and the result is false.
+bool CallFrameInfo::ReadCIEFields(CIE* cie) {
+  const char* cursor = cie->fields;
+  size_t len;
+
+  MOZ_ASSERT(cie->kind == kCIE);
+
+  // Prepare for early exit.
+  cie->version = 0;
+  cie->augmentation.clear();
+  cie->code_alignment_factor = 0;
+  cie->data_alignment_factor = 0;
+  cie->return_address_register = 0;
+  cie->has_z_augmentation = false;
+  cie->pointer_encoding = DW_EH_PE_absptr;
+  cie->instructions = 0;
+
+  // Parse the version number.
+  if (cie->end - cursor < 1) return ReportIncomplete(cie);
+  cie->version = reader_->ReadOneByte(cursor);
+  cursor++;
+
+  // If we don't recognize the version, we can't parse any more fields of the
+  // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
+  // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
+  // the difference between those versions seems to be the same as for
+  // .debug_frame.
+  if (cie->version < 1 || cie->version > 4) {
+    reporter_->UnrecognizedVersion(cie->offset, cie->version);
+    return false;
+  }
+
+  // The augmentation string is NUL-terminated; the terminator must lie
+  // inside the entry.
+  const char* augmentation_start = cursor;
+  const void* augmentation_end =
+      memchr(augmentation_start, '\0', cie->end - augmentation_start);
+  if (!augmentation_end) return ReportIncomplete(cie);
+  cursor = static_cast<const char*>(augmentation_end);
+  cie->augmentation = string(augmentation_start, cursor - augmentation_start);
+  // Skip the terminating '\0'.
+  cursor++;
+
+  // Is this CFI augmented?
+  if (!cie->augmentation.empty()) {
+    // Is it an augmentation we recognize?
+    if (cie->augmentation[0] == DW_Z_augmentation_start) {
+      // Linux C++ ABI 'z' augmentation, used for exception handling data.
+      cie->has_z_augmentation = true;
+    } else {
+      // Not an augmentation we recognize. Augmentations can have arbitrary
+      // effects on the form of rest of the content, so we have to give up.
+      reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+      return false;
+    }
+  }
+
+  if (cie->version >= 4) {
+    // Check that the address_size and segment_size fields are plausible.
+    if (cie->end - cursor < 2) {
+      return ReportIncomplete(cie);
+    }
+    uint8_t address_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (address_size != sizeof(void*)) {
+      // This is not per-se invalid CFI.  But we can reasonably expect to
+      // be running on a target of the same word size as the CFI is for,
+      // so we reject this case.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size");
+      return false;
+    }
+    uint8_t segment_size = reader_->ReadOneByte(cursor);
+    cursor++;
+    if (segment_size != 0) {
+      // This is also not per-se invalid CFI, but we don't currently handle
+      // the case of non-zero |segment_size|.
+      reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size");
+      return false;
+    }
+    // We only continue parsing if |segment_size| is zero.  If this routine
+    // is ever changed to allow non-zero |segment_size|, then
+    // ReadFDEFields() below will have to be changed to match, per comments
+    // there.
+  }
+
+  // Parse the code alignment factor.
+  cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the data alignment factor.
+  cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the return address register. This is a ubyte in version 1, and
+  // a ULEB128 in later versions.
+  if (cie->version == 1) {
+    if (cursor >= cie->end) return ReportIncomplete(cie);
+    cie->return_address_register = uint8(*cursor++);
+  } else {
+    cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
+    if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+    cursor += len;
+  }
+
+  // If we have a 'z' augmentation string, find the augmentation data and
+  // use the augmentation string to parse it.
+  if (cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
+    // Compare against the remaining bytes by subtraction rather than by
+    // computing len + data_size: data_size comes straight from the input,
+    // and a huge value could wrap that sum and slip past the check,
+    // after which cursor would be moved outside the entry.
+    const size_t bytes_left = size_t(cie->end - cursor);
+    if (len > bytes_left || data_size > bytes_left - len)
+      return ReportIncomplete(cie);
+    cursor += len;
+    const char* data = cursor;
+    cursor += data_size;
+    const char* data_end = cursor;
+
+    cie->has_z_lsda = false;
+    cie->has_z_personality = false;
+    cie->has_z_signal_frame = false;
+
+    // Walk the augmentation string, and extract values from the
+    // augmentation data as the string directs. Index 0 is the 'z'
+    // itself, so the walk starts at 1.
+    for (size_t i = 1; i < cie->augmentation.size(); i++) {
+      switch (cie->augmentation[i]) {
+        case DW_Z_has_LSDA:
+          // The CIE's augmentation data holds the language-specific data
+          // area pointer's encoding, and the FDE's augmentation data holds
+          // the pointer itself.
+          cie->has_z_lsda = true;
+          // Fetch the LSDA encoding from the augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->lsda_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->lsda_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
+            return false;
+          }
+          // Don't check if the encoding is usable here --- we haven't
+          // read the FDE's fields yet, so we're not prepared for
+          // DW_EH_PE_funcrel, although that's a fine encoding for the
+          // LSDA to use, since it appears in the FDE.
+          break;
+
+        case DW_Z_has_personality_routine:
+          // The CIE's augmentation data holds the personality routine
+          // pointer's encoding, followed by the pointer itself.
+          cie->has_z_personality = true;
+          // Fetch the personality routine pointer's encoding from the
+          // augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->personality_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->personality_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->personality_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->personality_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->personality_encoding);
+            return false;
+          }
+          // Fetch the personality routine's pointer itself from the data.
+          cie->personality_address = reader_->ReadEncodedPointer(
+              data, cie->personality_encoding, &len);
+          if (len > size_t(data_end - data)) return ReportIncomplete(cie);
+          data += len;
+          break;
+
+        case DW_Z_has_FDE_address_encoding:
+          // The CIE's augmentation data holds the pointer encoding to use
+          // for addresses in the FDE.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->pointer_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->pointer_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->pointer_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->pointer_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->pointer_encoding);
+            return false;
+          }
+          break;
+
+        case DW_Z_is_signal_trampoline:
+          // Frames using this CIE are signal delivery frames.
+          cie->has_z_signal_frame = true;
+          break;
+
+        default:
+          // An augmentation we don't recognize.
+          reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+          return false;
+      }
+    }
+  }
+
+  // The CIE's instructions start here.
+  cie->instructions = cursor;
+
+  return true;
+}
+
+// Parse the FDE-specific fields of FDE, starting at fde->fields: the
+// initial address, the size of the address range and, when the CIE has
+// a 'z' augmentation, the augmentation data (from which only the LSDA
+// pointer is extracted). fde->cie must already have been parsed. On
+// success, fde->instructions points at the first instruction and the
+// result is true. On truncated or unusable data the problem is reported
+// through reporter_ and the result is false.
+bool CallFrameInfo::ReadFDEFields(FDE* fde) {
+  const char* cursor = fde->fields;
+  size_t size;
+
+  // At this point, for Dwarf 4 and above, we are assuming that the
+  // associated CIE has its |segment_size| field equal to zero.  This is
+  // checked for in ReadCIEFields() above.  If ReadCIEFields() is ever
+  // changed to allow non-zero |segment_size| CIEs then we will have to read
+  // the segment_selector value at this point.
+
+  fde->address =
+      reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+  reader_->SetFunctionBase(fde->address);
+
+  // For the length, we strip off the upper nybble of the encoding used for
+  // the starting address.
+  DwarfPointerEncoding length_encoding =
+      DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
+  fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
+  if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde);
+  cursor += size;
+
+  // If the CIE has a 'z' augmentation string, then augmentation data
+  // appears here.
+  if (fde->cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
+    // Compare against the remaining bytes by subtraction rather than by
+    // computing size + data_size: data_size comes straight from the
+    // input, and a huge value could wrap that sum and slip past the
+    // check, after which cursor would be moved outside the entry.
+    const size_t bytes_left = size_t(fde->end - cursor);
+    if (size > bytes_left || data_size > bytes_left - size)
+      return ReportIncomplete(fde);
+    cursor += size;
+
+    // In the abstract, we should walk the augmentation string, and extract
+    // items from the FDE's augmentation data as we encounter augmentation
+    // string characters that specify their presence: the ordering of items
+    // in the augmentation string determines the arrangement of values in
+    // the augmentation data.
+    //
+    // In practice, there's only ever one value in FDE augmentation data
+    // that we support --- the LSDA pointer --- and we have to bail if we
+    // see any unrecognized augmentation string characters. So if there is
+    // anything here at all, we know what it is, and where it starts.
+    if (fde->cie->has_z_lsda) {
+      // Check whether the LSDA's pointer encoding is usable now: only once
+      // we've parsed the FDE's starting address do we call reader_->
+      // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
+      // usable.
+      if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
+        reporter_->UnusablePointerEncoding(fde->cie->offset,
+                                           fde->cie->lsda_encoding);
+        return false;
+      }
+
+      fde->lsda_address =
+          reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
+      // The pointer has to fit inside the augmentation data.
+      if (size > data_size) return ReportIncomplete(fde);
+      // Ideally, we would also complain here if there were unconsumed
+      // augmentation data.
+    }
+
+    // Skip the whole augmentation data block, whether or not any of it
+    // was interpreted.
+    cursor += data_size;
+  }
+
+  // The FDE's instructions start after those.
+  fde->instructions = cursor;
+
+  return true;
+}
+
+// Walk the entries in buffer_ from start to end. CIEs are skipped on the
+// way through; each FDE is parsed together with the CIE it cites and then
+// offered to handler_. Returns false if any visited entry failed to parse
+// or was rejected part-way by the handler, or if an entry's extent could
+// not be determined; returns true otherwise. An .eh_frame terminator stops
+// the walk without, by itself, making the result false.
+bool CallFrameInfo::Start() {
+  const char* buffer_end = buffer_ + buffer_length_;
+  const char* cursor;
+  bool all_ok = true;
+  const char* entry_end;
+  bool ok;
+
+  // Traverse all the entries in buffer_, skipping CIEs and offering
+  // FDEs to the handler.
+  // Note that the loop's increment expression runs on every 'continue':
+  // it advances cursor to entry_end and folds this iteration's |ok| into
+  // all_ok. A 'break' skips it, leaving all_ok as it stands.
+  for (cursor = buffer_; cursor < buffer_end;
+       cursor = entry_end, all_ok = all_ok && ok) {
+    FDE fde;
+
+    // Make it easy to skip this entry with 'continue': assume that
+    // things are not okay until we've checked all the data, and
+    // prepare the address of the next entry.
+    ok = false;
+
+    // Read the entry's prologue.
+    if (!ReadEntryPrologue(cursor, &fde)) {
+      if (!fde.end) {
+        // If we couldn't even figure out this entry's extent, then we
+        // must stop processing entries altogether.
+        all_ok = false;
+        break;
+      }
+      // The extent is known, so skip just this entry (ok stays false).
+      entry_end = fde.end;
+      continue;
+    }
+
+    // The next iteration picks up after this entry.
+    entry_end = fde.end;
+
+    // Did we see an .eh_frame terminating mark?
+    if (fde.kind == kTerminator) {
+      // If there appears to be more data left in the section after the
+      // terminating mark, warn the user. But this is just a warning;
+      // we leave all_ok true.
+      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
+      break;
+    }
+
+    // In this loop, we skip CIEs. We only parse them fully when we
+    // parse an FDE that refers to them. This limits our memory
+    // consumption (beyond the buffer itself) to that needed to
+    // process the largest single entry.
+    if (fde.kind != kFDE) {
+      ok = true;
+      continue;
+    }
+
+    // Validate the CIE pointer.
+    if (fde.id > buffer_length_) {
+      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
+      continue;
+    }
+
+    CIE cie;
+
+    // Parse this FDE's CIE header.
+    if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue;
+    // This had better be an actual CIE.
+    if (cie.kind != kCIE) {
+      reporter_->BadCIEId(fde.offset, fde.id);
+      continue;
+    }
+    if (!ReadCIEFields(&cie)) continue;
+
+    // We now have the values that govern both the CIE and the FDE.
+    cie.cie = &cie;
+    fde.cie = &cie;
+
+    // Parse the FDE's header.
+    if (!ReadFDEFields(&fde)) continue;
+
+    // Call Entry to ask the consumer if they're interested.
+    if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version,
+                         cie.augmentation, cie.return_address_register)) {
+      // The handler isn't interested in this entry. That's not an error.
+      ok = true;
+      continue;
+    }
+
+    // From here on, a handler callback returning false abandons the entry
+    // via 'continue' with ok still false, so it counts as a failure.
+    // NOTE(review): those 'continue' paths also bypass the
+    // ClearFunctionBase() and handler_->End() calls at the bottom of the
+    // loop body -- confirm that this is intended.
+    if (cie.has_z_augmentation) {
+      // Report the personality routine address, if we have one.
+      if (cie.has_z_personality) {
+        if (!handler_->PersonalityRoutine(
+                cie.personality_address,
+                IsIndirectEncoding(cie.personality_encoding)))
+          continue;
+      }
+
+      // Report the language-specific data area address, if we have one.
+      if (cie.has_z_lsda) {
+        if (!handler_->LanguageSpecificDataArea(
+                fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding)))
+          continue;
+      }
+
+      // If this is a signal-handling frame, report that.
+      if (cie.has_z_signal_frame) {
+        if (!handler_->SignalHandler()) continue;
+      }
+    }
+
+    // Interpret the CIE's instructions, and then the FDE's instructions.
+    State state(reader_, handler_, reporter_, fde.address);
+    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
+
+    // Tell the ByteReader that the function start address from the
+    // FDE header is no longer valid.
+    reader_->ClearFunctionBase();
+
+    // Report the end of the entry.
+    handler_->End();
+  }
+
+  return all_ok;
+}
+
+// Return the human-readable phrase used in diagnostics for an entry kind.
+const char* CallFrameInfo::KindName(EntryKind kind) {
+  switch (kind) {
+    case CallFrameInfo::kUnknown:
+      return "entry";
+    case CallFrameInfo::kCIE:
+      return "common information entry";
+    case CallFrameInfo::kFDE:
+      return "frame description entry";
+    default:
+      // The only kind left should be the terminator.
+      MOZ_ASSERT(kind == CallFrameInfo::kTerminator);
+      return ".eh_frame sequence terminator";
+  }
+}
+
+// Tell the reporter that |entry| ended early. Always returns false, so
+// callers can write 'return ReportIncomplete(entry);'.
+bool CallFrameInfo::ReportIncomplete(Entry* entry) {
+  const auto where = entry->offset;
+  const auto what = entry->kind;
+  reporter_->Incomplete(where, what);
+  return false;
+}
+
+// Log that the entry of the given kind at |offset| ends early.
+void CallFrameInfo::Reporter::Incomplete(uint64 offset,
+                                         CallFrameInfo::EntryKind kind) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
+                 file, kind_name, offset, section);
+  log_(message);
+}
+
+// Log that an end-of-data marker at |offset| precedes the end of the
+// section contents.
+void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
+                 " before end of section contents\n",
+                 file, offset, section);
+  log_(message);
+}
+
+// Log that the FDE at |offset| carries a CIE pointer, |cie_offset|, that
+// is out of range.
+void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
+                                                   uint64 cie_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer is out of range: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the FDE at |offset| has a CIE pointer, |cie_offset|, that does
+// not point to a CIE.
+void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer does not point to a CIE: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Log that the entry at |offset| specifies a version number, |version|,
+// that is not recognized.
+void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized version: %d\n",
+                 file, offset, section, version);
+  log_(message);
+}
+
+// Log that the entry at |offset| specifies an augmentation string, |aug|,
+// that is not recognized.
+void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
+                                                       const string& aug) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* augmentation = aug.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized augmentation: '%s'\n",
+                 file, offset, section, augmentation);
+  log_(message);
+}
+
+// Log that the entry at |offset| contains an invalid DWARF 4 artefact,
+// described by |what|. At most the first 100 bytes of |what| are printed.
+//
+// The length limit is applied with a "%.100s" precision rather than by
+// taking a heap copy with strndup(): that copy's result was never checked,
+// so an allocation failure would have handed a null pointer to "%s".
+void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset,
+                                                    const char* what) {
+  // Guard against a null description as well; "%s" on null is undefined.
+  const char* what_safe = what ? what : "(null)";
+  char buf[300];
+  SprintfLiteral(buf,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies invalid Dwarf4 artefact: %.100s\n",
+                 filename_.c_str(), offset, section_.c_str(), what_safe);
+  log_(buf);
+}
+
+// Log that the CIE at |offset| has a 'z' augmentation naming an invalid
+// pointer encoding, |encoding|.
+void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
+                                                     uint8 encoding) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies invalid pointer encoding: "
+                 "0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the CIE at |offset| has a 'z' augmentation naming a pointer
+// encoding, |encoding|, for which no base address is available.
+void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
+                                                      uint8 encoding) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies a pointer encoding for which"
+                 " we have no base address: 0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Log that the CIE at |offset| contains a DW_CFA_restore instruction, at
+// |insn_offset|, which may not appear in a CIE.
+void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " the DW_CFA_restore instruction at offset 0x%llx"
+                 " cannot be used in a common information entry\n",
+                 file, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the entry of the given kind at |offset| contains an
+// unrecognized instruction at |insn_offset|.
+void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
+                                             CallFrameInfo::EntryKind kind,
+                                             uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx is unrecognized\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the instruction at |insn_offset|, in the entry of the given
+// kind at |offset|, needs a CFA rule but none has been set.
+void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
+                                        CallFrameInfo::EntryKind kind,
+                                        uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx assumes that a CFA rule "
+                 "has been set, but none has been set\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at |insn_offset|, in the
+// entry of the given kind at |offset|, found the saved-state stack empty.
+void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the DW_CFA_restore_state instruction at offset 0x%llx"
+                 " should pop a saved state from the stack, but the stack "
+                 "is empty\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Log that the DW_CFA_restore_state instruction at |insn_offset|, in the
+// entry of the given kind at |offset|, would clear the current CFA rule.
+void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  const char* file = filename_.c_str();
+  const char* section = section_.c_str();
+  const char* kind_name = CallFrameInfo::KindName(kind);
+
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the DW_CFA_restore_state instruction at offset 0x%llx"
+                 " would clear the CFA rule in effect\n",
+                 file, kind_name, offset, section, insn_offset);
+  log_(message);
+}
+
+// Return the number of entries in the i386 register-name table sketched
+// below; each leading number is the count of names on that row.
+unsigned int DwarfCFIToModule::RegisterNames::I386() {
+  /*
+   8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi",
+   3 "$eip", "$eflags", "$unused1",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   2 "$unused2", "$unused3",
+   8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   3 "$fcw", "$fsw", "$mxcsr",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5",
+   2 "$tr", "$ldtr"
+  */
+  const unsigned int row_sizes[] = {8, 3, 8, 2, 8, 8, 3, 8, 2};
+  unsigned int total = 0;
+  for (unsigned int row : row_sizes) {
+    total += row;
+  }
+  return total;
+}
+
+// Return the number of entries in the x86_64 register-name table sketched
+// below; each leading number is the count of names on that row.
+unsigned int DwarfCFIToModule::RegisterNames::X86_64() {
+  /*
+   8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp",
+   8 "$r8",  "$r9",  "$r10", "$r11", "$r12", "$r13", "$r14", "$r15",
+   1 "$rip",
+   8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   1 "$rflags",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2",
+   4 "$fs.base", "$gs.base", "$unused3", "$unused4",
+   2 "$tr", "$ldtr",
+   3 "$mxcsr", "$fcw", "$fsw"
+  */
+  const unsigned int row_sizes[] = {8, 8, 1, 8, 8, 8, 8, 1, 8, 4, 2, 3};
+  unsigned int total = 0;
+  for (unsigned int row : row_sizes) {
+    total += row;
+  }
+  return total;
+}
+
+// Per ARM IHI 0040A, section 3.1
+// Return the number of entries in the ARM register-name table sketched
+// below: thirteen rows of eight names each.
+unsigned int DwarfCFIToModule::RegisterNames::ARM() {
+  /*
+   8 "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+   8 "r8",  "r9",  "r10", "r11", "r12", "sp",  "lr",  "pc",
+   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+   8 "fps", "cpsr", "",   "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
+   8 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
+   8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+   8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
+  */
+  const unsigned int rows = 13;
+  const unsigned int names_per_row = 8;
+  return rows * names_per_row;
+}
+
+// Per ARM IHI 0057A, section 3.1
+// Return the number of entries in the ARM64 register-name table sketched
+// below: twelve rows of eight names each.
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  /*
+   8 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+   8 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+   8 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+  */
+  const unsigned int rows = 12;
+  const unsigned int names_per_row = 8;
+  return rows * names_per_row;
+}
+
+// Return the number of entries in the MIPS register-name table sketched
+// below; each leading number is the count of names on that row.
+unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
+  /*
+   8 "$zero", "$at",  "$v0",  "$v1",  "$a0",   "$a1",  "$a2",  "$a3",
+   8 "$t0",   "$t1",  "$t2",  "$t3",  "$t4",   "$t5",  "$t6",  "$t7",
+   8 "$s0",   "$s1",  "$s2",  "$s3",  "$s4",   "$s5",  "$s6",  "$s7",
+   8 "$t8",   "$t9",  "$k0",  "$k1",  "$gp",   "$sp",  "$fp",  "$ra",
+   9 "$lo",   "$hi",  "$pc",  "$f0",  "$f1",   "$f2",  "$f3",  "$f4",  "$f5",
+   8 "$f6",   "$f7",  "$f8",  "$f9",  "$f10",  "$f11", "$f12", "$f13",
+   7 "$f14",  "$f15", "$f16", "$f17", "$f18",  "$f19", "$f20",
+   7 "$f21",  "$f22", "$f23", "$f24", "$f25",  "$f26", "$f27",
+   6 "$f28",  "$f29", "$f30", "$f31", "$fcsr", "$fir"
+  */
+  const unsigned int row_sizes[] = {8, 8, 8, 8, 9, 8, 7, 7, 6};
+  unsigned int total = 0;
+  for (unsigned int row : row_sizes) {
+    total += row;
+  }
+  return total;
+}
+
+// See prototype for comments.
+//
+// Translate the DWARF expression held in |expr| into a sequence of
+// PfxInstrs appended to |summ|.
+//
+//   summ            receives the PfxInstrs and, if |debug|, log output.
+//   reader          used to decode bytes, LEB128 values and 4-byte words.
+//   expr            the raw bytes of the DWARF expression.
+//   debug           if true, log each decoded operation via summ->Log.
+//   pushCfaAtStart  recorded as the operand (1 or 0) of the PX_Start marker.
+//   derefAtEnd      if true, append a PX_Deref just before the PX_End marker.
+//
+// Returns the index AddPfxInstr gave for the PX_Start marker on success, or
+// -1 if the expression uses an unsupported opcode, has an operand that does
+// not fit, or is truncated. On failure, PfxInstrs already appended to |summ|
+// are not removed.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, string expr,
+                       bool debug, bool pushCfaAtStart, bool derefAtEnd) {
+  const char* cursor = expr.c_str();
+  const char* end1 = cursor + expr.length();
+
+  char buf[100];
+  if (debug) {
+    SprintfLiteral(buf, "LUL.DW  << DwarfExpr, len is %d\n",
+                   (int)(end1 - cursor));
+    summ->Log(buf);
+  }
+
+  // Add a marker for the start of this expression.  In it, indicate
+  // whether or not the CFA should be pushed onto the stack prior to
+  // evaluation.
+  int32_t start_ix =
+      summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0));
+  MOZ_ASSERT(start_ix >= 0);
+
+  while (cursor < end1) {
+    uint8 opc = reader->ReadOneByte(cursor);
+    cursor++;
+
+    const char* nm = nullptr;
+    PfxExprOp pxop = PX_End;
+
+    switch (opc) {
+      case DW_OP_lit0 ... DW_OP_lit31: {
+        int32_t simm32 = (int32_t)(opc - DW_OP_lit0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_lit%d\n", (int)simm32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32));
+        break;
+      }
+
+      case DW_OP_breg0 ... DW_OP_breg31: {
+        size_t len;
+        int64_t n = reader->ReadSignedLEB128(cursor, &len);
+        cursor += len;
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_breg%d %lld\n", (int)reg,
+                         (long long int)n);
+          summ->Log(buf);
+        }
+        // PfxInstr only allows a 32 bit signed offset.  So we
+        // must fail if the immediate is out of range.
+        if (n < INT32_MIN || INT32_MAX < n) goto fail;
+        (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg));
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n));
+        (void)summ->AddPfxInstr(PfxInstr(PX_Add));
+        break;
+      }
+
+      case DW_OP_const4s: {
+        // Make sure the 4-byte operand lies entirely inside the
+        // expression before reading it; otherwise the read below would
+        // run past the end of |expr|'s storage.
+        if (end1 - cursor < 4) goto fail;
+        uint64_t u64 = reader->ReadFourBytes(cursor);
+        cursor += 4;
+        // u64 is guaranteed by |ReadFourBytes| to be in the
+        // range 0 .. FFFFFFFF inclusive.  But to be safe:
+        uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF);
+        int32_t s32 = (int32_t)u32;
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_const4s %d\n", (int)s32);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32));
+        break;
+      }
+
+      // The operand-free opcodes each pick a name and a PfxExprOp, then
+      // share the emission code at |no_operands|.
+      case DW_OP_deref:
+        nm = "deref";
+        pxop = PX_Deref;
+        goto no_operands;
+      case DW_OP_and:
+        nm = "and";
+        pxop = PX_And;
+        goto no_operands;
+      case DW_OP_plus:
+        nm = "plus";
+        pxop = PX_Add;
+        goto no_operands;
+      case DW_OP_minus:
+        nm = "minus";
+        pxop = PX_Sub;
+        goto no_operands;
+      case DW_OP_shl:
+        nm = "shl";
+        pxop = PX_Shl;
+        goto no_operands;
+      case DW_OP_ge:
+        nm = "ge";
+        pxop = PX_CmpGES;
+        goto no_operands;
+      no_operands:
+        MOZ_ASSERT(nm && pxop != PX_End);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_%s\n", nm);
+          summ->Log(buf);
+        }
+        (void)summ->AddPfxInstr(PfxInstr(pxop));
+        break;
+
+      default:
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   unknown opc %d\n", (int)opc);
+          summ->Log(buf);
+        }
+        goto fail;
+
+    }  // switch (opc)
+
+  }  // while (cursor < end1)
+
+  MOZ_ASSERT(cursor >= end1);
+
+  if (cursor > end1) {
+    // We overran the Dwarf expression.  Give up.
+    goto fail;
+  }
+
+  // For DW_CFA_expression, what the expression denotes is the address
+  // of where the previous value is located.  The caller of this routine
+  // may therefore request one last dereference before the end marker is
+  // inserted.
+  if (derefAtEnd) {
+    (void)summ->AddPfxInstr(PfxInstr(PX_Deref));
+  }
+
+  // Insert an end marker, and declare success.
+  (void)summ->AddPfxInstr(PfxInstr(PX_End));
+  if (debug) {
+    SprintfLiteral(buf,
+                   "LUL.DW   conversion of dwarf expression succeeded, "
+                   "ix = %d\n",
+                   (int)start_ix);
+    summ->Log(buf);
+    summ->Log("LUL.DW  >>\n");
+  }
+  return start_ix;
+
+fail:
+  if (debug) {
+    summ->Log("LUL.DW   conversion of dwarf expression failed\n");
+    summ->Log("LUL.DW  >>\n");
+  }
+  return -1;
+}
+
+// Begin a new entry covering |length| bytes starting at |address|:
+// forward it to summ_, remember the return address column, and seed an
+// initial return-address rule when that column is below num_dw_regs_.
+// Always returns true.
+bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length,
+                             uint8 version, const string& augmentation,
+                             unsigned return_address) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n",
+                   address, length);
+    summ_->Log(line);
+  }
+
+  summ_->Entry(address, length);
+
+  // The version and augmentation are not checked here: if
+  // dwarf2reader::CallFrameInfo accepted them, that is good enough.
+
+  // Get ready to collect entries.
+  return_address_ = return_address;
+
+  // Breakpad STACK CFI records must carry a .ra rule, whereas DWARF CFI
+  // may leave the return address column without one when it is an
+  // ordinary register that already holds the return address on function
+  // entry. Cover that case with an initial .ra rule naming the register.
+  const bool ra_in_range = return_address_ < num_dw_regs_;
+  if (ra_in_range) {
+    summ_->Rule(address, return_address_, NODEREF, return_address, 0);
+  }
+
+  return true;
+}
+
+// Return the unique string naming register |i|: ".cfa" for a negative
+// number, ".ra" for the return address column, else "dwarf_reg_<i>".
+const UniqueString* DwarfCFIToModule::RegisterName(int i) {
+  if (i >= 0) {
+    const unsigned reg = i;
+    if (reg == return_address_) {
+      return usu_->ToUniqueString(".ra");
+    }
+
+    char name[30];
+    SprintfLiteral(name, "dwarf_reg_%u", reg);
+    return usu_->ToUniqueString(name);
+  }
+
+  // The only negative register number expected is the CFA pseudo-register.
+  MOZ_ASSERT(i == kCFARegister);
+  return usu_->ToUniqueString(".cfa");
+}
+
+bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) {
+  reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg));
+  // Treat this as a non-fatal error.
+  return true;
+}
+
+// Record, from |address| onwards, that |reg| keeps its own value, which
+// is expressed as the rule reg + 0 without a dereference. Always returns
+// true.
+bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = Same\n", address, reg);
+    summ_->Log(line);
+  }
+
+  const int base_register = reg;
+  summ_->Rule(address, reg, NODEREF, base_register, 0);
+  return true;
+}
+
+// Record, from |address| onwards, the rule
+// reg = *(base_register + offset). Always returns true.
+bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register,
+                                  long offset) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = *(r%d + %ld)\n", address,
+                   reg, base_register, offset);
+    summ_->Log(line);
+  }
+
+  // DEREF: the value is loaded from the computed address.
+  summ_->Rule(address, reg, DEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, the rule
+// reg = base_register + offset. Always returns true.
+bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register,
+                                     long offset) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = r%d + %ld\n", address, reg,
+                   base_register, offset);
+    summ_->Log(line);
+  }
+
+  // NODEREF: the computed sum itself is the value.
+  summ_->Rule(address, reg, NODEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, the rule reg = base_register, which is
+// expressed as base_register + 0 without a dereference. Always returns
+// true.
+bool DwarfCFIToModule::RegisterRule(uint64 address, int reg,
+                                    int base_register) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = r%d\n", address, reg,
+                   base_register);
+    summ_->Log(line);
+  }
+
+  summ_->Rule(address, reg, NODEREF, base_register, 0);
+  return true;
+}
+
+// Summarise |expression| with the CFA pushed at the start and a final
+// dereference at the end, and record it as the rule for |reg| from
+// |address| onwards. An expression that cannot be summarised is reported
+// but treated as non-fatal, so this always returns true.
+bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg,
+                                      const string& expression) {
+  const int32_t start_ix =
+      parseDwarfExpr(summ_, reader_, expression, !!DEBUG_DWARF,
+                     true /*pushCfaAtStart*/, true /*derefAtEnd*/);
+
+  if (start_ix < 0) {
+    // Parsing of the Dwarf expression failed.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, start_ix);
+  return true;
+}
+
+// Summarise |expression| with the CFA pushed at the start and no final
+// dereference, and record it as the rule for |reg| from |address|
+// onwards. An expression that cannot be summarised is reported but
+// treated as non-fatal, so this always returns true.
+bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg,
+                                         const string& expression) {
+  const int32_t start_ix =
+      parseDwarfExpr(summ_, reader_, expression, !!DEBUG_DWARF,
+                     true /*pushCfaAtStart*/, false /*!derefAtEnd*/);
+
+  if (start_ix < 0) {
+    // Parsing of the Dwarf expression failed.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, start_ix);
+  return true;
+}
+
+// Finish the current entry by notifying summ_, logging first when
+// DEBUG_DWARF is set. Always returns true.
+bool DwarfCFIToModule::End() {
+  // (Previously: module_->AddStackFrameEntry(entry_);)
+  const bool debug = !!DEBUG_DWARF;
+  if (debug) {
+    summ_->Log("LUL.DW DwarfCFIToModule::End()\n");
+  }
+
+  summ_->End();
+  return true;
+}
+
+void DwarfCFIToModule::Reporter::UndefinedNotSupported(
+    size_t offset, const UniqueString* reg) {
+  char buf[300];
+  SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n");
+  log_(buf);
+  // BPLOG(INFO) << file_ << ", section '" << section_
+  //  << "': the call frame entry at offset 0x"
+  //  << std::setbase(16) << offset << std::setbase(10)
+  //  << " sets the rule for register '" << FromUniqueString(reg)
+  //  << "' to 'undefined', but the Breakpad symbol file format cannot "
+  //  << " express this";
+}
+
+// FIXME: move this somewhere sensible
+// Return true if |n| has at most one bit set. Note that, despite the
+// name, this means 0 is accepted as well as the exact powers of two.
+static bool is_power_of_2(uint64_t n) {
+  // Clearing the lowest set bit leaves zero exactly when there was at
+  // most one bit set to begin with.
+  const uint64_t without_lowest_bit = n & (n - 1);
+  return without_lowest_bit == 0;
+}
+
+// Count a DWARF expression that could not be summarised, and log a
+// message only when the running count passes is_power_of_2(), so that
+// the log is not flooded. |offset| and |reg| are currently unused.
+void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised(
+    size_t offset, const UniqueString* reg) {
+  static uint64_t n_complaints = 0;  // This isn't threadsafe
+  ++n_complaints;
+
+  if (is_power_of_2(n_complaints)) {
+    char message[300];
+    SprintfLiteral(message,
+                   "DwarfCFIToModule::Reporter::"
+                   "ExpressionCouldNotBeSummarised(shown %llu times)\n",
+                   (unsigned long long int)n_complaints);
+    log_(message);
+  }
+}
+
+}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulDwarfExt.h b/mozglue/baseprofiler/lul/LulDwarfExt.h
new file mode 100644
index 0000000000..dcd2500e5a
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfExt.h
@@ -0,0 +1,1289 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright 2006, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/types.h
+//   src/common/dwarf/dwarf2enums.h
+//   src/common/dwarf/bytereader.h
+//   src/common/dwarf_cfi_to_module.h
+//   src/common/dwarf/dwarf2reader.h
+
+#ifndef LulDwarfExt_h
+#define LulDwarfExt_h
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+
+#include <stdint.h>
+#include <string>
+
+// Short integer aliases in the style of the Breakpad sources this header
+// is derived from.  Each one names a built-in type directly.
+using int8 = signed char;
+using uint8 = unsigned char;
+
+using int16 = short;
+using uint16 = unsigned short;
+
+using int32 = int;
+using uint32 = unsigned int;
+
+using int64 = long long;
+using uint64 = unsigned long long;
+
+// Pointer-difference-sized integers, taken from the compiler's predefined
+// __PTRDIFF_TYPE__ macro; compilers that lack it are rejected.
+#if !defined(__PTRDIFF_TYPE__)
+#  error "Can't find pointer-sized integral types."
+#else
+using intptr = __PTRDIFF_TYPE__;
+using uintptr = unsigned __PTRDIFF_TYPE__;
+#endif
+
+namespace lul {
+
+class UniqueString;
+
+// Pointer formats used in exception handling frame descriptions, as laid
+// out in section 11.5 ("DWARF Extensions") of the Linux Standard Base
+// Core Specification 4.0.
+enum DwarfPointerEncoding {
+  DW_EH_PE_absptr = 0x00,
+
+  // Unsigned forms.
+  DW_EH_PE_uleb128 = 0x01,
+  DW_EH_PE_udata2 = 0x02,
+  DW_EH_PE_udata4 = 0x03,
+  DW_EH_PE_udata8 = 0x04,
+
+  // Not an encoding in its own right: the GNU toolchain sources define
+  // this value simply to help classify the lower nybble values into
+  // signed and unsigned groups.
+  DW_EH_PE_signed = 0x08,
+
+  // Signed forms.
+  DW_EH_PE_sleb128 = 0x09,
+  DW_EH_PE_sdata2 = 0x0A,
+  DW_EH_PE_sdata4 = 0x0B,
+  DW_EH_PE_sdata8 = 0x0C,
+
+  DW_EH_PE_pcrel = 0x10,
+  DW_EH_PE_textrel = 0x20,
+  DW_EH_PE_datarel = 0x30,
+  DW_EH_PE_funcrel = 0x40,
+  DW_EH_PE_aligned = 0x50,
+
+  // Undocumented in LSB 4.0, yet used by both the Linux and OS X
+  // toolchains. It may be combined with any other encoding (except
+  // DW_EH_PE_aligned) and means the encoded value is the address at
+  // which the true address is stored, rather than the true address
+  // itself.
+  DW_EH_PE_indirect = 0x80,
+
+  DW_EH_PE_omit = 0xff
+};
+
+// Byte order selector for ByteReader. The obvious names LITTLE_ENDIAN
+// and BIG_ENDIAN are avoided because they conflict with a macro.
+enum Endianness {
+  ENDIANNESS_BIG = 0,
+  ENDIANNESS_LITTLE = 1
+};
+
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information and Linux C++ exception handling data.
+class ByteReader {
+ public:
+  // Construct a ByteReader capable of reading one-, two-, four-, and
+  // eight-byte values according to ENDIANNESS, absolute machine-sized
+  // addresses, DWARF-style "initial length" values, signed and
+  // unsigned LEB128 numbers, and Linux C++ exception handling data's
+  // encoded pointers.
+  explicit ByteReader(enum Endianness endianness);
+  virtual ~ByteReader();
+
+  // Read a single byte from BUFFER and return it as an unsigned 8 bit
+  // number.
+  uint8 ReadOneByte(const char* buffer) const;
+
+  // Read two bytes from BUFFER and return them as an unsigned 16 bit
+  // number, using this ByteReader's endianness.
+  uint16 ReadTwoBytes(const char* buffer) const;
+
+  // Read four bytes from BUFFER and return them as an unsigned 32 bit
+  // number, using this ByteReader's endianness. This function returns
+  // a uint64 so that it is compatible with ReadAddress and
+  // ReadOffset. The number it returns will never be outside the range
+  // of an unsigned 32 bit integer.
+  uint64 ReadFourBytes(const char* buffer) const;
+
+  // Read eight bytes from BUFFER and return them as an unsigned 64
+  // bit number, using this ByteReader's endianness.
+  uint64 ReadEightBytes(const char* buffer) const;
+
+  // Read an unsigned LEB128 (Little Endian Base 128) number from
+  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
+  // the number of bytes read.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
+
+  // Read a signed LEB128 number from BUFFER and return it as a
+  // signed 64 bit integer. Set LEN to the number of bytes read.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte whose value is N in two's
+  //   complement.
+  //
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
+
+  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+  // must be either 4 or 8. (DWARF allows addresses to be any number of
+  // bytes in length from 1 to 255, but we only support 32- and 64-bit
+  // addresses at the moment.) You must call this before using the
+  // ReadAddress member function.
+  //
+  // For data in a .debug_info section, or something that .debug_info
+  // refers to like line number or macro data, the compilation unit
+  // header's address_size field indicates the address size to use. Call
+  // frame information doesn't indicate its address size (a shortcoming of
+  // the spec); you must supply the appropriate size based on the
+  // architecture of the target machine.
+  void SetAddressSize(uint8 size);
+
+  // Return the current address size, in bytes. This is either 4,
+  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+  uint8 AddressSize() const { return address_size_; }
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting this ByteReader's endianness and address size. You
+  // must call SetAddressSize before calling this function.
+  uint64 ReadAddress(const char* buffer) const;
+
+  // DWARF actually defines two slightly different formats: 32-bit DWARF
+  // and 64-bit DWARF. This is *not* related to the size of registers or
+  // addresses on the target machine; it refers only to the size of section
+  // offsets and data lengths appearing in the DWARF data. One only needs
+  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+  // debugging data itself is very large.
+  //
+  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+  // compilation unit and call frame information entry begins with an
+  // "initial length" field, which, in addition to giving the length of the
+  // data, also indicates the size of section offsets and lengths appearing
+  // in that data. The ReadInitialLength member function, below, reads an
+  // initial length and sets the ByteReader's offset size as a side effect.
+  // Thus, in the normal process of reading DWARF data, the appropriate
+  // offset size is set automatically. So, you should only need to call
+  // SetOffsetSize if you are using the same ByteReader to jump from the
+  // midst of one block of DWARF data into another.
+
+  // Read a DWARF "initial length" field from START, and return it as
+  // an unsigned 64 bit integer, respecting this ByteReader's
+  // endianness. Set *LEN to the length of the initial length in
+  // bytes, either four or twelve. As a side effect, set this
+  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+  // initial length) or 8 (if we see a 64-bit DWARF initial length).
+  //
+  // A DWARF initial length is either:
+  //
+  // - a byte count stored as an unsigned 32-bit value less than
+  //   0xfffffff0, indicating that the data whose length is being
+  //   measured uses the 32-bit DWARF format, or
+  //
+  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
+  //   indicating that the data whose length is being measured uses
+  //   the 64-bit DWARF format.
+  uint64 ReadInitialLength(const char* start, size_t* len);
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+  // long. You must call ReadInitialLength or SetOffsetSize before calling
+  // this function; see the comments above for details.
+  uint64 ReadOffset(const char* buffer) const;
+
+  // Return the current offset size, in bytes.
+  // A return value of 4 indicates that we are reading 32-bit DWARF.
+  // A return value of 8 indicates that we are reading 64-bit DWARF.
+  uint8 OffsetSize() const { return offset_size_; }
+
+  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+  // Usually, you should not call this function yourself; instead, let a
+  // call to ReadInitialLength establish the data's offset size
+  // automatically.
+  void SetOffsetSize(uint8 size);
+
+  // The Linux C++ ABI uses a variant of DWARF call frame information
+  // for exception handling. This data is included in the program's
+  // address space as the ".eh_frame" section, and interpreted at
+  // runtime to walk the stack, find exception handlers, and run
+  // cleanup code. The format is mostly the same as DWARF CFI, with
+  // some adjustments made to provide the additional
+  // exception-handling data, and to make the data easier to work with
+  // in memory --- for example, to allow it to be placed in read-only
+  // memory even when describing position-independent code.
+  //
+  // In particular, exception handling data can select a number of
+  // different encodings for pointers that appear in the data, as
+  // described by the DwarfPointerEncoding enum. There are actually
+  // four axes(!) to the encoding:
+  //
+  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+  //   the DWARF LEB128 encoding.
+  //
+  // - The pointer's signedness: pointers can be signed or unsigned.
+  //
+  // - The pointer's base address: the data stored in the exception
+  //   handling data can be the actual address (that is, an absolute
+  //   pointer), or relative to one of a number of different base
+  //   addresses --- including that of the encoded pointer itself, for
+  //   a form of "pc-relative" addressing.
+  //
+  // - The pointer may be indirect: it may be the address where the
+  //   true pointer is stored. (This is used to refer to things via
+  //   global offset table entries, program linkage table entries, or
+  //   other tricks used in position-independent code.)
+  //
+  // There are also two options that fall outside that matrix
+  // altogether: the pointer may be omitted, or it may have padding to
+  // align it on an appropriate address boundary. (That last option
+  // may seem like it should be just another axis, but it is not.)
+
+  // Indicate that the exception handling data is loaded starting at
+  // SECTION_BASE, and that the start of its buffer in our own memory
+  // is BUFFER_BASE. This allows us to find the address that a given
+  // byte in our buffer would have when loaded into the program the
+  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+  void SetCFIDataBase(uint64 section_base, const char* buffer_base);
+
+  // Indicate that the base address of the program's ".text" section
+  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+  void SetTextBase(uint64 text_base);
+
+  // Indicate that the base address for DW_EH_PE_datarel pointers is
+  // DATA_BASE. The proper value depends on the ABI; it is usually the
+  // address of the global offset table, held in a designated register in
+  // position-independent code. You will need to look at the startup code
+  // for the target system to be sure. I tried; my eyes bled.
+  void SetDataBase(uint64 data_base);
+
+  // Indicate that the base address for the FDE we are processing is
+  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+  // pointers. (This encoding does not seem to be used by the GNU
+  // toolchain.)
+  void SetFunctionBase(uint64 function_base);
+
+  // Indicate that we are no longer processing any FDE, so any use of
+  // a DW_EH_PE_funcrel encoding is an error.
+  void ClearFunctionBase();
+
+  // Return true if ENCODING is a valid pointer encoding.
+  bool ValidEncoding(DwarfPointerEncoding encoding) const;
+
+  // Return true if we have all the information we need to read a
+  // pointer that uses ENCODING. This checks that the appropriate
+  // SetFooBase function for ENCODING has been called.
+  bool UsableEncoding(DwarfPointerEncoding encoding) const;
+
+  // Read an encoded pointer from BUFFER using ENCODING; return the
+  // absolute address it represents, and set *LEN to the pointer's
+  // length in bytes, including any padding for aligned pointers.
+  //
+  // This function calls 'abort' if ENCODING is invalid or refers to a
+  // base address this reader hasn't been given, so you should check
+  // with ValidEncoding and UsableEncoding first if you would rather
+  // die in a more helpful way.
+  uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding,
+                            size_t* len) const;
+
+ private:
+  // Function pointer type for our address and offset readers.
+  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
+  // generally depending on the amount of DWARF2/3 info present.
+  // This function pointer gets set by SetOffsetSize.
+  AddressReader offset_reader_;
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 allow addresses to be any size from 0-255
+  // bytes currently.  Internally we support 4 and 8 byte addresses,
+  // and will CHECK on anything else.
+  // This function pointer gets set by SetAddressSize.
+  AddressReader address_reader_;
+
+  // Byte order consulted by the fixed-width multi-byte readers
+  // (ReadTwoBytes, ReadFourBytes, ReadEightBytes).
+  Endianness endian_;
+  // Value reported by AddressSize().
+  uint8 address_size_;
+  // Value reported by OffsetSize().
+  uint8 offset_size_;
+
+  // Base addresses for Linux C++ exception handling data's encoded pointers.
+  bool have_section_base_, have_text_base_, have_data_base_;
+  bool have_function_base_;
+  uint64 section_base_;
+  uint64 text_base_, data_base_, function_base_;
+  const char* buffer_base_;
+};
+
+// Return the byte BUFFER points at, converted to an unsigned 8 bit value.
+inline uint8 ByteReader::ReadOneByte(const char* buffer) const {
+  return static_cast<uint8>(*buffer);
+}
+
+// Combine the two bytes at SIGNED_BUFFER into a 16 bit value. Which byte
+// is the low-order one is decided by endian_.
+inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const {
+  // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  const uint16 low = little ? bytes[0] : bytes[1];
+  const uint16 high = little ? bytes[1] : bytes[0];
+  return low | high << 8;
+}
+
+// Combine the four bytes at SIGNED_BUFFER into a 32 bit value according
+// to endian_, and return it widened to uint64.
+inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const {
+  // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  uint32 value = 0;
+  // Fold the bytes in from most significant to least significant.
+  for (int i = 0; i < 4; ++i) {
+    value = (value << 8) | bytes[little ? 3 - i : i];
+  }
+  return value;
+}
+
+// Combine the eight bytes at SIGNED_BUFFER into a 64 bit value according
+// to endian_.
+inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const {
+  // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
+  const unsigned char* bytes =
+      reinterpret_cast<const unsigned char*>(signed_buffer);
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  uint64 value = 0;
+  // Fold the bytes in from most significant to least significant.
+  for (int i = 0; i < 8; ++i) {
+    value = (value << 8) | bytes[little ? 7 - i : i];
+  }
+  return value;
+}
+
+// Read an unsigned LEB128 number.  Each byte contains 7 bits of
+// information, plus one bit saying whether the number continues or
+// not.
+//
+// BUFFER must hold a complete encoding: reading continues until a byte
+// with the high bit clear is found, and there is no end-of-buffer check.
+// *LEN receives the number of bytes consumed. An over-long encoding is
+// consumed in full, but payload bits that would land beyond bit 63 are
+// dropped instead of being shifted by an out-of-range amount.
+
+inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer,
+                                             size_t* len) const {
+  uint64 result = 0;
+  size_t num_read = 0;
+  unsigned int shift = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+
+    // Shifting a 64-bit value by 64 or more is undefined behaviour, so
+    // only fold in groups that still start inside the result. Leaving
+    // |shift| untouched afterwards also keeps it from ever wrapping.
+    if (shift < 8 * sizeof(result)) {
+      result |= (static_cast<uint64>(byte & 0x7f)) << shift;
+      shift += 7;
+    }
+
+  } while (byte & 0x80);
+
+  *len = num_read;
+
+  return result;
+}
+
+// Read a signed LEB128 number.  These are like regular LEB128
+// numbers, except the last byte may have a sign bit set.
+//
+// BUFFER must hold a complete encoding: reading continues until a byte
+// with the high bit clear is found, and there is no end-of-buffer check.
+// *LEN receives the number of bytes consumed. All bit manipulation is
+// done on an unsigned accumulator, so neither an over-long encoding nor
+// sign extension at bit 63 performs an out-of-range shift or overflows a
+// signed intermediate.
+
+inline int64 ByteReader::ReadSignedLEB128(const char* buffer,
+                                          size_t* len) const {
+  uint64 bits = 0;
+  unsigned int shift = 0;
+  size_t num_read = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+    // Groups that would start at or beyond bit 64 are dropped; shifting by
+    // that much is undefined behaviour.
+    if (shift < 8 * sizeof(bits)) {
+      bits |= (static_cast<uint64>(byte & 0x7f) << shift);
+      shift += 7;
+    }
+  } while (byte & 0x80);
+
+  // Bit 6 of the final byte is the sign. When it is set and the value did
+  // not already fill all 64 bits, set every bit from |shift| upwards.
+  if ((shift < 8 * sizeof(bits)) && (byte & 0x40)) {
+    bits |= ~static_cast<uint64>(0) << shift;
+  }
+  *len = num_read;
+  return static_cast<int64>(bits);
+}
+
+// Read an offset from BUFFER by dispatching through offset_reader_, which
+// is asserted to be non-null.
+inline uint64 ByteReader::ReadOffset(const char* buffer) const {
+  MOZ_ASSERT(offset_reader_);
+  const AddressReader reader = offset_reader_;
+  return (this->*reader)(buffer);
+}
+
+// Read an address from BUFFER by dispatching through address_reader_,
+// which is asserted to be non-null.
+inline uint64 ByteReader::ReadAddress(const char* buffer) const {
+  MOZ_ASSERT(address_reader_);
+  const AddressReader reader = address_reader_;
+  return (this->*reader)(buffer);
+}
+
+// Record SECTION_BASE and BUFFER_BASE, and note that a section base is
+// now available.
+inline void ByteReader::SetCFIDataBase(uint64 section_base,
+                                       const char* buffer_base) {
+  have_section_base_ = true;
+  buffer_base_ = buffer_base;
+  section_base_ = section_base;
+}
+
+// Record TEXT_BASE and note that a text base is now available.
+inline void ByteReader::SetTextBase(uint64 text_base) {
+  have_text_base_ = true;
+  text_base_ = text_base;
+}
+
+// Record DATA_BASE and note that a data base is now available.
+inline void ByteReader::SetDataBase(uint64 data_base) {
+  have_data_base_ = true;
+  data_base_ = data_base;
+}
+
+// Record FUNCTION_BASE and note that a function base is now available.
+inline void ByteReader::SetFunctionBase(uint64 function_base) {
+  have_function_base_ = true;
+  function_base_ = function_base;
+}
+
+// Mark the function base as unavailable; the stored value is left as is.
+inline void ByteReader::ClearFunctionBase() {
+  have_function_base_ = false;
+}
+
+// (derived from)
+// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which
+// accepts parsed DWARF call frame info and adds it to a Summariser object.
+
+// This class is a reader for DWARF's Call Frame Information.  CFI
+// describes how to unwind stack frames --- even for functions that do
+// not follow fixed conventions for saving registers, whose frame size
+// varies as they execute, etc.
+//
+// CFI describes, at each machine instruction, how to compute the
+// stack frame's base address, how to find the return address, and
+// where to find the saved values of the caller's registers (if the
+// callee has stashed them somewhere to free up the registers for its
+// own use).
+//
+// For example, suppose we have a function whose machine code looks
+// like this (imagine an assembly language that looks like C, for a
+// machine with 32-bit registers, and a stack that grows towards lower
+// addresses):
+//
+// func:                                ; entry point; return address at sp
+// func+0:      sp = sp - 16            ; allocate space for stack frame
+// func+1:      sp[12] = r0             ; save r0 at sp+12
+// ...                                  ; other code, not frame-related
+// func+10:     sp -= 4; *sp = x        ; push some x on the stack
+// ...                                  ; other code, not frame-related
+// func+20:     r0 = sp[16]             ; restore saved r0
+// func+21:     sp += 20                ; pop whole stack frame
+// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
+//
+// DWARF CFI is (a very compressed representation of) a table with a
+// row for each machine instruction address and a column for each
+// register showing how to restore it, if possible.
+//
+// A special column named "CFA", for "Canonical Frame Address", tells how
+// to compute the base address of the frame; registers' entries may
+// refer to the CFA in describing where the registers are saved.
+//
+// Another special column, named "RA", represents the return address.
+//
+// For example, here is a complete (uncompressed) table describing the
+// function above:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16                  cfa[0]
+//     func+2:   sp+16  cfa[-4]         cfa[0]
+//     func+11:  sp+20  cfa[-4]         cfa[0]
+//     func+21:  sp+20                  cfa[0]
+//     func+22:  sp                     cfa[0]
+//
+// Some things to note here:
+//
+// - Each row describes the state of affairs *before* executing the
+//   instruction at the given address.  Thus, the row for func+0
+//   describes the state before we allocate the stack frame.  In the
+//   next row, the formula for computing the CFA has changed,
+//   reflecting that allocation.
+//
+// - The other entries are written in terms of the CFA; this allows
+//   them to remain unchanged as the stack pointer gets bumped around.
+//   For example, the rule for recovering the return address (the "ra"
+//   column) remains unchanged throughout the function, even as the
+//   stack pointer takes on three different offsets from the return
+//   address.
+//
+// - Although we haven't shown it, most calling conventions designate
+//   "callee-saves" and "caller-saves" registers. The callee must
+//   preserve the values of callee-saves registers; if it uses them,
+//   it must save their original values somewhere, and restore them
+//   before it returns. In contrast, the callee is free to trash
+//   caller-saves registers; if the callee uses these, it will
+//   probably not bother to save them anywhere, and the CFI will
+//   probably mark their values as "unrecoverable".
+//
+//   (However, since the caller cannot assume the callee was going to
+//   save them, caller-saves registers are probably dead in the caller
+//   anyway, so compilers usually don't generate CFI for caller-saves
+//   registers.)
+//
+// - Exactly where the CFA points is a matter of convention that
+//   depends on the architecture and ABI in use. In the example, the
+//   CFA is the value the stack pointer had upon entry to the
+//   function, pointing at the saved return address. But on the x86,
+//   the call frame information generated by GCC follows the
+//   convention that the CFA is the address *after* the saved return
+//   address.
+//
+//   But by definition, the CFA remains constant throughout the
+//   lifetime of the frame. This makes it a useful value for other
+//   columns to refer to. It also gives debuggers a useful handle
+//   for identifying a frame.
+//
+// If you look at the table above, you'll notice that a given entry is
+// often the same as the one immediately above it: most instructions
+// change only one or two aspects of the stack frame, if they affect
+// it at all. The DWARF format takes advantage of this fact, and
+// reduces the size of the data by mentioning only the addresses and
+// columns at which changes take place. So for the above, DWARF CFI
+// data would only actually mention the following:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16
+//     func+2:          cfa[-4]
+//     func+11:  sp+20
+//     func+21:         r0
+//     func+22:  sp
+//
+// In fact, this is the way the parser reports CFI to the consumer: as
+// a series of statements of the form, "At address X, column Y changed
+// to Z," and related conventions for describing the initial state.
+//
+// Naturally, it would be impractical to have to scan the entire
+// program's CFI, noting changes as we go, just to recover the
+// unwinding rules in effect at one particular instruction. To avoid
+// this, CFI data is grouped into "entries", each of which covers a
+// specified range of addresses and begins with a complete statement
+// of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+//   its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+//   CFA plus offset N. (This is usually only useful for describing
+//   the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the address at which the
+//   register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the value the register
+//   had in the caller.
+
+class CallFrameInfo {
+ public:
+  // The different kinds of entries one finds in CFI. Used internally,
+  // and for error reporting.
+  enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+  // The handler class to which the parser hands the parsed call frame
+  // information.  Defined below.
+  class Handler;
+
+  // A reporter class, which CallFrameInfo uses to report errors
+  // encountered while parsing call frame information.  Defined below.
+  class Reporter;
+
+  // Create a DWARF CFI parser. BUFFER points to the contents of the
+  // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+  // REPORTER is an error reporter the parser should use to report
+  // problems. READER is a ByteReader instance that has the endianness and
+  // address size set properly. Report the data we find to HANDLER.
+  //
+  // This class can also parse Linux C++ exception handling data, as found
+  // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+  // placed in loadable segments so that it is present in the program's
+  // address space, and is interpreted by the C++ runtime to search the
+  // call stack for a handler interested in the exception being thrown,
+  // actually pop the frames, and find cleanup code to run.
+  //
+  // There are two differences between the call frame information described
+  // in the DWARF standard and the exception handling data Linux places in
+  // the .eh_frame section:
+  //
+  // - Exception handling data uses a different format for call frame
+  //   information entry headers. The distinguished CIE id, the way FDEs
+  //   refer to their CIEs, and the way the end of the series of entries is
+  //   determined are all slightly different.
+  //
+  //   If the constructor's EH_FRAME argument is true, then the
+  //   CallFrameInfo parses the entry headers as Linux C++ exception
+  //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+  //   parses standard DWARF call frame information.
+  //
+  // - Linux C++ exception handling data uses CIE augmentation strings
+  //   beginning with 'z' to specify the presence of additional data after
+  //   the CIE and FDE headers and special encodings used for addresses in
+  //   frame description entries.
+  //
+  //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+  //   exception handling data if you have supplied READER with the base
+  //   addresses needed to interpret the pointer encodings that 'z'
+  //   augmentations can specify. See the ByteReader interface for details
+  //   about the base addresses. See the CallFrameInfo::Handler interface
+  //   for details about the additional information one might find in
+  //   'z'-augmented data.
+  //
+  // Thus:
+  //
+  // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+  //   section, you should pass false for the EH_FRAME argument, or omit
+  //   it, and you need not worry about providing READER with the
+  //   additional base addresses.
+  //
+  // - If you want to parse Linux C++ exception handling data from a
+  //   .eh_frame section, you should pass EH_FRAME as true, and call
+  //   READER's Set*Base member functions before calling our Start method.
+  //
+  // - If you want to parse DWARF CFI that uses the 'z' augmentations
+  //   (although I don't think any toolchain ever emits such data), you
+  //   could pass false for EH_FRAME, but call READER's Set*Base members.
+  //
+  // The extensions the Linux C++ ABI makes to DWARF for exception
+  // handling are described here, rather poorly:
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+  //
+  // The mechanics of C++ exception handling, personality routines,
+  // and language-specific data areas are described here, rather nicely:
+  // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+
+  CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader,
+                Handler* handler, Reporter* reporter, bool eh_frame = false)
+      : buffer_(buffer),  // Pointers are stored as given; nothing is copied.
+        buffer_length_(buffer_length),
+        reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        eh_frame_(eh_frame) {}  // The constructor itself does no parsing.
+
+  ~CallFrameInfo() {}  // Empty: nothing is released here.
+
+  // Parse the entries in BUFFER, reporting what we find to HANDLER.
+  // Return true if we reach the end of the section successfully, or
+  // false if we encounter an error.
+  bool Start();
+
+  // Return the textual name of KIND. For error reporting.
+  static const char* KindName(EntryKind kind);
+
+ private:
+  struct CIE;
+
+  // A CFI entry, either an FDE or a CIE. Base of the CIE and FDE structs.
+  struct Entry {
+    // The starting offset of the entry in the section, for error
+    // reporting.
+    size_t offset;
+
+    // The start of this entry in the buffer.
+    const char* start;
+
+    // Which kind of entry this is.
+    //
+    // We want to be able to use this for error reporting even while we're
+    // in the midst of parsing. Error reporting code may assume that kind,
+    // offset, and start fields are valid, although kind may be kUnknown.
+    EntryKind kind;
+
+    // The end of this entry's common prologue (initial length and id), and
+    // the start of this entry's kind-specific fields.
+    const char* fields;
+
+    // The start of this entry's instructions.
+    const char* instructions;
+
+    // The address past the entry's last byte in the buffer. (Note that
+    // since offset points to the entry's initial length field, and the
+    // length field is the number of bytes after that field, this is not
+    // simply buffer_ + offset + length.)
+    const char* end;
+
+    // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+    // CIE, and the offset of the associated CIE in an FDE.
+    uint64 id;
+
+    // The CIE that applies to this entry, if we've parsed it. If this is a
+    // CIE, then this field points to this structure.
+    CIE* cie;
+  };
+
+  // A common information entry (CIE): the Entry fields plus CIE-only data.
+  struct CIE : public Entry {
+    uint8 version;                     // CFI data version number
+    std::string augmentation;          // vendor format extension markers
+    uint64 code_alignment_factor;      // scale for code address adjustments
+    int data_alignment_factor;         // scale for stack pointer adjustments
+    unsigned return_address_register;  // which register holds the return addr
+
+    // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+    bool has_z_augmentation;
+
+    // Parsed 'z' augmentation data. These are meaningful only if
+    // has_z_augmentation is true.
+    bool has_z_lsda;          // The 'z' augmentation included 'L'.
+    bool has_z_personality;   // The 'z' augmentation included 'P'.
+    bool has_z_signal_frame;  // The 'z' augmentation included 'S'.
+
+    // If has_z_lsda is true, this is the encoding to be used for language-
+    // specific data area pointers in FDEs.
+    DwarfPointerEncoding lsda_encoding;
+
+    // If has_z_personality is true, this is the encoding used for the
+    // personality routine pointer in the augmentation data.
+    DwarfPointerEncoding personality_encoding;
+
+    // If has_z_personality is true, this is the address of the personality
+    // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
+    // address where the personality routine's address is stored.
+    uint64 personality_address;
+
+    // This is the encoding used for addresses in the FDE header and
+    // in DW_CFA_set_loc instructions. This is always valid, whether
+    // or not we saw a 'z' augmentation string; its default value is
+    // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+    DwarfPointerEncoding pointer_encoding;
+  };
+
+  // A frame description entry (FDE): the Entry fields plus FDE-only data.
+  struct FDE : public Entry {
+    uint64 address;  // start address of described code
+    uint64 size;     // size of described code, in bytes
+
+    // If cie->has_z_lsda is true, then this is the language-specific data
+    // area's address --- or its address's address, if cie->lsda_encoding
+    // has the DW_EH_PE_indirect bit set.
+    uint64 lsda_address;
+  };
+
+  // Internal use.
+  class Rule;
+  class UndefinedRule;
+  class SameValueRule;
+  class OffsetRule;
+  class ValOffsetRule;
+  class RegisterRule;
+  class ExpressionRule;
+  class ValExpressionRule;
+  class RuleMap;
+  class State;
+
+  // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+  // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+  // data to parse. On success, populate ENTRY as appropriate, and return
+  // true. On failure, report the problem, and return false. Even if we
+  // return false, set ENTRY->end to the first byte after the entry if we
+  // were able to figure that out, or NULL if we weren't.
+  bool ReadEntryPrologue(const char* cursor, Entry* entry);
+
+  // Parse the fields of a CIE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of CIE are
+  // populated; use CIE->fields and CIE->end as the start and limit for
+  // parsing. On success, populate the rest of *CIE, and return true; on
+  // failure, report the problem and return false.
+  bool ReadCIEFields(CIE* cie);
+
+  // Parse the fields of an FDE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of *FDE are
+  // initialized; use FDE->fields and FDE->end as the start and limit for
+  // parsing. Assume that FDE->cie is fully initialized. On success,
+  // populate the rest of *FDE, and return true; on failure, report the
+  // problem and return false.
+  bool ReadFDEFields(FDE* fde);
+
+  // Report that ENTRY is incomplete, and return false. This is just a
+  // trivial wrapper for invoking reporter_->Incomplete; it provides a
+  // little brevity.
+  bool ReportIncomplete(Entry* entry);
+
+  // Report whether the DW_EH_PE_indirect flag is present in ENCODING.
+  static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+    return (encoding & DW_EH_PE_indirect) != 0;
+  }
+
+  // The contents of the CFI section (.debug_frame / .eh_frame) being parsed.
+  const char* buffer_;
+  size_t buffer_length_;
+
+  // For reading multi-byte values with the appropriate endianness.
+  ByteReader* reader_;
+
+  // The handler to which we should report the data we find.
+  Handler* handler_;
+
+  // For reporting problems in the info we're parsing.
+  Reporter* reporter_;
+
+  // True if we are processing .eh_frame-format data.
+  bool eh_frame_;
+};
+
+// The handler class for CallFrameInfo.  The CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+  // The pseudo-register number for the canonical frame address.
+  enum { kCFARegister = DW_REG_CFA };
+
+  Handler() {}
+  virtual ~Handler() {}
+
+  // The parser has found CFI for the machine code at ADDRESS,
+  // extending for LENGTH bytes. OFFSET is the offset of the frame
+  // description entry in the section, for use in error messages.
+  // VERSION is the version number of the CFI format. AUGMENTATION is
+  // a string describing any producer-specific extensions present in
+  // the data. RETURN_ADDRESS is the number of the register that holds
+  // the address to which the function should return.
+  //
+  // Entry should return true to process this CFI, or false to skip to
+  // the next entry.
+  //
+  // The parser invokes Entry for each Frame Description Entry (FDE)
+  // it finds.  The parser doesn't report Common Information Entries
+  // to the handler explicitly; instead, if the handler elects to
+  // process a given FDE, the parser reiterates the appropriate CIE's
+  // contents at the beginning of the FDE's rules.
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string& augmentation,
+                     unsigned return_address) = 0;
+
+  // When the Entry function returns true, the parser calls these
+  // handler functions repeatedly to describe the rules for recovering
+  // registers at each instruction in the given range of machine code.
+  // Immediately after a call to Entry, the handler should assume that
+  // the rule for each callee-saves register is "unchanged" --- that
+  // is, that the register still has the value it had in the caller.
+  //
+  // If a *Rule function returns true, we continue processing this entry's
+  // instructions. If a *Rule function returns false, we stop evaluating
+  // instructions, and skip to the next entry. Either way, we call End
+  // before going on to the next entry.
+  //
+  // In all of these functions, if the REG parameter is kCFARegister, then
+  // the rule describes how to find the canonical frame address.
+  // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
+  // the canonical frame address should be used as the base address for the
+  // computation. All other REG values will be positive.
+
+  // At ADDRESS, register REG's value is not recoverable.
+  virtual bool UndefinedRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG's value is the same as that it had in
+  // the caller.
+  virtual bool SameValueRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG has been saved at offset OFFSET from
+  // BASE_REGISTER.
+  virtual bool OffsetRule(uint64 address, int reg, int base_register,
+                          long offset) = 0;
+
+  // At ADDRESS, the caller's value of register REG is the current
+  // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
+  // address at which the register's value is saved.)
+  virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+                             long offset) = 0;
+
+  // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
+  // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
+  // BASE_REGISTER is the "home" for REG's saved value: if you want to
+  // assign to a variable whose home is REG in the calling frame, you
+  // should put the value in BASE_REGISTER.
+  virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the address at
+  // which REG was saved.
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const std::string& expression) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
+  // value for REG. (This rule doesn't provide an address at which the
+  // register's value is saved.)
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const std::string& expression) = 0;
+
+  // Indicate that the rules for the address range reported by the
+  // last call to Entry are complete.  End should return true if
+  // everything is okay, or false if an error has occurred and parsing
+  // should stop.
+  virtual bool End() = 0;
+
+  // Handler functions for Linux C++ exception handling data. These are
+  // only called if the data includes 'z' augmentation strings.
+
+  // The Linux C++ ABI uses an extension of the DWARF CFI format to
+  // walk the stack to propagate exceptions from the throw to the
+  // appropriate catch, and do the appropriate cleanups along the way.
+  // CFI entries used for exception handling have two additional data
+  // associated with them:
+  //
+  // - The "language-specific data area" describes which exception
+  //   types the function has 'catch' clauses for, and indicates how
+  //   to go about re-entering the function at the appropriate catch
+  //   clause. If the exception is not caught, it describes the
+  //   destructors that must run before the frame is popped.
+  //
+  // - The "personality routine" is responsible for interpreting the
+  //   language-specific data area's contents, and deciding whether
+  //   the exception should continue to propagate down the stack,
+  //   perhaps after doing some cleanup for this frame, or whether the
+  //   exception will be caught here.
+  //
+  // In principle, the language-specific data area is opaque to
+  // everybody but the personality routine. In practice, these values
+  // may be useful or interesting to readers with extra context, and
+  // we have to at least skip them anyway, so we might as well report
+  // them to the handler.
+
+  // This entry's exception handling personality routine's address is
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the routine's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool PersonalityRoutine(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry's language-specific data area (LSDA) is located at
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the area's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry describes a signal trampoline --- this frame is the
+  // caller of a signal handler. The default definition for this
+  // handler function simply returns true, allowing parsing of the
+  // entry to continue.
+  //
+  // The best description of the rationale for and meaning of signal
+  // trampoline CFI entries seems to be in the GCC bug database:
+  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+  virtual bool SignalHandler() { return true; }
+};
+
+// The CallFrameInfo class makes calls on an instance of this class to
+// report errors or warn about problems in the data it is parsing.
+// These messages are sent to the message sink |aLog| provided to the
+// constructor.
+class CallFrameInfo::Reporter {
+ public:
+  // Create an error reporter which attributes troubles to the section
+  // named SECTION in FILENAME.
+  //
+  // Normally SECTION would be .debug_frame, but the Mac puts CFI data
+  // in a Mach-O section named __debug_frame. If we support
+  // Linux-style exception handling data, we could be reading an
+  // .eh_frame section.
+  Reporter(void (*aLog)(const char*), const std::string& filename,
+           const std::string& section = ".debug_frame")
+      : log_(aLog), filename_(filename), section_(section) {}
+  virtual ~Reporter() {}
+
+  // The CFI entry at OFFSET ends too early to be well-formed. KIND
+  // indicates what kind of entry it is; KIND can be kUnknown if we
+  // haven't parsed enough of the entry to tell yet.
+  virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
+
+  // The .eh_frame data has a four-byte zero at OFFSET where the next
+  // entry's length would be; this is a terminator. However, the buffer
+  // length as given to the CallFrameInfo constructor says there should be
+  // more data.
+  virtual void EarlyEHTerminator(uint64 offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
+  // section is not that large.
+  virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
+  // there is not a CIE.
+  virtual void BadCIEId(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to a CIE with version number VERSION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // a version number we don't recognize.
+  virtual void UnrecognizedVersion(uint64 offset, int version);
+
+  // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // augmentations we don't recognize.
+  virtual void UnrecognizedAugmentation(uint64 offset,
+                                        const std::string& augmentation);
+
+  // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4
+  // specific field (currently, only "address_size" or "segment_size").
+  // Parsing DWARF CFI with unexpected values here seems dubious at best,
+  // so we stop.  WHAT gives a little more information about what is wrong.
+  virtual void InvalidDwarf4Artefact(uint64 offset, const char* what);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+  // a valid encoding.
+  virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+  // on a base address which has not been supplied.
+  virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
+
+  // The CIE at OFFSET contains a DW_CFA_restore instruction at
+  // INSN_OFFSET, which may not appear in a CIE.
+  virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);
+
+  // The entry at OFFSET, of kind KIND, has an unrecognized
+  // instruction at INSN_OFFSET.
+  virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind,
+                              uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, establishes a rule that cites the CFA, but we have not
+  // established a CFA rule yet.
+  virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                         uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, is a DW_CFA_restore_state instruction, but the stack of
+  // saved states is empty.
+  virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+  // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
+  // at OFFSET, of kind KIND, would restore a state that has no CFA
+  // rule, whereas the current state does have a CFA rule. This is
+  // bogus input, which the CallFrameInfo::Handler interface doesn't
+  // (and shouldn't) have any way to report.
+  virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+ private:
+  // A logging sink function, as supplied by LUL's user.
+  void (*log_)(const char*);
+
+ protected:
+  // The name of the file whose CFI we're reading.
+  std::string filename_;
+
+  // The name of the CFI section in that file.
+  std::string section_;
+};
+
+using lul::CallFrameInfo;
+using lul::Summariser;
+
+// A class that accepts parsed call frame information from the DWARF
+// CFI parser (as a CallFrameInfo::Handler) and hands the contents on
+// to a Summariser.
+class DwarfCFIToModule : public CallFrameInfo::Handler {
+ public:
+  // DwarfCFIToModule uses an instance of this class to report errors
+  // detected while converting DWARF CFI to Breakpad STACK CFI records.
+  class Reporter {
+   public:
+    // Create a reporter that writes messages to the message sink
+    // |aLog|. FILE is the name of the file we're processing, and
+    // SECTION is the name of the section within that file that we're
+    // looking at (.debug_frame, .eh_frame, etc.).
+    Reporter(void (*aLog)(const char*), const std::string& file,
+             const std::string& section)
+        : log_(aLog), file_(file), section_(section) {}
+    virtual ~Reporter() {}
+
+    // The DWARF CFI entry at OFFSET says that REG is undefined, but the
+    // Breakpad symbol file format cannot express this.
+    virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg);
+
+    // The DWARF CFI entry at OFFSET says that REG uses a DWARF
+    // expression to find its value, but parseDwarfExpr could not
+    // convert it to a sequence of PfxInstrs.
+    virtual void ExpressionCouldNotBeSummarised(size_t offset,
+                                                const UniqueString* reg);
+
+   private:
+    // A logging sink function, as supplied by LUL's user.
+    void (*log_)(const char*);
+
+   protected:
+    std::string file_, section_;
+  };
+
+  // Per-architecture register counts. Each function returns an unsigned
+  // int rather than a name table; presumably this is the value to pass
+  // as the constructor's NUM_DW_REGS argument (confirm in the .cpp).
+  class RegisterNames {
+   public:
+    // Intel's "x86" or IA-32.
+    static unsigned int I386();
+
+    // AMD x86_64, AMD64, Intel EM64T, or Intel 64
+    static unsigned int X86_64();
+
+    // ARM.
+    static unsigned int ARM();
+
+    // AARCH64.
+    static unsigned int ARM64();
+
+    // MIPS.
+    static unsigned int MIPS();
+  };
+
+  // Create a handler for the lul::CallFrameInfo parser that records
+  // the stack unwinding information it receives in SUMM.
+  //
+  // NUM_DW_REGS is the number of DWARF-defined register names for the
+  // target. READER is kept for parsing DWARF expressions and USU for
+  // creating UniqueStrings; *this stores them without taking ownership.
+  //
+  // Use REPORTER for reporting problems encountered in the conversion
+  // process.
+  DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter,
+                   ByteReader* reader,
+                   /*MOD*/ UniqueStringUniverse* usu,
+                   /*OUT*/ Summariser* summ)
+      : summ_(summ),
+        usu_(usu),
+        num_dw_regs_(num_dw_regs),
+        reporter_(reporter),
+        reader_(reader),
+        return_address_(-1) {}
+  virtual ~DwarfCFIToModule() {}
+
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string& augmentation,
+                     unsigned return_address) override;
+  virtual bool UndefinedRule(uint64 address, int reg) override;
+  virtual bool SameValueRule(uint64 address, int reg) override;
+  virtual bool OffsetRule(uint64 address, int reg, int base_register,
+                          long offset) override;
+  virtual bool ValOffsetRule(uint64 address, int reg, int base_register,
+                             long offset) override;
+  virtual bool RegisterRule(uint64 address, int reg,
+                            int base_register) override;
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const std::string& expression) override;
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const std::string& expression) override;
+  virtual bool End() override;
+
+ private:
+  // Return the name to use for register I.
+  const UniqueString* RegisterName(int i);
+
+  // The Summariser to which we should give entries
+  Summariser* summ_;
+
+  // Universe for creating UniqueStrings in, should that be necessary.
+  UniqueStringUniverse* usu_;
+
+  // The number of Dwarf-defined register names for this architecture.
+  const unsigned int num_dw_regs_;
+
+  // The reporter to use to report problems.
+  Reporter* reporter_;
+
+  // The ByteReader to use for parsing Dwarf expressions.
+  ByteReader* reader_;
+
+  // The section offset of the current frame description entry, for
+  // use in error messages.
+  size_t entry_offset_ = 0;  // Not set by the constructor's init list.
+
+  // The return address column for that entry.
+  unsigned return_address_;
+};
+
+// Convert the Dwarf expression in |expr| into PfxInstrs stored in the
+// SecMap referred to by |summ|, and return the index of the starting
+// PfxInstr added, which must be >= 0.  In case of failure return -1.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+                       std::string expr, bool debug, bool pushCfaAtStart,
+                       bool derefAtEnd);
+
+}  // namespace lul
+
+#endif  // LulDwarfExt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarfInt.h b/mozglue/baseprofiler/lul/LulDwarfInt.h
new file mode 100644
index 0000000000..b72c6e08e3
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfInt.h
@@ -0,0 +1,193 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following file in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/dwarf2enums.h
+
+#ifndef LulDwarfInt_h
+#define LulDwarfInt_h
+
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+
+namespace lul {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes.
+
+// Call Frame Info instruction opcodes (DW_CFA_*).
+enum DwarfCFI {
+  DW_CFA_advance_loc = 0x40,
+  DW_CFA_offset = 0x80,
+  DW_CFA_restore = 0xc0,
+  DW_CFA_nop = 0x00,
+  DW_CFA_set_loc = 0x01,
+  DW_CFA_advance_loc1 = 0x02,
+  DW_CFA_advance_loc2 = 0x03,
+  DW_CFA_advance_loc4 = 0x04,
+  DW_CFA_offset_extended = 0x05,
+  DW_CFA_restore_extended = 0x06,
+  DW_CFA_undefined = 0x07,
+  DW_CFA_same_value = 0x08,
+  DW_CFA_register = 0x09,
+  DW_CFA_remember_state = 0x0a,
+  DW_CFA_restore_state = 0x0b,
+  DW_CFA_def_cfa = 0x0c,
+  DW_CFA_def_cfa_register = 0x0d,
+  DW_CFA_def_cfa_offset = 0x0e,
+  DW_CFA_def_cfa_expression = 0x0f,
+  DW_CFA_expression = 0x10,
+  DW_CFA_offset_extended_sf = 0x11,
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+
+  // Opcodes in this range are reserved for user extensions.
+  DW_CFA_lo_user = 0x1c,
+  DW_CFA_hi_user = 0x3f,
+
+  // SGI/MIPS specific (value lies within the user-extension range).
+  DW_CFA_MIPS_advance_loc8 = 0x1d,
+
+  // GNU extensions (values lie within the user-extension range).
+  DW_CFA_GNU_window_save = 0x2d,
+  DW_CFA_GNU_args_size = 0x2e,
+  DW_CFA_GNU_negative_offset_extended = 0x2f
+};
+
+// Letters that may appear in an exception-handling 'z' augmentation string.
+enum DwarfZAugmentationCodes {
+  // If the CFI augmentation string begins with 'z', then the CIE and FDE
+  // have an augmentation data area just before the instructions, whose
+  // contents are determined by the subsequent augmentation letters.
+  DW_Z_augmentation_start = 'z',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, and the FDE
+  // augmentation data includes a language-specific data area pointer,
+  // represented using that encoding.
+  DW_Z_has_LSDA = 'L',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, followed by a pointer
+  // to a personality routine, represented using that encoding.
+  DW_Z_has_personality_routine = 'P',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding describing how the FDE's
+  // initial location, address range, and DW_CFA_set_loc operands are
+  // encoded.
+  DW_Z_has_FDE_address_encoding = 'R',
+
+  // If this letter is present in a 'z' augmentation string, then code
+  // addresses covered by FDEs that cite this CIE are signal delivery
+  // trampolines. Return addresses of frames in trampolines should not be
+  // adjusted as described in section 6.4.4 of the DWARF 3 spec.
+  DW_Z_is_signal_trampoline = 'S'
+};
+
+// Expression opcodes. For lit/reg/breg only members 0 and 31 are listed.
+enum DwarfExpressionOpcodes {
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+  DW_OP_skip = 0x2f,  // Listed out of numeric order.
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_lit0 = 0x30,
+  DW_OP_lit31 = 0x4f,
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg31 = 0x8f,
+  DW_OP_regx = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregx = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff
+};
+
+}  // namespace lul
+
+#endif  // LulDwarfInt_h
diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp
new file mode 100644
index 0000000000..ff0f212f6c
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.cpp
@@ -0,0 +1,553 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulDwarfExt.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_SUMMARISER 0
+
+namespace lul {
+
+// Returns true iff |s64| is representable as a signed 32-bit integer, that
+// is, iff sign-extending its lowest 32 bits gives back |s64| itself.
+static inline bool fitsIn32Bits(int64 s64) {
+  return -0x80000000LL <= s64 && s64 <= 0x7fffffffLL;
+}
+
+// Validate the LExpr prefix expression that begins at pfxInstrs[start] and
+// runs up to the next PX_End instruction.  Two things are checked:
+// * |start| is a valid index into |pfxInstrs|
+// * every PX_DwReg instruction names a register tracked on this target
+// Returns nullptr if the expression is acceptable.  Otherwise returns a
+// pointer to constant text briefly describing the problem.
+static const char* checkPfxExpr(const vector<PfxInstr>* pfxInstrs,
+                                int64_t start) {
+  const size_t count = pfxInstrs->size();
+  if (start < 0 || start >= (ssize_t)count) {
+    return "bogus start point";
+  }
+  size_t ix = start;
+  while (ix < count) {
+    PfxInstr instr = (*pfxInstrs)[ix];
+    if (instr.mOpcode == PX_End) {
+      break;
+    }
+    const bool namesReg = instr.mOpcode == PX_DwReg;
+    if (namesReg && !registerIsTracked((DW_REG_NUMBER)instr.mOperand)) {
+      return "uses untracked reg";
+    }
+    ix++;
+  }
+  return nullptr;  // success
+}
+
+// Create a summariser that parks its output in |aSecMap|, adds |aTextBias|
+// to incoming addresses, and sends log text to |aLog|.
+Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias,
+                       void (*aLog)(const char*))
+    : mSecMap(aSecMap),
+      mCurrAddr(0),
+      mMax1Addr(0),  // Equal to mCurrAddr, which gives an empty range.
+      mTextBias(aTextBias),
+      mLog(aLog) {
+  // Initialise the running RuleSet to "haven't got a clue" status.
+  new (&mCurrRules) RuleSet();
+}
+
+// Begin a new summary covering |aLength| bytes starting at |aAddress| (to
+// which mTextBias is added).  Any previous summary is thrown away, on the
+// assumption that it has already been properly finished by a call to End().
+void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) {
+  const uintptr_t biasedStart = aAddress + mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    SprintfLiteral(buf, "LUL Entry(%llx, %llu)\n",
+                   (unsigned long long int)biasedStart,
+                   (unsigned long long int)aLength);
+    mLog(buf);
+  }
+  // Reset the running state: a fresh RuleSet and a fresh address range.
+  new (&mCurrRules) RuleSet();
+  mCurrAddr = biasedStart;
+  mMax1Addr = biasedStart + aLength;
+}
+
+// Record that, at |aAddress| (to which mTextBias is added), the value of
+// |aNewReg| is recovered using LExpr(how, oldReg, offset).
+//
+// If the biased address lies beyond mCurrAddr, the RuleSet accumulated so
+// far is first emitted into mSecMap for the range mCurrAddr .. aAddress - 1
+// and mCurrAddr is advanced.  The new rule is then checked against what the
+// target-specific code below can represent.  A rule that is rejected is not
+// stored in mCurrRules; if a reason was recorded it is reported via mLog.
+void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+                      int16_t oldReg, int64_t offset) {
+  aAddress += mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    if (how == NODEREF || how == DEREF) {
+      bool deref = how == DEREF;
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = %sr%d + %lld%s\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     deref ? "*(" : "", (int)oldReg, (long long int)offset,
+                     deref ? ")" : "");
+    } else if (how == PFXEXPR) {
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = pfx-expr-at %lld\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     (long long int)offset);
+    } else {
+      SprintfLiteral(buf, "LUL  0x%llx  old-r%d = (invalid LExpr!)\n",
+                     (unsigned long long int)aAddress, aNewReg);
+    }
+    mLog(buf);
+  }
+
+  if (mCurrAddr < aAddress) {
+    // Flush the existing summary first.
+    mCurrRules.mAddr = mCurrAddr;
+    mCurrRules.mLen = aAddress - mCurrAddr;
+    mSecMap->AddRuleSet(&mCurrRules);
+    if (DEBUG_SUMMARISER) {
+      mLog("LUL  ");
+      mCurrRules.Print(mLog);
+      mLog("\n");
+    }
+    mCurrAddr = aAddress;
+  }
+
+  // If for some reason summarisation fails, either or both of these
+  // become non-null and point at constant text describing the
+  // problem.  Using two rather than just one avoids complications of
+  // having to concatenate two strings to produce a complete error message.
+  const char* reason1 = nullptr;
+  const char* reason2 = nullptr;
+
+  // |offset| needs to be a 32 bit value that sign extends to 64 bits
+  // on a 64 bit target.  We will need to incorporate |offset| into
+  // any LExpr made here.  So we may as well check it right now.
+  if (!fitsIn32Bits(offset)) {
+    reason1 = "offset not in signed 32-bit range";
+    goto cant_summarise;
+  }
+
+  // FIXME: factor out common parts of the arch-dependent summarisers.
+
+#if defined(GP_ARCH_arm)
+
+  // ----------------- arm ----------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we
+      // choose to represent are: r7/11/12/13 + offset.  The offset
+      // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM,
+      // hence there is no need to check it for overflow.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_ARM_R7:
+        case DW_REG_ARM_R11:
+        case DW_REG_ARM_R12:
+        case DW_REG_ARM_R13:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_ARM_R7:
+    case DW_REG_ARM_R11:
+    case DW_REG_ARM_R12:
+    case DW_REG_ARM_R13:
+    case DW_REG_ARM_R14:
+    case DW_REG_ARM_R15: {
+      // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or
+      // R15 (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for R7/11/12/13/14/15: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for R7/11/12/13/14/15: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_ARM_R7:
+          mCurrRules.mR7expr = expr;
+          break;
+        case DW_REG_ARM_R11:
+          mCurrRules.mR11expr = expr;
+          break;
+        case DW_REG_ARM_R12:
+          mCurrRules.mR12expr = expr;
+          break;
+        case DW_REG_ARM_R13:
+          mCurrRules.mR13expr = expr;
+          break;
+        case DW_REG_ARM_R14:
+          mCurrRules.mR14expr = expr;
+          break;
+        case DW_REG_ARM_R15:
+          mCurrRules.mR15expr = expr;
+          break;
+        default:
+          MOZ_ASSERT(0);
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here.  This program point
+      // is reached so often that it causes a flood of "Can't
+      // summarise" messages.  In any case, we don't really care about
+      // the fact that this summary would produce a new value for a
+      // register that we're not tracking.  We do on the other hand
+      // care if the summary's expression *uses* a register that we're
+      // not tracking.  But in that case one of the above failures
+      // should tell us which.
+      goto cant_summarise;
+  }
+
+  // Mark callee-saved registers (r4 .. r11) as unchanged, if there is
+  // no other information about them.  FIXME: do this just once, at
+  // the point where the ruleset is committed.
+  if (mCurrRules.mR7expr.mHow == UNKNOWN) {
+    mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0);
+  }
+  if (mCurrRules.mR11expr.mHow == UNKNOWN) {
+    mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0);
+  }
+  if (mCurrRules.mR12expr.mHow == UNKNOWN) {
+    mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0);
+  }
+
+  // The old r13 (SP) value before the call is always the same as the
+  // CFA.
+  // Note that this assignment is unconditional, so it replaces any R13
+  // rule stored by the switch above.
+  mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
+
+  // If there's no information about R15 (the return address), say
+  // it's a copy of R14 (the link register).
+  if (mCurrRules.mR15expr.mHow == UNKNOWN) {
+    mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
+  }
+
+#elif defined(GP_ARCH_arm64)
+
+  // ----------------- arm64 ----------------- //
+
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms accepted
+      // here are: X29 + offset or SP + offset.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_AARCH64_X29:
+        case DW_REG_AARCH64_SP:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_AARCH64_X29:
+    case DW_REG_AARCH64_X30:
+    case DW_REG_AARCH64_SP: {
+      // This is a new rule for X29, X30 or SP.
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for X29/X30/SP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for X29/X30/SP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_AARCH64_X29:
+          mCurrRules.mX29expr = expr;
+          break;
+        case DW_REG_AARCH64_X30:
+          mCurrRules.mX30expr = expr;
+          break;
+        case DW_REG_AARCH64_SP:
+          mCurrRules.mSPexpr = expr;
+          break;
+        default:
+          MOZ_ASSERT(0);
+      }
+      break;
+    }
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons explained
+      // in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  // If there is no other information about X29 or X30, mark them as
+  // unchanged.
+  if (mCurrRules.mX29expr.mHow == UNKNOWN) {
+    mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0);
+  }
+  if (mCurrRules.mX30expr.mHow == UNKNOWN) {
+    mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0);
+  }
+  // On aarch64, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+
+  // ---------------- x64/x86 ---------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA: {
+      // This is a rule that defines the CFA.  The only forms we choose to
+      // represent are: = SP+offset, = FP+offset, or =prefix-expr.
+      switch (how) {
+        case NODEREF:
+          if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) {
+            reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+            goto cant_summarise;
+          }
+          break;
+        case DEREF:
+          reason1 = "rule for DW_REG_CFA: invalid |how|";
+          goto cant_summarise;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for CFA: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+    }
+
+    case DW_REG_INTEL_XSP:
+    case DW_REG_INTEL_XBP:
+    case DW_REG_INTEL_XIP: {
+      // This is a new rule for XSP, XBP or XIP (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for XSP/XBP/XIP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for XSP/XBP/XIP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_INTEL_XBP:
+          mCurrRules.mXbpExpr = expr;
+          break;
+        case DW_REG_INTEL_XSP:
+          mCurrRules.mXspExpr = expr;
+          break;
+        case DW_REG_INTEL_XIP:
+          mCurrRules.mXipExpr = expr;
+          break;
+        default:
+          MOZ_CRASH("impossible value for aNewReg");
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  // On Intel, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mXspExpr.mHow == UNKNOWN) {
+    mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+
+  // Also, gcc says "Undef" for BP when it is unchanged.
+  if (mCurrRules.mXbpExpr.mHow == UNKNOWN) {
+    mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0);
+  }
+
+#elif defined(GP_ARCH_mips64)
+  // ---------------- mips ---------------- //
+  //
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we can
+      // represent are: = SP+offset or = FP+offset.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) {
+        reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+        goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_MIPS_SP:
+    case DW_REG_MIPS_FP:
+    case DW_REG_MIPS_PC: {
+      // This is a new rule for SP, FP or PC (the return address).
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for SP/FP/PC: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for SP/FP/PC: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_MIPS_FP:
+          mCurrRules.mFPexpr = expr;
+          break;
+        case DW_REG_MIPS_SP:
+          mCurrRules.mSPexpr = expr;
+          break;
+        case DW_REG_MIPS_PC:
+          mCurrRules.mPCexpr = expr;
+          break;
+        default:
+          MOZ_CRASH("impossible value for aNewReg");
+      }
+      break;
+    }
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+  }
+
+  // On MIPS, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+
+  // Also, gcc says "Undef" for FP when it is unchanged.
+  if (mCurrRules.mFPexpr.mHow == UNKNOWN) {
+    mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0);
+  }
+
+#else
+
+#  error "Unsupported arch"
+#endif
+
+  // Success: the rule has been folded into mCurrRules.
+  return;
+
+cant_summarise:
+  // Only report the failure if at least one reason was recorded; paths
+  // that deliberately leave both unset stay silent.
+  if (reason1 || reason2) {
+    char buf[200];
+    SprintfLiteral(buf,
+                   "LUL  can't summarise: "
+                   "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n",
+                   (unsigned long long int)(aAddress - mTextBias),
+                   reason1 ? reason1 : "", reason2 ? reason2 : "",
+                   NameOf_LExprHow(how), (unsigned int)oldReg,
+                   (long long int)offset);
+    mLog(buf);
+  }
+}
+
+// Forward |pfxi| to mSecMap->AddPfxInstr and return its result.
+uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) {
+  const uint32_t result = mSecMap->AddPfxInstr(pfxi);
+  return result;
+}
+
+// Finish the current summary: if the range mCurrAddr .. mMax1Addr - 1 is
+// nonempty, emit the running RuleSet for that range into mSecMap.
+void Summariser::End() {
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL End\n");
+  }
+  if (mCurrAddr >= mMax1Addr) {
+    return;  // Empty range, nothing to emit.
+  }
+  mCurrRules.mAddr = mCurrAddr;
+  mCurrRules.mLen = mMax1Addr - mCurrAddr;
+  mSecMap->AddRuleSet(&mCurrRules);
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL  ");
+    mCurrRules.Print(mLog);
+    mLog("\n");
+  }
+}
+
+}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulDwarfSummariser.h b/mozglue/baseprofiler/lul/LulDwarfSummariser.h
new file mode 100644
index 0000000000..30f1ba23c1
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulDwarfSummariser.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulDwarfSummariser_h
+#define LulDwarfSummariser_h
+
+#include "LulMainInt.h"
+
+namespace lul {
+
+// Receives per-address register-recovery rules and accumulates them into
+// RuleSets, which are parked in a SecMap.  The calls seen in this file are
+// Entry(), then any number of Rule() calls, then End().
+class Summariser {
+ public:
+  // |aSecMap| receives the finished RuleSets, |aTextBias| is added to all
+  // incoming addresses, and |aLog| is the logging sink.
+  Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*));
+
+  // Start a new summary for the range of |aLength| bytes at |aAddress|.
+  virtual void Entry(uintptr_t aAddress, uintptr_t aLength);
+  // Finish the current summary, emitting the RuleSet under construction
+  // if its address range is nonempty.
+  virtual void End();
+
+  // Tell the summariser that the value for |aNewReg| at |aAddress| is
+  // recovered using the LExpr that can be constructed using the
+  // components |how|, |oldReg| and |offset|.  The summariser will
+  // inspect the components and may reject them for various reasons,
+  // but the hope is that it will find them acceptable and record this
+  // rule permanently.
+  virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how,
+                    int16_t oldReg, int64_t offset);
+
+  // Pass |pfxi| on to the SecMap's AddPfxInstr and return its result.
+  virtual uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Send output to the logging sink, for debugging.
+  virtual void Log(const char* str) { mLog(str); }
+
+ private:
+  // The SecMap in which we park the finished summaries (RuleSets) and
+  // also any PfxInstrs derived from Dwarf expressions.
+  SecMap* mSecMap;
+
+  // Running state for the current summary (RuleSet) under construction.
+  RuleSet mCurrRules;
+
+  // The start of the address range to which the RuleSet under
+  // construction applies.
+  uintptr_t mCurrAddr;
+
+  // The highest address, plus one, for which the RuleSet under
+  // construction could possibly apply.  If there are no further
+  // incoming events then mCurrRules will eventually be emitted
+  // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is
+  // nonempty.
+  uintptr_t mMax1Addr;
+
+  // The bias value (to add to the SVMAs, to get AVMAs) to be used
+  // when adding entries into mSecMap.
+  uintptr_t mTextBias;
+
+  // A logging sink, for debugging.
+  void (*mLog)(const char* aFmt);
+};
+
+}  // namespace lul
+
+#endif  // LulDwarfSummariser_h
diff --git a/mozglue/baseprofiler/lul/LulElf.cpp b/mozglue/baseprofiler/lul/LulElf.cpp
new file mode 100644
index 0000000000..b5d5e772be
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElf.cpp
@@ -0,0 +1,873 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// (derived from)
+// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
+// Find all the debugging info in a file and dump it as a Breakpad symbol file.
+//
+// dump_symbols.h: Read debugging information from an ELF file, and write
+// it out as a Breakpad symbol file.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.cc
+//   src/common/linux/elfutils.cc
+//   src/common/linux/file_id.cc
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+#include <cstdlib>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "PlatformMacros.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulElfInt.h"
+#include "LulMainInt.h"
+
+#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
+// bionic and older glibc versions don't define it
+#  define SHT_ARM_EXIDX (SHT_LOPROC + 1)
+#endif
+
+// Old Linux header doesn't define EM_AARCH64
+#ifndef EM_AARCH64
+#  define EM_AARCH64 183
+#endif
+
+// This namespace contains helper functions.
+namespace {
+
+using lul::DwarfCFIToModule;
+using lul::FindElfSectionByName;
+using lul::GetOffset;
+using lul::IsValidElf;
+using lul::Module;
+using lul::scoped_ptr;
+using lul::Summariser;
+using lul::UniqueStringUniverse;
+using std::set;
+using std::string;
+using std::vector;
+
+//
+// FDWrapper
+//
+// Wrapper class to make sure an opened file is closed.  The wrapper owns
+// the descriptor, so it is non-copyable: a copy would close the same
+// descriptor a second time when both objects are destroyed.
+//
+class FDWrapper {
+ public:
+  explicit FDWrapper(int fd) : fd_(fd) {}
+  FDWrapper(const FDWrapper&) = delete;
+  FDWrapper& operator=(const FDWrapper&) = delete;
+  // Closes the descriptor unless it is -1 (never valid, or released).
+  ~FDWrapper() {
+    if (fd_ != -1) close(fd_);
+  }
+  // Returns the wrapped descriptor; ownership stays with the wrapper.
+  int get() { return fd_; }
+  // Gives up ownership: returns the descriptor and makes sure the
+  // destructor will not close it.
+  int release() {
+    int fd = fd_;
+    fd_ = -1;
+    return fd;
+  }
+
+ private:
+  int fd_;
+};
+
+//
+// MmapWrapper
+//
+// Wrapper class to make sure mapped regions are unmapped.  The wrapper owns
+// the mapping, so it is non-copyable: a copy would unmap the same region a
+// second time when both objects are destroyed.
+//
+class MmapWrapper {
+ public:
+  MmapWrapper() : is_set_(false), base_(nullptr), size_(0) {}
+  MmapWrapper(const MmapWrapper&) = delete;
+  MmapWrapper& operator=(const MmapWrapper&) = delete;
+  // Unmaps the region handed over by set(), unless it has been released.
+  ~MmapWrapper() {
+    if (is_set_ && base_ != nullptr) {
+      MOZ_ASSERT(size_ > 0);
+      munmap(base_, size_);
+    }
+  }
+  // Takes ownership of the |mapped_size| bytes mapped at |mapped_address|.
+  // Any region recorded by an earlier call is overwritten, not unmapped.
+  void set(void* mapped_address, size_t mapped_size) {
+    is_set_ = true;
+    base_ = mapped_address;
+    size_ = mapped_size;
+  }
+  // Gives up ownership so that the destructor will not unmap the region.
+  // Must only be called after set().
+  void release() {
+    MOZ_ASSERT(is_set_);
+    is_set_ = false;
+    base_ = nullptr;
+    size_ = 0;
+  }
+
+ private:
+  bool is_set_;
+  void* base_;
+  size_t size_;
+};
+
+// Store in *NUM_DW_REGNAMES the number of Dwarf register names
+// appropriate to the machine architecture recorded in ELF_HEADER.
+// Returns true on success.  If the machine architecture is not one of
+// those handled below, asserts and returns false, leaving
+// *NUM_DW_REGNAMES untouched.
+template <typename ElfClass>
+bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
+                           unsigned int* num_dw_regnames) {
+  typedef DwarfCFIToModule::RegisterNames RegisterNames;
+  const auto machine = elf_header->e_machine;
+  if (machine == EM_386) {
+    *num_dw_regnames = RegisterNames::I386();
+  } else if (machine == EM_ARM) {
+    *num_dw_regnames = RegisterNames::ARM();
+  } else if (machine == EM_X86_64) {
+    *num_dw_regnames = RegisterNames::X86_64();
+  } else if (machine == EM_MIPS) {
+    *num_dw_regnames = RegisterNames::MIPS();
+  } else if (machine == EM_AARCH64) {
+    *num_dw_regnames = RegisterNames::ARM64();
+  } else {
+    MOZ_ASSERT(0);
+    return false;
+  }
+  return true;
+}
+
+// Parse the call frame information held in |section| (named
+// |section_name|) of the ELF image at |elf_header|, and feed the results
+// through a Summariser into |smap|, using |text_bias| as the bias.
+// |eh_frame| is passed through to the CallFrameInfo parser.  |got_section|
+// and |text_section| may be null; when present they supply base addresses
+// to the ByteReader.  |dwarf_filename| is used only in diagnostics.
+// Returns false if the machine architecture is not recognised, and true
+// otherwise; the result of parser.Start() is not checked.
+template <typename ElfClass>
+bool LoadDwarfCFI(const string& dwarf_filename,
+                  const typename ElfClass::Ehdr* elf_header,
+                  const char* section_name,
+                  const typename ElfClass::Shdr* section, const bool eh_frame,
+                  const typename ElfClass::Shdr* got_section,
+                  const typename ElfClass::Shdr* text_section,
+                  const bool big_endian, SecMap* smap, uintptr_t text_bias,
+                  UniqueStringUniverse* usu, void (*log)(const char*)) {
+  // Find the appropriate set of register names for this file's
+  // architecture.
+  unsigned int num_dw_regs = 0;
+  if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
+    fprintf(stderr,
+            "%s: unrecognized ELF machine architecture '%d';"
+            " cannot convert DWARF call frame information\n",
+            dwarf_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  const lul::Endianness endianness =
+      big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
+
+  // Find the call frame information and its size.
+  const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset);
+  size_t cfi_size = section->sh_size;
+
+  // Plug together the parser, handler, and their entourages.
+
+  // Here's a summariser, which will receive the output of the
+  // parser, create summaries, and add them to |smap|.
+  Summariser summ(smap, text_bias, log);
+
+  lul::ByteReader reader(endianness);
+  reader.SetAddressSize(ElfClass::kAddrSize);
+
+  // The handler is given pointers to the reporter, reader and summariser
+  // declared above.
+  DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
+  DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ);
+
+  // Provide the base addresses for .eh_frame encoded pointers, if
+  // possible.
+  reader.SetCFIDataBase(section->sh_addr, cfi);
+  if (got_section) reader.SetDataBase(got_section->sh_addr);
+  if (text_section) reader.SetTextBase(text_section->sh_addr);
+
+  lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
+                                              section_name);
+  lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter,
+                            eh_frame);
+  parser.Start();
+
+  return true;
+}
+
+// Open |obj_file| and map it read-only into memory.  On success, hands the
+// mapping to |map_wrapper|, stores its base address in |*elf_header| and
+// returns true.  Returns false, after printing a diagnostic to stderr, if
+// the file cannot be opened, fstat'd or mapped, if it is empty, or if it is
+// not a valid ELF file.  In the last case the mapping has already been
+// handed to |map_wrapper| and |*elf_header| has already been written.
+bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
+             void** elf_header) {
+  int obj_fd = open(obj_file.c_str(), O_RDONLY);
+  if (obj_fd < 0) {
+    fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  // Closes |obj_fd| on every return path below.
+  FDWrapper obj_fd_wrapper(obj_fd);
+  struct stat st;
+  // Check the fstat result on its own: |st| is only meaningful if fstat
+  // succeeded, so its fields must not be read otherwise.
+  if (fstat(obj_fd, &st) != 0) {
+    fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  // An empty file cannot be mapped; errno says nothing useful here, so it
+  // is not reported.
+  if (st.st_size <= 0) {
+    fprintf(stderr, "ELF file '%s' is empty\n", obj_file.c_str());
+    return false;
+  }
+  // Mapping it read-only is good enough.  In any case, mapping it
+  // read-write confuses Valgrind's debuginfo acquire/discard
+  // heuristics, making it hard to profile the profiler.
+  void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0);
+  if (obj_base == MAP_FAILED) {
+    fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(),
+            strerror(errno));
+    return false;
+  }
+  map_wrapper->set(obj_base, st.st_size);
+  *elf_header = obj_base;
+  if (!IsValidElf(*elf_header)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
+    return false;
+  }
+  return true;
+}
+
+// Determine the byte order recorded in ELF_HEADER's e_ident[EI_DATA].
+// On success, sets *BIG_ENDIAN accordingly and returns true.  If the
+// encoding is neither ELFDATA2LSB nor ELFDATA2MSB, prints a diagnostic to
+// stderr and returns false, leaving *BIG_ENDIAN untouched.
+template <typename ElfClass>
+bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
+                   bool* big_endian) {
+  const auto encoding = elf_header->e_ident[EI_DATA];
+  switch (encoding) {
+    case ELFDATA2LSB:
+      *big_endian = false;
+      return true;
+    case ELFDATA2MSB:
+      *big_endian = true;
+      return true;
+    default:
+      break;
+  }
+
+  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
+  return false;
+}
+
+//
+// LoadSymbolsInfo
+//
+// Carries state across the two calls to LoadSymbols() that are made when
+// the .gnu_debuglink section has to be followed and debug information is
+// loaded from a different file.
+//
+template <typename ElfClass>
+class LoadSymbolsInfo {
+ public:
+  typedef typename ElfClass::Addr Addr;
+
+  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
+      : debug_dirs_(dbg_dirs), has_loading_addr_(false) {}
+
+  // Records |section| as loaded.  If it had already been recorded, a
+  // message is printed to stderr instead, so that a section loaded twice
+  // from two different files does not go unnoticed.
+  void LoadedSection(const string& section) {
+    const bool newly_recorded = loaded_sections_.insert(section).second;
+    if (!newly_recorded) {
+      fprintf(stderr, "Section %s has already been loaded.\n", section.c_str());
+    }
+  }
+
+  // Returns a copy of the stored debug ELF file path.
+  string debuglink_file() const { return debuglink_file_; }
+
+ private:
+  // Directories in which to search for the debug ELF file.  Held by
+  // reference.
+  const vector<string>& debug_dirs_;
+
+  // Full path to the debug ELF file.
+  string debuglink_file_;
+
+  // Indicate if LOADING_ADDR_ is valid.
+  bool has_loading_addr_;
+
+  // Tracks the loaded ELF sections between calls to LoadSymbols().
+  set<string> loaded_sections_;
+};
+
+// Return the preferred loading address of the binary: the p_vaddr of the
+// first PT_LOAD entry among the NHEADER program headers, or 0 if none.
+template <typename ElfClass>
+typename ElfClass::Addr GetLoadingAddress(
+    const typename ElfClass::Phdr* program_headers, int nheader) {
+  typedef typename ElfClass::Phdr Phdr;
+
+  // ELF requires the segments to be sorted by load address, so for non-PIC
+  // executables (e_type == ET_EXEC) the first PT_LOAD segment starts at the
+  // load address.  For PIC executables and dynamic libraries
+  // (e_type == ET_DYN), this address will normally be zero.
+  const Phdr* phdr = program_headers;
+  for (int remaining = nheader; remaining > 0; --remaining, ++phdr) {
+    if (phdr->p_type == PT_LOAD) return phdr->p_vaddr;
+  }
+  return 0;
+}
+
+// Feed the call frame information sections (.debug_frame and .eh_frame) of
+// the ELF image at ELF_HEADER to LoadDwarfCFI(), together with SMAP and a
+// text bias derived from RX_AVMA.  Progress is reported through LOG.
+// Returns true if at least one of the two sections was read successfully.
+template <typename ElfClass>
+bool LoadSymbols(const string& obj_file, const bool big_endian,
+                 const typename ElfClass::Ehdr* elf_header,
+                 const bool read_gnu_debug_link,
+                 LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma,
+                 size_t rx_size, UniqueStringUniverse* usu,
+                 void (*log)(const char*)) {
+  typedef typename ElfClass::Phdr Phdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  char buf[500];
+  SprintfLiteral(buf, "LoadSymbols: BEGIN   %s\n", obj_file.c_str());
+  buf[sizeof(buf) - 1] = 0;
+  log(buf);
+
+  // The text bias is the distance from the binary's preferred loading
+  // address (see GetLoadingAddress) to where its text is mapped, rx_avma.
+  const Phdr* program_headers =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+  const uintptr_t loading_addr =
+      GetLoadingAddress<ElfClass>(program_headers, elf_header->e_phnum);
+  uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
+  SprintfLiteral(buf, "LoadSymbols:   rx_avma=%llx, text_bias=%llx",
+                 (unsigned long long int)(uintptr_t)rx_avma,
+                 (unsigned long long int)text_bias);
+  buf[sizeof(buf) - 1] = 0;
+  log(buf);
+
+  // Locate the section header table and the section name string table.
+  const Shdr* sections =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* name_table = sections + elf_header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(elf_header, name_table->sh_offset);
+  const char* names_end = names + name_table->sh_size;
+  const auto find_progbits = [&](const char* section_name) {
+    return FindElfSectionByName<ElfClass>(section_name, SHT_PROGBITS, sections,
+                                          names, names_end,
+                                          elf_header->e_shnum);
+  };
+  bool found_usable_info = false;
+
+  // Dwarf Call Frame Information (CFI) is actually independent from
+  // the other DWARF debugging information, and can be used alone.
+  if (const Shdr* dwarf_cfi_section = find_progbits(".debug_frame")) {
+    info->LoadedSection(".debug_frame");
+    // A failure here is tolerated: .eh_frame below may still supply CFI.
+    if (LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
+                               dwarf_cfi_section, false, 0, 0, big_endian,
+                               smap, text_bias, usu, log)) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .debug_frame");
+    }
+  }
+
+  // Linux C++ exception handling information can also provide unwind data.
+  if (const Shdr* eh_frame_section = find_progbits(".eh_frame")) {
+    // Pointers in .eh_frame data may be relative to the base addresses of
+    // certain sections. Provide those sections if present.
+    const Shdr* got_section = find_progbits(".got");
+    const Shdr* text_section = find_progbits(".text");
+    info->LoadedSection(".eh_frame");
+    if (LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
+                               eh_frame_section, true, got_section,
+                               text_section, big_endian, smap, text_bias, usu,
+                               log)) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .eh_frame");
+    }
+  }
+
+  SprintfLiteral(buf, "LoadSymbols: END     %s\n", obj_file.c_str());
+  buf[sizeof(buf) - 1] = 0;
+  log(buf);
+  return found_usable_info;
+}
+
+// Return the breakpad symbol file identifier for the architecture given by
+// ELF_HEADER's e_machine field, or NULL if that machine is not one of the
+// architectures listed below.
+template <typename ElfClass>
+const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
+  typedef typename ElfClass::Half Half;
+
+  // Recognized e_machine values, each paired with the identifier that is
+  // reported for it.
+  static const struct {
+    Half machine;
+    const char* name;
+  } kArchitectures[] = {
+      {EM_386, "x86"},
+      {EM_ARM, "arm"},
+      {EM_AARCH64, "arm64"},
+      {EM_MIPS, "mips"},
+      {EM_PPC64, "ppc64"},
+      {EM_PPC, "ppc"},
+      {EM_S390, "s390"},
+      {EM_SPARC, "sparc"},
+      {EM_SPARCV9, "sparcv9"},
+      {EM_X86_64, "x86_64"},
+  };
+
+  const Half arch = elf_header->e_machine;
+  for (const auto& entry : kArchitectures) {
+    if (entry.machine == arch) return entry.name;
+  }
+  return NULL;
+}
+
+// Render the ELF file identifier in IDENTIFIER as the text of a UUID with
+// its dashes removed, followed by one extra '0' character.
+string FormatIdentifier(unsigned char identifier[16]) {
+  char uuid_text[40];
+  lul::FileID::ConvertIdentifierToString(identifier, uuid_text,
+                                         sizeof(uuid_text));
+  string result;
+  for (const char* cursor = uuid_text; *cursor != '\0'; ++cursor) {
+    if (*cursor != '-') result.push_back(*cursor);
+  }
+  // PDB files on Windows append an 'age' number to the file identifier; it
+  // isn't needed on other platforms, but add a "0" to stay consistent.
+  result.push_back('0');
+  return result;
+}
+
+// Return the non-directory portion of FILENAME, as computed by basename():
+// the portion after the last slash, or all of FILENAME if it has no slash.
+string BaseFileName(const string& filename) {
+  // strdup() gives basename() a throwaway, mutable copy to work on.
+  char* scratch = strdup(filename.c_str());
+  string result(basename(scratch));
+  free(scratch);
+  return result;
+}
+
+// Read unwind data for the ELF image at ELF_HEADER through LoadSymbols().
+// If that finds nothing and |info| names a debuglink file, load that file
+// and, once its architecture and endianness match, try again on it.
+template <typename ElfClass>
+bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
+                            const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  typedef typename ElfClass::Ehdr Ehdr;
+
+  unsigned char identifier[16];
+  if (!lul::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
+    fprintf(stderr, "%s: unable to generate file identifier\n",
+            obj_filename.c_str());
+    return false;
+  }
+
+  const char* architecture = ElfArchitecture<ElfClass>(elf_header);
+  if (architecture == NULL) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            obj_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  // Figure out what endianness this file is.
+  bool big_endian;
+  if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false;
+
+  string name = BaseFileName(obj_filename);
+  string os = "Linux";
+  string id = FormatIdentifier(identifier);
+
+  LoadSymbolsInfo<ElfClass> info(debug_dirs);
+  const bool loaded_from_object = LoadSymbols<ElfClass>(
+      obj_filename, big_endian, elf_header, !debug_dirs.empty(), &info, smap,
+      rx_avma, rx_size, usu, log);
+  if (loaded_from_object) return true;
+
+  // Nothing usable in the object itself: fall back on the debuglink file.
+  const string debuglink_file = info.debuglink_file();
+  if (debuglink_file.empty()) return false;
+
+  fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
+  MmapWrapper debug_map_wrapper;
+  Ehdr* debug_elf_header = NULL;
+  if (!LoadELF(debuglink_file, &debug_map_wrapper,
+               reinterpret_cast<void**>(&debug_elf_header))) {
+    return false;
+  }
+
+  // Sanity checks to make sure everything matches up.
+  const char* debug_architecture = ElfArchitecture<ElfClass>(debug_elf_header);
+  if (debug_architecture == NULL) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            debuglink_file.c_str(), debug_elf_header->e_machine);
+    return false;
+  }
+  if (strcmp(architecture, debug_architecture) != 0) {
+    fprintf(stderr,
+            "%s with ELF machine architecture %s does not match "
+            "%s with ELF architecture %s\n",
+            debuglink_file.c_str(), debug_architecture, obj_filename.c_str(),
+            architecture);
+    return false;
+  }
+  bool debug_big_endian;
+  if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
+    return false;
+  if (debug_big_endian != big_endian) {
+    fprintf(stderr, "%s and %s does not match in endianness\n",
+            obj_filename.c_str(), debuglink_file.c_str());
+    return false;
+  }
+
+  return LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
+                               debug_elf_header, false, &info, smap, rx_avma,
+                               rx_size, usu, log);
+}
+
+}  // namespace
+
+namespace lul {
+
+// Read unwind data from the ELF image mapped at OBJ_FILE, dispatching on its
+// ELF class.  Fails if the ELF magic is wrong or the class is not 32/64-bit.
+bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename,
+                            const vector<string>& debug_dirs, SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  if (!IsValidElf(obj_file)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
+    return false;
+  }
+  switch (ElfClass(obj_file)) {
+    case ELFCLASS32:
+      return ReadSymbolDataElfClass<ElfClass32>(
+          reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    case ELFCLASS64:
+      return ReadSymbolDataElfClass<ElfClass64>(
+          reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename,
+          debug_dirs, smap, rx_avma, rx_size, usu, log);
+    default:
+      return false;
+  }
+}
+
+// Load OBJ_FILE with LoadELF() and read unwind data from the loaded image.
+bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs,
+                    SecMap* smap, void* rx_avma, size_t rx_size,
+                    UniqueStringUniverse* usu, void (*log)(const char*)) {
+  MmapWrapper mapping;
+  void* image = NULL;
+  const bool loaded = LoadELF(obj_file, &mapping, &image);
+  return loaded && ReadSymbolDataInternal(
+                       reinterpret_cast<uint8_t*>(image), obj_file,
+                       debug_dirs, smap, rx_avma, rx_size, usu, log);
+}
+
+namespace {
+
+// If the ELF image at ELF_BASE has a non-empty section named SECTION_NAME of
+// type SECTION_TYPE, store where it starts and how big it is in
+// *SECTION_START and *SECTION_SIZE; otherwise leave both untouched.
+template <typename ElfClass>
+void FindElfClassSection(const char* elf_base, const char* section_name,
+                         typename ElfClass::Word section_type,
+                         const void** section_start, int* section_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  // Section names live in the section that e_shstrndx designates.
+  const Shdr* section_table =
+      GetOffset<ElfClass, Shdr>(header, header->e_shoff);
+  const Shdr* name_table = section_table + header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(header, name_table->sh_offset);
+  const Shdr* match = FindElfSectionByName<ElfClass>(
+      section_name, section_type, section_table, names,
+      names + name_table->sh_size, header->e_shnum);
+
+  if (match == NULL || match->sh_size == 0) return;
+  *section_start = elf_base + match->sh_offset;
+  *section_size = match->sh_size;
+}
+
+// Store the location and file size of the first program header entry of
+// type SEGMENT_TYPE in *SEGMENT_START and *SEGMENT_SIZE.  Both are left
+// untouched when the ELF image at ELF_BASE has no such entry.
+template <typename ElfClass>
+void FindElfClassSegment(const char* elf_base,
+                         typename ElfClass::Word segment_type,
+                         const void** segment_start, int* segment_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Phdr Phdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* header = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(header->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  const Phdr* phdr = GetOffset<ElfClass, Phdr>(header, header->e_phoff);
+  for (int remaining = header->e_phnum; remaining > 0; --remaining, ++phdr) {
+    if (phdr->p_type != segment_type) continue;
+    // The segment's bytes start p_offset bytes into the image at ELF_BASE.
+    *segment_start = elf_base + phdr->p_offset;
+    *segment_size = phdr->p_filesz;
+    return;
+  }
+}
+
+}  // namespace
+// True if the bytes at ELF_BASE begin with the ELF magic number (ELFMAG).
+bool IsValidElf(const void* elf_base) {
+  return !strncmp(static_cast<const char*>(elf_base), ELFMAG, SELFMAG);
+}
+
+// Return the EI_CLASS byte from the e_ident array of the ELF header at
+// ELF_BASE (ELFCLASS32 or ELFCLASS64 for the classes handled in this file).
+int ElfClass(const void* elf_base) {
+  return static_cast<const ElfW(Ehdr)*>(elf_base)->e_ident[EI_CLASS];
+}
+
+// Find the non-empty section SECTION_NAME of type SECTION_TYPE in the ELF
+// image at ELF_MAPPED_BASE; the full contract is documented in LulElfInt.h.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+                    uint32_t section_type, const void** section_start,
+                    int* section_size, int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  *section_start = NULL;
+  *section_size = 0;
+  if (!IsValidElf(elf_mapped_base)) return false;
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass) *elfclass = cls;
+
+  const char* elf_base = static_cast<const char*>(elf_mapped_base);
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    default:
+      return false;
+  }
+  return *section_start != NULL;
+}
+
+// Find the first segment of type SEGMENT_TYPE in the ELF image at
+// ELF_MAPPED_BASE; the full contract is documented in LulElfInt.h.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+                    const void** segment_start, int* segment_size,
+                    int* elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  *segment_start = NULL;
+  *segment_size = 0;
+  if (!IsValidElf(elf_mapped_base)) return false;
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass) *elfclass = cls;
+
+  const char* elf_base = static_cast<const char*>(elf_mapped_base);
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start,
+                                      segment_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start,
+                                      segment_size);
+      break;
+    default:
+      return false;
+  }
+  return *segment_start != NULL;
+}
+
+// (derived from)
+// file_id.cc: Return a unique identifier for a file
+//
+// See file_id.h for documentation
+//
+
+// ELF note name and desc are padded to 32-bit words: round A up to 4 bytes.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+// These functions are also used inside the crashed process, so be safe
+// and use the syscall/libc wrappers instead of direct syscalls or libc.
+
+// Copy up to kMDGUIDSize bytes (zero padded) of the first NT_GNU_BUILD_ID note
+// descriptor found in [SECTION, SECTION + LENGTH) into IDENTIFIER.  Returns
+// false if there is none, it is empty, or a note overruns the given data.
+template <typename ElfClass>
+static bool ElfClassBuildIDNoteIdentifier(const void* section, int length,
+                                          uint8_t identifier[kMDGUIDSize]) {
+  typedef typename ElfClass::Nhdr Nhdr;
+  if (!section || length <= 0) return false;
+  const char* cursor = reinterpret_cast<const char*>(section);
+  uint64_t remaining = static_cast<uint64_t>(length);
+  while (remaining >= sizeof(Nhdr)) {  // The header itself must fit.
+    const Nhdr* note = reinterpret_cast<const Nhdr*>(cursor);
+    const uint64_t payload = remaining - sizeof(Nhdr);
+    const uint64_t name_size = NOTE_PADDING((uint64_t)note->n_namesz);
+    if (name_size > payload) return false;
+    if (note->n_type == NT_GNU_BUILD_ID) {
+      const size_t wanted = std::min(kMDGUIDSize, (size_t)note->n_descsz);
+      if (wanted == 0 || wanted > payload - name_size) return false;
+      memset(identifier, 0, kMDGUIDSize);
+      memcpy(identifier, cursor + sizeof(Nhdr) + name_size, wanted);
+      return true;
+    }
+    const uint64_t desc_size = NOTE_PADDING((uint64_t)note->n_descsz);
+    if (desc_size > payload - name_size) return false;
+    cursor += sizeof(Nhdr) + name_size + desc_size;
+    remaining = payload - name_size - desc_size;
+  }
+  return false;
+}
+
+// Copy as many bytes of the ELF build ID note as will fit into |identifier|.
+// The PT_NOTE segment is tried first, then the .note.gnu.build-id section.
+static bool FindElfBuildIDNote(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* note_data;
+  int note_size, elfclass;
+  const bool found =
+      (FindElfSegment(elf_mapped_base, PT_NOTE, &note_data, &note_size,
+                      &elfclass) &&
+       note_size != 0) ||
+      (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                      &note_data, &note_size, &elfclass) &&
+       note_size != 0);
+  if (!found) return false;
+  switch (elfclass) {
+    case ELFCLASS32:
+      return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_data, note_size,
+                                                       identifier);
+    case ELFCLASS64:
+      return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_data, note_size,
+                                                       identifier);
+    default:
+      return false;
+  }
+}
+
+// Generate a simple hash of the ELF image's .text section by XORing its
+// first page (at most 4096 bytes), kMDGUIDSize bytes at a time, into
+// |identifier|.  Returns false if no non-empty .text section is found.
+static bool HashElfTextSection(const void* elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* text_section;
+  int text_size;
+  const bool found = FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
+                                    &text_section, &text_size, NULL);
+  if (!found || text_size == 0) return false;
+
+  memset(identifier, 0, kMDGUIDSize);
+  const uint8_t* chunk = static_cast<const uint8_t*>(text_section);
+  const uint8_t* const limit = chunk + std::min(text_size, 4096);
+  // NOTE(review): the last chunk is read in full even when fewer than
+  // kMDGUIDSize bytes remain before |limit| -- confirm that is intended.
+  for (; chunk < limit; chunk += kMDGUIDSize) {
+    for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= chunk[i];
+  }
+  return true;
+}
+
+// static
+bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
+                                             uint8_t identifier[kMDGUIDSize]) {
+  // Prefer the build ID note. Only when that is unavailable is the
+  // identifier derived from a hash of the first page of the text section;
+  // || short-circuits, so the hash never overwrites a note-based result.
+  return FindElfBuildIDNote(base, identifier) ||
+         HashElfTextSection(base, identifier);
+}
+
+// static
+// Never writes more than BUFFER_LENGTH bytes (terminating NUL included): a
+// |buffer| shorter than 37 bytes receives a truncated, terminated string.
+void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                       char* buffer, int buffer_length) {
+  if (!buffer || buffer_length <= 0) return;
+  // Endian-ness swap to match dump processor expectation.
+  uint8_t identifier_swapped[kMDGUIDSize];
+  memcpy(identifier_swapped, identifier, kMDGUIDSize);
+  uint32_t data1;
+  memcpy(&data1, identifier_swapped, sizeof(data1));
+  data1 = htonl(data1);
+  memcpy(identifier_swapped, &data1, sizeof(data1));
+  for (size_t offset = 4; offset <= 6; offset += 2) {  // data2 and data3.
+    uint16_t data;
+    memcpy(&data, identifier_swapped + offset, sizeof(data));
+    data = htons(data);
+    memcpy(identifier_swapped + offset, &data, sizeof(data));
+  }
+
+  static const char kHex[] = "0123456789ABCDEF";
+  const int last = buffer_length - 1;  // Index kept free for the NUL.
+  int out = 0;
+  for (unsigned int idx = 0; idx < kMDGUIDSize && out < last; ++idx) {
+    if (idx == 4 || idx == 6 || idx == 8 || idx == 10) buffer[out++] = '-';
+    if (out < last) buffer[out++] = kHex[identifier_swapped[idx] >> 4];
+    if (out < last) buffer[out++] = kHex[identifier_swapped[idx] & 0x0F];
+  }
+  buffer[out] = 0;
+}
+
+}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulElfExt.h b/mozglue/baseprofiler/lul/LulElfExt.h
new file mode 100644
index 0000000000..73d9ff7f15
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElfExt.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.h
+
+#ifndef LulElfExt_h
+#define LulElfExt_h
+
+// These two functions are the external interface to the
+// ELF/Dwarf/EXIDX reader.
+
+#include "LulMainInt.h"
+
+using lul::SecMap;
+
+namespace lul {
+
+class UniqueStringUniverse;
+
+// Find all the unwind information in OBJ_FILE, an ELF executable
+// or shared library, and add it to SMAP.
+bool ReadSymbolData(const std::string& obj_file,
+                    const std::vector<std::string>& debug_dirs, SecMap* smap,
+                    void* rx_avma, size_t rx_size, UniqueStringUniverse* usu,
+                    void (*log)(const char*));
+
+// The same as ReadSymbolData, except that OBJ_FILE is assumed to
+// point to a mapped-in image of OBJ_FILENAME.
+bool ReadSymbolDataInternal(const uint8_t* obj_file,
+                            const std::string& obj_filename,
+                            const std::vector<std::string>& debug_dirs,
+                            SecMap* smap, void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*));
+
+}  // namespace lul
+
+#endif  // LulElfExt_h
diff --git a/mozglue/baseprofiler/lul/LulElfInt.h b/mozglue/baseprofiler/lul/LulElfInt.h
new file mode 100644
index 0000000000..31ffba8ff0
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulElfInt.h
@@ -0,0 +1,218 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2012, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/android/include/elf.h
+//   src/common/linux/elfutils.h
+//   src/common/linux/file_id.h
+//   src/common/linux/elfutils-inl.h
+
+#ifndef LulElfInt_h
+#define LulElfInt_h
+
+// This header defines functions etc internal to the ELF reader.  It
+// should not be included outside of LulElf.cpp.
+
+#include <elf.h>
+#include <stdlib.h>
+
+#include "mozilla/Assertions.h"
+
+#include "PlatformMacros.h"
+
+// (derived from)
+// elfutils.h: Utilities for dealing with ELF files.
+//
+#include <link.h>
+
+#if defined(GP_OS_android)
+
+// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h
+// The Android headers don't always define this constant.
+#  ifndef EM_X86_64
+#    define EM_X86_64 62
+#  endif
+
+#  ifndef EM_PPC64
+#    define EM_PPC64 21
+#  endif
+
+#  ifndef EM_S390
+#    define EM_S390 22
+#  endif
+
+#  ifndef NT_GNU_BUILD_ID
+#    define NT_GNU_BUILD_ID 3
+#  endif
+
+#  ifndef ElfW
+#    define ElfW(type) _ElfW(Elf, ELFSIZE, type)
+#    define _ElfW(e, w, t) _ElfW_1(e, w, _##t)
+#    define _ElfW_1(e, w, t) e##w##t
+#  endif
+
+#endif
+
+#if defined(GP_OS_freebsd)
+
+#  ifndef ElfW
+#    define ElfW(type) Elf_##type
+#  endif
+
+#endif
+
+namespace lul {
+
+// Traits class bundling the 32-bit ELF types and constants, so that
+// templatized code can be written once for both ELF classes.
+struct ElfClass32 {
+  using Addr = Elf32_Addr;
+  using Ehdr = Elf32_Ehdr;
+  using Nhdr = Elf32_Nhdr;
+  using Phdr = Elf32_Phdr;
+  using Shdr = Elf32_Shdr;
+  using Half = Elf32_Half;
+  using Off = Elf32_Off;
+  using Word = Elf32_Word;
+  static const int kClass = ELFCLASS32;
+  static const size_t kAddrSize = sizeof(Addr);
+};
+// Traits class bundling the 64-bit ELF types and constants.
+struct ElfClass64 {
+  using Addr = Elf64_Addr;
+  using Ehdr = Elf64_Ehdr;
+  using Nhdr = Elf64_Nhdr;
+  using Phdr = Elf64_Phdr;
+  using Shdr = Elf64_Shdr;
+  using Half = Elf64_Half;
+  using Off = Elf64_Off;
+  using Word = Elf64_Word;
+  static const int kClass = ELFCLASS64;
+  static const size_t kAddrSize = sizeof(Addr);
+};
+
+bool IsValidElf(const void* elf_header);
+int ElfClass(const void* elf_base);
+
+// Attempt to find a section named |section_name| of type |section_type|
+// in the ELF binary data at |elf_mapped_base|. On success, returns true
+// and sets |*section_start| to point to the start of the section data,
+// and |*section_size| to the size of the section's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSection(const void* elf_mapped_base, const char* section_name,
+                    uint32_t section_type, const void** section_start,
+                    int* section_size, int* elfclass);
+
+// Internal helper method, exposed for convenience for callers
+// that already have more info.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name, typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections, const char* section_names,
+    const char* names_end, int nsection);
+
+// Attempt to find the first segment of type |segment_type| in the ELF
+// binary data at |elf_mapped_base|. On success, returns true and sets
+// |*segment_start| to point to the start of the segment data, and
+// |*segment_size| to the size of the segment's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type,
+                    const void** segment_start, int* segment_size,
+                    int* elfclass);
+
+// Convert an offset from an Elf header into a pointer to the mapped
+// address in the current process. Takes an extra template parameter
+// to specify the return type to avoid having to cast the result at
+// each call site.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset);
+
+// (derived from)
+// file_id.h: Return a unique identifier for a file
+//
+
+static const size_t kMDGUIDSize = sizeof(MDGUID);
+// Static helpers that derive and format the identifier of a mapped ELF file.
+class FileID {
+ public:
+  // Load the identifier for the elf file mapped into memory at |base| into
+  // |identifier|: its build ID note when one is found, otherwise a hash of the
+  // start of its .text section.  Return false if neither is available.
+  static bool ElfFileIdentifierFromMappedFile(const void* base,
+                                              uint8_t identifier[kMDGUIDSize]);
+
+  // Convert the |identifier| data to a NUL terminated string, formatted as a
+  // UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE).  The |buffer| should
+  // be at least 37 bytes long (36 characters plus the terminator) to receive
+  // all of the data.  Shorter buffers are meant to get truncated data.
+  static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                        char* buffer, int buffer_length);
+};
+// Definition of GetOffset(): OFFSET is counted in bytes from ELF_HEADER.
+template <typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset) {
+  const uintptr_t image_start = reinterpret_cast<uintptr_t>(elf_header);
+  return reinterpret_cast<const T*>(image_start + offset);
+}
+
+// Definition of FindElfSectionByName(): scan the NSECTION section headers for
+// one of type SECTION_TYPE whose name equals NAME.  Returns NULL if none.
+template <typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name, typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections, const char* section_names,
+    const char* names_end, int nsection) {
+  MOZ_ASSERT(name != NULL);
+  MOZ_ASSERT(sections != NULL);
+  MOZ_ASSERT(nsection > 0);
+  const int name_len = strlen(name);
+  if (name_len == 0) return NULL;
+  for (int index = 0; index < nsection; ++index) {
+    const typename ElfClass::Shdr& shdr = sections[index];
+    if (shdr.sh_type != section_type) continue;
+    // Skip names that cannot hold NAME plus its terminator before NAMES_END.
+    const char* candidate = section_names + shdr.sh_name;
+    if (names_end - candidate < name_len + 1) continue;
+    if (strcmp(name, candidate) == 0) return &shdr;
+  }
+  return NULL;
+}
+
+}  // namespace lul
+
+// And finally, the external interface, offered to LulMain.cpp
+#include "LulElfExt.h"
+
+#endif  // LulElfInt_h
diff --git a/mozglue/baseprofiler/lul/LulMain.cpp b/mozglue/baseprofiler/lul/LulMain.cpp
new file mode 100644
index 0000000000..f513920a0b
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMain.cpp
@@ -0,0 +1,1956 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulMain.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>  // write(), only for testing LUL
+
+#include <algorithm>  // std::sort
+#include <string>
+#include <utility>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+
+#include "BaseProfiler.h"
+#include "LulCommonExt.h"
+#include "LulElfExt.h"
+#include "LulMainInt.h"
+
+using mozilla::baseprofiler::profiler_current_process_id;
+using mozilla::baseprofiler::profiler_current_thread_id;
+
+// Set this to 1 for verbose logging
+#define DEBUG_MAIN 0
+
+namespace lul {
+
+using mozilla::CheckedInt;
+using mozilla::DebugOnly;
+using mozilla::MallocSizeOf;
+using mozilla::Unused;
+using std::pair;
+using std::string;
+using std::vector;
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
+// Any such function -- and, hence, the transitive closure of those
+// reachable from it -- must not do any dynamic memory allocation.
+// Doing so risks deadlock.  There is exactly one root function for
+// the transitive closure: LUL::Unwind.
+//
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+// Returns a short printable name for the DW_REG_ register number |aReg|, for
+// use when pretty-printing unwind rules.  Only the registers listed for the
+// architecture selected at build time are recognised; any other value yields
+// "???".  The returned pointer refers to a string literal.
+static const char* NameOf_DW_REG(int16_t aReg) {
+  switch (aReg) {
+    case DW_REG_CFA:
+      return "cfa";
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XBP:
+      return "xbp";
+    case DW_REG_INTEL_XSP:
+      return "xsp";
+    case DW_REG_INTEL_XIP:
+      return "xip";
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R7:
+      return "r7";
+    case DW_REG_ARM_R11:
+      return "r11";
+    case DW_REG_ARM_R12:
+      return "r12";
+    case DW_REG_ARM_R13:
+      return "r13";
+    case DW_REG_ARM_R14:
+      return "r14";
+    case DW_REG_ARM_R15:
+      return "r15";
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      return "x29";
+    case DW_REG_AARCH64_X30:
+      return "x30";
+    case DW_REG_AARCH64_SP:
+      return "sp";
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      return "sp";
+    case DW_REG_MIPS_FP:
+      return "fp";
+    case DW_REG_MIPS_PC:
+      return "pc";
+#else
+#  error "Unsupported arch"
+#endif
+    default:
+      // Not a register tracked on this architecture.
+      return "???";
+  }
+}
+
+// Builds a printable description of this expression, of the form
+// "<aNewReg>=<rule>", where <rule> depends on how the value is computed
+// (mHow).  Intended for debug output only.
+string LExpr::ShowRule(const char* aNewReg) const {
+  string text(aNewReg);
+  text += "=";
+
+  // Scratch space for the formatted right-hand side.
+  char scratch[64];
+  if (mHow == UNKNOWN) {
+    text += "Unknown";
+  } else if (mHow == NODEREF) {
+    SprintfLiteral(scratch, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
+    text += scratch;
+  } else if (mHow == DEREF) {
+    SprintfLiteral(scratch, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
+    text += scratch;
+  } else if (mHow == PFXEXPR) {
+    SprintfLiteral(scratch, "PfxExpr-at-%d", (int)mOffset);
+    text += scratch;
+  } else {
+    // Unrecognised kind of expression.
+    text += "???";
+  }
+  return text;
+}
+
+// Formats this RuleSet as a single line -- the inclusive address range it
+// covers, the CFA rule, then the recovery rule for each register tracked on
+// the build architecture -- and passes that line to |aLog|.
+void RuleSet::Print(void (*aLog)(const char*)) const {
+  const unsigned long long int firstAddr = (unsigned long long int)mAddr;
+  const unsigned long long int lastAddr =
+      (unsigned long long int)(mAddr + mLen - 1);
+
+  char header[96];
+  SprintfLiteral(header, "[%llx .. %llx]: let ", firstAddr, lastAddr);
+
+  string line(header);
+  line.append(mCfaExpr.ShowRule("cfa"));
+  line.append(" in");
+  // One recovery expression per register of interest.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  line.append(mXipExpr.ShowRule(" RA"));
+  line.append(mXspExpr.ShowRule(" SP"));
+  line.append(mXbpExpr.ShowRule(" BP"));
+#elif defined(GP_ARCH_arm)
+  line.append(mR15expr.ShowRule(" R15"));
+  line.append(mR7expr.ShowRule(" R7"));
+  line.append(mR11expr.ShowRule(" R11"));
+  line.append(mR12expr.ShowRule(" R12"));
+  line.append(mR13expr.ShowRule(" R13"));
+  line.append(mR14expr.ShowRule(" R14"));
+#elif defined(GP_ARCH_arm64)
+  line.append(mX29expr.ShowRule(" X29"));
+  line.append(mX30expr.ShowRule(" X30"));
+  line.append(mSPexpr.ShowRule(" SP"));
+#elif defined(GP_ARCH_mips64)
+  line.append(mPCexpr.ShowRule(" PC"));
+  line.append(mSPexpr.ShowRule(" SP"));
+  line.append(mFPexpr.ShowRule(" FP"));
+#else
+#  error "Unsupported arch"
+#endif
+  aLog(line.c_str());
+}
+
+// Returns a pointer to the LExpr member of this RuleSet that holds the
+// recovery rule for register |aRegno|, or nullptr if |aRegno| is not one of
+// the registers tracked for the architecture selected at build time.  The
+// returned pointer refers into |*this|.
+LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
+  switch (aRegno) {
+    case DW_REG_CFA:
+      return &mCfaExpr;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XIP:
+      return &mXipExpr;
+    case DW_REG_INTEL_XSP:
+      return &mXspExpr;
+    case DW_REG_INTEL_XBP:
+      return &mXbpExpr;
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R15:
+      return &mR15expr;
+    case DW_REG_ARM_R14:
+      return &mR14expr;
+    case DW_REG_ARM_R13:
+      return &mR13expr;
+    case DW_REG_ARM_R12:
+      return &mR12expr;
+    case DW_REG_ARM_R11:
+      return &mR11expr;
+    case DW_REG_ARM_R7:
+      return &mR7expr;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      return &mX29expr;
+    case DW_REG_AARCH64_X30:
+      return &mX30expr;
+    case DW_REG_AARCH64_SP:
+      return &mSPexpr;
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      return &mSPexpr;
+    case DW_REG_MIPS_FP:
+      return &mFPexpr;
+    case DW_REG_MIPS_PC:
+      return &mPCexpr;
+#else
+#  error "Unknown arch"
+#endif
+    default:
+      // No rule is stored for this register.
+      return nullptr;
+  }
+}
+
+// Constructs a RuleSet describing an empty address range.
+RuleSet::RuleSet() {
+  // Address and length are the only non-LExpr fields; the LExpr members
+  // are set up by LExpr::LExpr().
+  mAddr = mLen = 0;
+}
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// See header file LulMainInt.h for comments about invariants.
+
+// Constructs an empty SecMap that reports through |aLog|.  The summary
+// range starts out as min=1 / max=0 and the map is flagged as usable.
+SecMap::SecMap(void (*aLog)(const char*))
+    : mSummaryMinAddr(1),
+      mSummaryMaxAddr(0),
+      mUsable(true),
+      mLog(aLog) {}
+
+// Empties the RuleSet collection on destruction.
+SecMap::~SecMap() {
+  mRuleSets.clear();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Binary-searches mRuleSets for the entry whose address range contains |ia|
+// and returns a pointer to it, or nullptr if there is none.  The map must
+// have been sorted and preened (mUsable) before this is called.
+RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
+  MOZ_ASSERT(mUsable);
+
+  // The bounds are signed so that |last| can become -1, both for an empty
+  // vector and when the search narrows below index 0.
+  long int first = 0;
+  long int last = (long int)mRuleSets.size() - 1;
+
+  // Invariant: the unsearched space is first .. last, inclusive.
+  while (first <= last) {
+    long int probe = first + ((last - first) / 2);
+    RuleSet* candidate = &mRuleSets[probe];
+    uintptr_t candMin = candidate->mAddr;
+    uintptr_t candMax = candMin + candidate->mLen - 1;
+    if (ia < candMin) {
+      last = probe - 1;
+    } else if (ia > candMax) {
+      first = probe + 1;
+    } else {
+      MOZ_ASSERT(candMin <= ia && ia <= candMax);
+      return candidate;
+    }
+  }
+
+  // Search space exhausted: not found.
+  return nullptr;
+}
+
+// Appends a copy of |*rs| to the collection.  Afterwards the map is flagged
+// as not usable for searching.
+void SecMap::AddRuleSet(const RuleSet* rs) {
+  mUsable = false;
+  const RuleSet& incoming = *rs;
+  mRuleSets.push_back(incoming);
+}
+
+// Appends |pfxi| to the vector of PfxInstrs and returns the index at which
+// it was stored.  Afterwards the map is flagged as not usable for searching.
+uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
+  mUsable = false;
+  // The new element lands at the current end of the vector.
+  const uint32_t newIndex = (uint32_t)mPfxInstrs.size();
+  mPfxInstrs.push_back(pfxi);
+  return newIndex;
+}
+
+// Ordering predicate for std::sort: true iff |rs1| starts at a strictly
+// lower address than |rs2|.
+static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
+  const bool firstStartsLower = rs2.mAddr > rs1.mAddr;
+  return firstStartsLower;
+}
+
+// Prepare the map for searching.  Sorts the RuleSets by start address,
+// discards any that are not completely contained in the range
+// [aStart, aStart + aLen), truncates entries that overlap their successor,
+// removes zero-length entries, recomputes the summary min/max addresses,
+// logs a one-line summary, and finally marks the map as usable for binary
+// search.  If there are no RuleSets at all, returns immediately without
+// changing anything.
+void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
+  if (mRuleSets.empty()) {
+    return;
+  }
+
+  MOZ_ASSERT(aLen > 0);
+  if (aLen == 0) {
+    // This should never happen.
+    mRuleSets.clear();
+    return;
+  }
+
+  // Sort by start addresses.
+  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
+
+  // Detect any entry not completely contained within [start, +len).
+  // Set its length to zero, so that the next pass will remove it.
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    RuleSet* rs = &mRuleSets[i];
+    if (rs->mLen > 0 &&
+        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
+      rs->mLen = 0;
+    }
+  }
+
+  // Iteratively truncate any overlaps and remove any zero length
+  // entries that might result, or that may have been present
+  // initially.  Unless the input is seriously screwy, this is
+  // expected to iterate only once.
+  while (true) {
+    size_t i;
+    size_t n = mRuleSets.size();
+    size_t nZeroLen = 0;
+
+    if (n == 0) {
+      break;
+    }
+
+    // Truncate each entry that runs into its successor, counting the
+    // zero-length entries among indices 0 .. n-2 as we go.
+    for (i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i - 1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr <= here->mAddr);
+      if (prev->mAddr + prev->mLen > here->mAddr) {
+        prev->mLen = here->mAddr - prev->mAddr;
+      }
+      if (prev->mLen == 0) nZeroLen++;
+    }
+
+    // The loop above never examines the final entry as |prev|, so account
+    // for it separately.
+    if (mRuleSets[n - 1].mLen == 0) {
+      nZeroLen++;
+    }
+
+    // At this point, the entries are in-order and non-overlapping.
+    // If none of them are zero-length, we are done.
+    if (nZeroLen == 0) {
+      break;
+    }
+
+    // Slide back the entries to remove the zero length ones.
+    size_t j = 0;  // The write-point.
+    for (i = 0; i < n; ++i) {
+      if (mRuleSets[i].mLen == 0) {
+        continue;
+      }
+      if (j != i) mRuleSets[j] = mRuleSets[i];
+      ++j;
+    }
+    MOZ_ASSERT(i == n);
+    MOZ_ASSERT(nZeroLen <= n);
+    MOZ_ASSERT(j == n - nZeroLen);
+    // Drop the now-stale tail left behind by the compaction.
+    while (nZeroLen > 0) {
+      mRuleSets.pop_back();
+      nZeroLen--;
+    }
+
+    MOZ_ASSERT(mRuleSets.size() == j);
+  }
+
+  size_t n = mRuleSets.size();
+
+#ifdef DEBUG
+  // Do a final check on the rules: their address ranges must be
+  // ascending, non overlapping, non zero sized.
+  if (n > 0) {
+    MOZ_ASSERT(mRuleSets[0].mLen > 0);
+    for (size_t i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i - 1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr < here->mAddr);
+      MOZ_ASSERT(here->mLen > 0);
+      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
+    }
+  }
+#endif
+
+  // Set the summary min and max address values.
+  if (n == 0) {
+    // Use the values defined in comments in the class declaration.
+    mSummaryMinAddr = 1;
+    mSummaryMaxAddr = 0;
+  } else {
+    mSummaryMinAddr = mRuleSets[0].mAddr;
+    mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
+  }
+  char buf[150];
+  SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
+                 (int)n, (unsigned long long int)mSummaryMinAddr,
+                 (unsigned long long int)mSummaryMaxAddr);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Is now usable for binary search.
+  mUsable = true;
+
+  // Disabled debugging aid: dumps every RuleSet after preening.
+#if 0
+  mLog("\nRulesets after preening\n");
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    mRuleSets[i].Print(mLog);
+    mLog("\n");
+  }
+  mLog("\n");
+#endif
+}
+
+// True iff this SecMap currently holds no RuleSets.
+bool SecMap::IsEmpty() {
+  return mRuleSets.size() == 0;
+}
+
+// Reports the heap footprint of this SecMap as measured by |aMallocSizeOf|:
+// the object itself plus the backing stores of its two vectors.
+size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  const size_t selfBytes = aMallocSizeOf(this);
+
+  // Handing the vectors' data() pointers to the measurer could conceivably
+  // be unsafe with some std::vector implementations, but it seems to be
+  // working for now...
+  const size_t ruleSetBytes = aMallocSizeOf(mRuleSets.data());
+  const size_t pfxInstrBytes = aMallocSizeOf(mPfxInstrs.data());
+
+  return selfBytes + ruleSetBytes + pfxInstrBytes;
+}
+
+////////////////////////////////////////////////////////////////
+// SegArray                                                   //
+////////////////////////////////////////////////////////////////
+
+// A SegArray holds a set of address ranges that together exactly
+// cover an address range, with no overlaps or holes.  Each range has
+// an associated value, which in this case has been specialised to be
+// a simple boolean.  The representation is kept to minimal canonical
+// form in which adjacent ranges with the same associated value are
+// merged together.  Each range is represented by a |struct Seg|.
+//
+// SegArrays are used to keep track of which parts of the address
+// space are known to contain instructions.
+class SegArray {
+ public:
+  // Sets the value associated with every address in the inclusive range
+  // [lo, hi] to |val|, then restores the canonical (merged) form.  A request
+  // with lo > hi is ignored.  This may grow or shrink the underlying vector.
+  void add(uintptr_t lo, uintptr_t hi, bool val) {
+    if (lo > hi) {
+      return;
+    }
+    // Ensure segment boundaries exist exactly at |lo| and just after |hi|,
+    // so that a whole number of segments covers precisely [lo, hi].
+    split_at(lo);
+    if (hi < UINTPTR_MAX) {
+      split_at(hi + 1);
+    }
+    const std::vector<Seg>::size_type iLo = find(lo);
+    const std::vector<Seg>::size_type iHi = find(hi);
+    if (iLo >= mSegs.size() || iHi >= mSegs.size()) {
+      // find() failed.  That should be impossible, but never index with
+      // its failure value.
+      return;
+    }
+    for (std::vector<Seg>::size_type i = iLo; i <= iHi; ++i) {
+      mSegs[i].val = val;
+    }
+    preen();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // If |addr| lies in a segment whose value is true, writes that segment's
+  // inclusive bounds to |*rx_min| and |*rx_max| and returns true.  Otherwise
+  // returns false and leaves the outputs untouched.
+  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
+                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
+    const std::vector<Seg>::size_type i = find(addr);
+    if (i >= mSegs.size() || !mSegs[i].val) {
+      return false;
+    }
+    *rx_min = mSegs[i].lo;
+    *rx_max = mSegs[i].hi;
+    return true;
+  }
+
+  // Starts out with a single segment covering the whole address space, with
+  // value false.
+  SegArray() {
+    Seg s(0, UINTPTR_MAX, false);
+    mSegs.push_back(s);
+  }
+
+ private:
+  // One inclusive address range [lo, hi] and its associated value.
+  struct Seg {
+    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
+    uintptr_t lo;
+    uintptr_t hi;
+    bool val;
+  };
+
+  // Merges adjacent segments that carry the same value.  Index based, so
+  // that erasing never leaves us holding an invalidated iterator and we
+  // never step an iterator back past begin().
+  void preen() {
+    std::vector<Seg>::size_type i = 0;
+    while (i + 1 < mSegs.size()) {
+      if (mSegs[i].val != mSegs[i + 1].val) {
+        ++i;
+        continue;
+      }
+      mSegs[i].hi = mSegs[i + 1].hi;
+      mSegs.erase(mSegs.begin() + i + 1);
+      // Deliberately stay at |i|, so as not to miss an opportunity to
+      // merge with the entry after this one.
+    }
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Binary-searches for the index of the segment containing |a|.  Returns
+  // size_type(-1) if there is none, which cannot happen while the segments
+  // cover the whole address space.
+  std::vector<Seg>::size_type find(uintptr_t a) {
+    // Signed bounds, so that |hi| can drop below zero.
+    long int lo = 0;
+    long int hi = (long int)mSegs.size() - 1;
+    while (true) {
+      // The unsearched space is lo .. hi inclusive.
+      if (lo > hi) {
+        // Not found.  This can't happen.
+        return (std::vector<Seg>::size_type)(-1);
+      }
+      long int mid = lo + ((hi - lo) / 2);
+      uintptr_t mid_lo = mSegs[mid].lo;
+      uintptr_t mid_hi = mSegs[mid].hi;
+      if (a < mid_lo) {
+        hi = mid - 1;
+        continue;
+      }
+      if (a > mid_hi) {
+        lo = mid + 1;
+        continue;
+      }
+      return (std::vector<Seg>::size_type)mid;
+    }
+  }
+
+  // Ensures that some segment starts exactly at |a|, by splitting the
+  // segment that contains |a| in two if necessary.  Both halves keep the
+  // original value.
+  void split_at(uintptr_t a) {
+    const std::vector<Seg>::size_type i = find(a);
+    if (i >= mSegs.size()) {
+      // find() failed; nothing sensible to split.
+      return;
+    }
+    if (mSegs[i].lo == a) {
+      return;
+    }
+    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
+    mSegs[i].hi = a - 1;
+    mSegs[i + 1].lo = a;
+  }
+
+  // Debugging aid: prints every segment to stdout.
+  void show() {
+    printf("<< %d entries:\n", (int)mSegs.size());
+    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
+         ++iter) {
+      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
+             (unsigned long long int)(*iter).hi,
+             (*iter).val ? "true" : "false");
+    }
+    printf(">>\n");
+  }
+
+  std::vector<Seg> mSegs;
+};
+
+////////////////////////////////////////////////////////////////
+// PriMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// Holds the SecMaps of all currently mapped objects, kept ordered by address
+// so that the one covering a given address can be found by binary search.
+class PriMap {
+ public:
+  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Looks up the unwind information for address |ia|.  Returns the RuleSet
+  // covering |ia| together with the PfxInstr vector of the SecMap that owns
+  // it.  Both are nullptr if no SecMap covers |ia|; the RuleSet alone is
+  // nullptr if the SecMap has no rule for |ia|.
+  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
+    const RuleSet* ruleSet = nullptr;
+    const vector<PfxInstr>* pfxInstrs = nullptr;
+    if (SecMap* owner = FindSecMap(ia)) {
+      ruleSet = owner->FindRuleSet(ia);
+      pfxInstrs = owner->GetPfxInstrs();
+    }
+    return pair<const RuleSet*, const vector<PfxInstr>*>(ruleSet, pfxInstrs);
+  }
+
+  // Takes ownership of |aSecMap| and inserts it at its place in the ordered
+  // vector.  No overlaps are allowed w.r.t. existing secondary maps.  An
+  // empty SecMap is ignored: nothing could ever be found in it anyway.
+  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
+    if (aSecMap->IsEmpty()) {
+      return;
+    }
+
+    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
+
+    // Linear scan for the first existing map that the new one must precede,
+    // checking the per-map range invariant along the way.
+    // FIXME: this gives a cost that is O(N^2) in the total number of
+    // shared objects in the system.  ToDo: better.
+    const size_t oldCount = mSecMaps.size();
+    size_t insertAt = 0;
+    while (insertAt < oldCount) {
+      const mozilla::UniquePtr<SecMap>& existing = mSecMaps[insertAt];
+      MOZ_ASSERT(existing->mSummaryMinAddr <= existing->mSummaryMaxAddr);
+      if (aSecMap->mSummaryMinAddr < existing->mSummaryMaxAddr) {
+        break;
+      }
+      ++insertAt;
+    }
+    MOZ_ASSERT(insertAt <= oldCount);
+
+    // When |insertAt| equals |oldCount| this inserts at end(), i.e. appends.
+    mSecMaps.insert(mSecMaps.begin() + insertAt, std::move(aSecMap));
+
+    char buf[100];
+    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
+                   (int)mSecMaps.size());
+    buf[sizeof(buf) - 1] = 0;
+    mLog(buf);
+  }
+
+  // Removes, and thereby deletes, every SecMap whose summary range
+  // intersects the inclusive range [avma_min, avma_max].
+  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
+    MOZ_ASSERT(avma_min <= avma_max);
+    // Walk from the end towards the start, so that the special case where
+    // the range denotes the entire address space completes in time
+    // proportional to the number of elements.
+    for (size_t remaining = mSecMaps.size(); remaining > 0; --remaining) {
+      const size_t idx = remaining - 1;
+      const mozilla::UniquePtr<SecMap>& candidate = mSecMaps[idx];
+      const bool overlaps = candidate->mSummaryMaxAddr >= avma_min &&
+                            avma_max >= candidate->mSummaryMinAddr;
+      if (overlaps) {
+        // Erasing slides everything above |idx| down to cover the hole.
+        mSecMaps.erase(mSecMaps.begin() + idx);
+      }
+    }
+  }
+
+  // Return the number of currently contained SecMaps.
+  size_t CountSecMaps() { return mSecMaps.size(); }
+
+  // Heap footprint, per |aMallocSizeOf|, of this object, the vector's
+  // backing store and every contained SecMap.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    size_t total = aMallocSizeOf(this);
+
+    // Measuring the vector's data() pointer could conceivably be unsafe
+    // with some std::vector implementations, but it seems to be working
+    // for now...
+    total += aMallocSizeOf(mSecMaps.data());
+
+    for (size_t idx = 0; idx < mSecMaps.size(); idx++) {
+      total += mSecMaps[idx]->SizeOfIncludingThis(aMallocSizeOf);
+    }
+
+    return total;
+  }
+
+ private:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Binary-searches mSecMaps for the map whose summary range brackets |ia|.
+  // Returns nullptr if there is none.
+  SecMap* FindSecMap(uintptr_t ia) {
+    // Signed bounds, else the loop termination test doesn't work properly.
+    long int first = 0;
+    long int last = (long int)mSecMaps.size() - 1;
+    // Invariant: the unsearched space is first .. last, inclusive.
+    while (first <= last) {
+      long int probe = first + ((last - first) / 2);
+      mozilla::UniquePtr<SecMap>& candidate = mSecMaps[probe];
+      uintptr_t candMin = candidate->mSummaryMinAddr;
+      uintptr_t candMax = candidate->mSummaryMaxAddr;
+      if (ia < candMin) {
+        last = probe - 1;
+      } else if (ia > candMax) {
+        first = probe + 1;
+      } else {
+        MOZ_ASSERT(candMin <= ia && ia <= candMax);
+        return candidate.get();
+      }
+    }
+    // not found
+    return nullptr;
+  }
+
+  // sorted array of per-object ranges, non overlapping, non empty
+  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;
+
+  // a logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+////////////////////////////////////////////////////////////////
+// LUL                                                        //
+////////////////////////////////////////////////////////////////
+
+// Formats |_str| with a prefix giving the current process id, the current
+// thread id and the address of the enclosing object, and hands the result to
+// mLog.  It references |this| and |mLog|, so it can only be used where both
+// are in scope.  The message is built in a 200 byte buffer.
+#define LUL_LOG(_str)                                           \
+  do {                                                          \
+    char buf[200];                                              \
+    SprintfLiteral(buf, "LUL: pid %d tid %d lul-obj %p: %s",    \
+                   profiler_current_process_id(),               \
+                   profiler_current_thread_id(), this, (_str)); \
+    buf[sizeof(buf) - 1] = 0;                                   \
+    mLog(buf);                                                  \
+  } while (0)
+
+// Creates a LUL object that logs through |aLog|.  The object starts out in
+// admin mode, records the constructing thread as the admin thread, and
+// heap-allocates an empty PriMap, SegArray and UniqueStringUniverse.
+LUL::LUL(void (*aLog)(const char*))
+    : mLog(aLog),
+      mAdminMode(true),
+      mAdminThreadId(profiler_current_thread_id()),
+      mPriMap(new PriMap(aLog)),
+      mSegArray(new SegArray()),
+      mUSU(new UniqueStringUniverse()) {
+  LUL_LOG("LUL::LUL: Created object");
+}
+
+// Logs the destruction, then frees the PriMap, SegArray and
+// UniqueStringUniverse owned by this object.
+LUL::~LUL() {
+  // Log first, while mLog is still set.
+  LUL_LOG("LUL::~LUL: Destroyed object");
+  delete mPriMap;
+  delete mSegArray;
+  mLog = nullptr;
+  delete mUSU;
+}
+
+// Logs a summary of how the frames recovered since the previous summary were
+// obtained (context, CFI, frame pointer), but only once at least 5000 new
+// frames have accumulated.
+void LUL::MaybeShowStats() {
+  // This is racy: if another thread updates mStats between the computation
+  // of the total and of the per-category deltas, the categories need not
+  // add up to the total.  It's only stats printing, so we don't care.
+  const uint32_t newTotal = mStats - mStatsPrevious;
+  if (newTotal < 5000) {
+    return;
+  }
+
+  const uint32_t newContext = mStats.mContext - mStatsPrevious.mContext;
+  const uint32_t newCFI = mStats.mCFI - mStatsPrevious.mCFI;
+  const uint32_t newFP = mStats.mFP - mStatsPrevious.mFP;
+  mStatsPrevious = mStats;
+
+  char line[200];
+  SprintfLiteral(line,
+                 "LUL frame stats: TOTAL %5u"
+                 "    CTX %4u    CFI %4u    FP %4u",
+                 newTotal, newContext, newCFI, newFP);
+  line[sizeof(line) - 1] = 0;
+  mLog(line);
+}
+
+// Reports the heap footprint of this object and of its PriMap, as measured
+// by |aMallocSizeOf|.
+size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+  size_t total = aMallocSizeOf(this);
+  total += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
+
+  // Not measured here; may be added later if DMD finds it worthwhile:
+  // - mSegArray
+  // - mUSU
+
+  return total;
+}
+
+// Switches this object out of admin mode.  Must be called on the admin
+// thread (release-asserted).  Calling it again when admin mode is already
+// off is tolerated.
+void LUL::EnableUnwinding() {
+  LUL_LOG("LUL::EnableUnwinding");
+  // Don't assert for Admin mode here.  That is, tolerate a call here
+  // if we are already in Unwinding mode.
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mAdminMode = false;
+}
+
+// Tells LUL that the object |aFileName| has been mapped at
+// [aRXavma, aRXavma + aSize).  Reads the object's unwind data into a new
+// SecMap, adds it to the primary map and records the range as containing
+// code.  If |aMappedImage| is non-null the data is read from that in-memory
+// image, otherwise it is read via the file name.  Admin mode, admin thread
+// only (release-asserted).  A zero |aSize| is logged and otherwise ignored.
+void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+                         const void* aMappedImage) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  char buf[200];
+  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
+                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
+                 aFileName);
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Here's a new mapping, for this object.
+  mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);
+
+  // Read CFI or EXIDX unwind data into |smap|.  The result is deliberately
+  // ignored.
+  if (aMappedImage) {
+    (void)lul::ReadSymbolDataInternal(
+        (const uint8_t*)aMappedImage, string(aFileName),
+        std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  } else {
+    (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
+                              smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
+  }
+
+  mLog("NotifyMap .. preparing entries\n");
+
+  smap->PrepareRuleSets(aRXavma, aSize);
+
+  SprintfLiteral(buf, "NotifyMap got %lld entries\n",
+                 (long long int)smap->Size());
+  buf[sizeof(buf) - 1] = 0;
+  mLog(buf);
+
+  // Add it to the primary map (the top level set of mapped objects).
+  mPriMap->AddSecMap(std::move(smap));
+
+  // Tell the segment array about the mapping, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+// Tells LUL that [aRXavma, aRXavma + aSize) contains executable code.  Only
+// the segment array is updated; no unwind data is read.  Admin mode, admin
+// thread only (release-asserted).  A zero |aSize| is logged and otherwise
+// ignored.
+void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  const unsigned long long int loggedAddr = (unsigned long long int)aRXavma;
+  const unsigned long long int loggedSize = (unsigned long long int)aSize;
+  char msg[200];
+  SprintfLiteral(msg, "NotifyExecutableArea %llx %llu\n", loggedAddr,
+                 loggedSize);
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Tell the segment array about the mapping, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+// Tells LUL that the inclusive address range [aRXavmaMin, aRXavmaMax] is
+// about to be unmapped.  Every SecMap intersecting the range is removed and
+// deleted, and the range is recorded as no longer containing code.  Admin
+// mode, admin thread only (release-asserted).
+void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  mLog(":\n");
+  const unsigned long long int loggedMin = (unsigned long long int)aRXavmaMin;
+  const unsigned long long int loggedMax = (unsigned long long int)aRXavmaMax;
+  char msg[100];
+  SprintfLiteral(msg, "NotifyUnmap %016llx-%016llx\n", loggedMin, loggedMax);
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+
+  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
+
+  // Drop, and delete, every secondary map that intersects the range.
+  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
+
+  // The range no longer contains valid code.
+  mSegArray->add(aRXavmaMin, aRXavmaMax, false);
+
+  const int secMapsLeft = (int)mPriMap->CountSecMaps();
+  SprintfLiteral(msg, "NotifyUnmap: now have %d SecMaps\n", secMapsLeft);
+  msg[sizeof(msg) - 1] = 0;
+  mLog(msg);
+}
+
+// Returns the number of SecMaps currently held in the primary map.  Admin
+// mode, admin thread only (release-asserted).
+size_t LUL::CountMappings() {
+  MOZ_RELEASE_ASSERT(mAdminMode);
+  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
+
+  return mPriMap->CountSecMaps();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Reads one machine word from the stack snapshot |aStackImg| at address
+// |aAddr|.  Returns a default-constructed TaggedUWord if |aAddr| is not
+// valid, or if the word does not lie entirely within the snapshot, which
+// covers [mStartAvma, mStartAvma + mLen).
+static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
+  if (!aAddr.Valid()) {
+    return TaggedUWord();
+  }
+
+  // Lower limit check.  |aAddr.Value()| is the lowest requested address
+  // and |aStackImg->mStartAvma| is the lowest address we actually have,
+  // so the comparison is straightforward.
+  if (aAddr.Value() < aStackImg->mStartAvma) {
+    return TaggedUWord();
+  }
+
+  // Upper limit check.  We must compute the highest requested address
+  // and the highest address we actually have, but being careful to
+  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
+  // 3/7 values below that, then we will get overflow.  See bug #1245477.
+  typedef CheckedInt<uintptr_t> CheckedUWord;
+  CheckedUWord highest_requested_plus_one =
+      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
+  CheckedUWord highest_available_plus_one =
+      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
+  if (!highest_requested_plus_one.isValid()     // overflow?
+      || !highest_available_plus_one.isValid()  // overflow?
+      || (highest_requested_plus_one.value() >
+          highest_available_plus_one.value())) {  // in range?
+    return TaggedUWord();
+  }
+
+  // Nothing guarantees that the requested address is word-aligned within
+  // the snapshot buffer, so copy the bytes out rather than dereferencing a
+  // cast pointer.  That avoids a potentially misaligned,
+  // aliasing-violating load.
+  uintptr_t word;
+  memcpy(&word, &aStackImg->mContents[aAddr.Value() - aStackImg->mStartAvma],
+         sizeof(word));
+  return TaggedUWord(word);
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Returns the value to use for DWARF register |aReg|: |aCFA| for DW_REG_CFA,
+// otherwise the corresponding field of |aOldRegs|.  Only the registers
+// tracked for the architecture selected at build time are handled; any other
+// value asserts (via MOZ_ASSERT) and returns a default-constructed
+// TaggedUWord.
+static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
+                               TaggedUWord aCFA) {
+  switch (aReg) {
+    case DW_REG_CFA:
+      return aCFA;
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XBP:
+      return aOldRegs->xbp;
+    case DW_REG_INTEL_XSP:
+      return aOldRegs->xsp;
+    case DW_REG_INTEL_XIP:
+      return aOldRegs->xip;
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R7:
+      return aOldRegs->r7;
+    case DW_REG_ARM_R11:
+      return aOldRegs->r11;
+    case DW_REG_ARM_R12:
+      return aOldRegs->r12;
+    case DW_REG_ARM_R13:
+      return aOldRegs->r13;
+    case DW_REG_ARM_R14:
+      return aOldRegs->r14;
+    case DW_REG_ARM_R15:
+      return aOldRegs->r15;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+      return aOldRegs->x29;
+    case DW_REG_AARCH64_X30:
+      return aOldRegs->x30;
+    case DW_REG_AARCH64_SP:
+      return aOldRegs->sp;
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_SP:
+      return aOldRegs->sp;
+    case DW_REG_MIPS_FP:
+      return aOldRegs->fp;
+    case DW_REG_MIPS_PC:
+      return aOldRegs->pc;
+#else
+#  error "Unsupported arch"
+#endif
+    default:
+      // A register that is not tracked on this architecture.
+      MOZ_ASSERT(0);
+      return TaggedUWord();
+  }
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// See prototype for comment.
+//
+// In outline: evaluates the prefix-expression instruction sequence that
+// begins at index |start| of |aPfxInstrs|, using a small fixed-size value
+// stack.  The sequence must begin with PX_Start, whose operand says whether
+// |aCFA| is pushed first, and runs until PX_End.  The result is the value on
+// top of the stack at that point.  On any failure -- bad start index,
+// missing PX_Start, stack overflow or underflow, running off the end of the
+// vector, an unexpected or unknown opcode, or an empty stack at the end -- a
+// default-constructed TaggedUWord is returned.
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs) {
+  // A small evaluation stack, and a stack pointer, which points to
+  // the highest numbered in-use element.
+  const int N_STACK = 10;
+  TaggedUWord stack[N_STACK];
+  int stackPointer = -1;
+  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();
+
+  // Push |_tuw|, bailing out to |fail| if the stack is already full.
+#define PUSH(_tuw)                                             \
+  do {                                                         \
+    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
+    stack[++stackPointer] = (_tuw);                            \
+  } while (0)
+
+  // Pop the top of the stack into |_lval|, bailing out to |fail| if the
+  // stack is empty.
+#define POP(_lval)                                   \
+  do {                                               \
+    if (stackPointer < 0) goto fail; /* underflow */ \
+    _lval = stack[stackPointer--];                   \
+  } while (0)
+
+  // Cursor in the instruction sequence.
+  // NOTE(review): presumably initialised here, ahead of the sanity check on
+  // |start|, because |goto fail| may not jump past an initialised
+  // declaration -- confirm before moving it.
+  size_t curr = start + 1;
+
+  // Check the start point is sane.
+  size_t nInstrs = aPfxInstrs.size();
+  if (start < 0 || (size_t)start >= nInstrs) goto fail;
+
+  {
+    // The instruction sequence must start with PX_Start.  If not,
+    // something is seriously wrong.
+    PfxInstr first = aPfxInstrs[start];
+    if (first.mOpcode != PX_Start) goto fail;
+
+    // Push the CFA on the stack to start with (or not), as required by
+    // the original DW_OP_*expression* CFI.
+    if (first.mOperand != 0) PUSH(aCFA);
+  }
+
+  while (true) {
+    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence
+
+    PfxInstr pfxi = aPfxInstrs[curr++];
+    if (pfxi.mOpcode == PX_End) break;  // we're done
+
+    switch (pfxi.mOpcode) {
+      case PX_Start:
+        // This should appear only at the start of the sequence.
+        goto fail;
+      case PX_End:
+        // We just took care of that, so we shouldn't see it again.
+        MOZ_ASSERT(0);
+        goto fail;
+      case PX_SImm32:
+        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
+        break;
+      case PX_DwReg: {
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
+        MOZ_ASSERT(reg != DW_REG_CFA);
+        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
+        break;
+      }
+      case PX_Deref: {
+        TaggedUWord addr;
+        POP(addr);
+        PUSH(DerefTUW(addr, aStackImg));
+        break;
+      }
+      // For each binary operator below, |x| is the value popped first (the
+      // top of the stack) and |y| the one beneath it; the result pushed is
+      // "y op x".
+      case PX_Add: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y + x);
+        break;
+      }
+      case PX_Sub: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y - x);
+        break;
+      }
+      case PX_And: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y & x);
+        break;
+      }
+      case PX_Or: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y | x);
+        break;
+      }
+      case PX_CmpGES: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y.CmpGEs(x));
+        break;
+      }
+      case PX_Shl: {
+        TaggedUWord x, y;
+        POP(x);
+        POP(y);
+        PUSH(y << x);
+        break;
+      }
+      default:
+        MOZ_ASSERT(0);
+        goto fail;
+    }
+  }  // while (true)
+
+  // Evaluation finished.  The top value on the stack is the result.
+  if (stackPointer >= 0) {
+    return stack[stackPointer];
+  }
+  // Else fall through
+
+fail:
+  return TaggedUWord();
+
+#undef PUSH
+#undef POP
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+                                const StackImage* aStackImg,
+                                const vector<PfxInstr>* aPfxInstrs) const {
+  // UNKNOWN, a null |aPfxInstrs|, or an unrecognised |mHow|: TaggedUWord().
+  if (mHow == UNKNOWN) {
+    return TaggedUWord();
+  }
+  if (mHow == NODEREF || mHow == DEREF) {
+    // Both forms begin with |mReg| + |mOffset|.  NODEREF returns that sum
+    // itself; DEREF returns whatever DerefTUW reads at that address.
+    TaggedUWord addr = EvaluateReg(mReg, aOldRegs, aCFA);
+    addr = addr + TaggedUWord((intptr_t)mOffset);
+    if (mHow == DEREF) {
+      return DerefTUW(addr, aStackImg);
+    }
+    return addr;
+  }
+  if (mHow == PFXEXPR) {
+    // |mOffset| is handed to EvaluatePfxExpr as its first argument.
+    MOZ_ASSERT(aPfxInstrs);
+    if (aPfxInstrs) {
+      return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
+    }
+    return TaggedUWord();
+  }
+  MOZ_ASSERT(0);  // unrecognised |mHow|
+  return TaggedUWord();
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
+                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
+  // Every new register value is derived from the values held on entry, so
+  // snapshot those before |*aRegs| starts being overwritten.
+  UnwindRegs old_regs = *aRegs;
+
+  // An invalid value.  It resets the tracked registers below and also serves
+  // as the "old CFA" while the CFA expression itself is being evaluated.
+  const TaggedUWord inval = TaggedUWord();
+
+  // Reset all the tracked registers, so that on return the caller can see
+  // which ones have been computed anew.  If we don't even manage to compute
+  // a new PC value, then the caller will have to abandon the unwind.
+  // FIXME: Create and use instead: aRegs->SetAllInvalid();
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  aRegs->xbp = inval;
+  aRegs->xsp = inval;
+  aRegs->xip = inval;
+#elif defined(GP_ARCH_arm)
+  aRegs->r7 = inval;
+  aRegs->r11 = inval;
+  aRegs->r12 = inval;
+  aRegs->r13 = inval;
+  aRegs->r14 = inval;
+  aRegs->r15 = inval;
+#elif defined(GP_ARCH_arm64)
+  aRegs->x29 = inval;
+  aRegs->x30 = inval;
+  aRegs->sp = inval;
+  aRegs->pc = inval;
+#elif defined(GP_ARCH_mips64)
+  aRegs->sp = inval;
+  aRegs->fp = inval;
+  aRegs->pc = inval;
+#else
+#  error "Unsupported arch"
+#endif
+
+  // First, compute the CFA.  No earlier CFA exists to refer to, which is
+  // why |inval| is passed in the "old cfa" position.
+  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
+                                               aStackImg, aPfxInstrs);
+
+  // If we didn't manage to compute the CFA, well .. that's ungood, but keep
+  // going anyway.  It'll be OK provided none of the register value rules
+  // mention the CFA.
+  //
+  // |evalRule| applies a single rule from |aRS| to the old register values
+  // and the CFA computed above.  It captures its context by reference.
+  auto evalRule = [&](const auto& aExpr) {
+    return aExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+  };
+
+  // Now compute the new value of each register that we're tracking.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  aRegs->xbp = evalRule(aRS->mXbpExpr);
+  aRegs->xsp = evalRule(aRS->mXspExpr);
+  aRegs->xip = evalRule(aRS->mXipExpr);
+#elif defined(GP_ARCH_arm)
+  aRegs->r7 = evalRule(aRS->mR7expr);
+  aRegs->r11 = evalRule(aRS->mR11expr);
+  aRegs->r12 = evalRule(aRS->mR12expr);
+  aRegs->r13 = evalRule(aRS->mR13expr);
+  aRegs->r14 = evalRule(aRS->mR14expr);
+  aRegs->r15 = evalRule(aRS->mR15expr);
+#elif defined(GP_ARCH_arm64)
+  // Note that no rule is applied for |pc| here, so it stays invalid.
+  // LUL::Unwind reads the address of outer frames from |x30| instead.
+  aRegs->x29 = evalRule(aRS->mX29expr);
+  aRegs->x30 = evalRule(aRS->mX30expr);
+  aRegs->sp = evalRule(aRS->mSPexpr);
+#elif defined(GP_ARCH_mips64)
+  aRegs->sp = evalRule(aRS->mSPexpr);
+  aRegs->fp = evalRule(aRS->mFPexpr);
+  aRegs->pc = evalRule(aRS->mPCexpr);
+#else
+#  error "Unsupported arch"
+#endif
+
+  // We're done.  Any regs for which we didn't manage to compute a new value
+  // are still marked as invalid.
+}
+
+// RUNS IN NO-MALLOC CONTEXT.  Walks the stack outwards from |aStartRegs|.
+void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,  // PCs, innermost frame first
+                 /*OUT*/ uintptr_t* aFrameSPs,  // matching SPs, 0 if unknown
+                 /*OUT*/ size_t* aFramesUsed,  // reset to 0, then counts frames
+                 /*OUT*/ size_t* aFramePointerFramesAcquired,  // only incremented
+                 size_t aFramesAvail, UnwindRegs* aStartRegs,
+                 StackImage* aStackImg) {
+  MOZ_RELEASE_ASSERT(!mAdminMode);
+
+  /////////////////////////////////////////////////////////
+  // BEGIN UNWIND
+
+  *aFramesUsed = 0;
+
+  UnwindRegs regs = *aStartRegs;  // working copy; |*aStartRegs| is only read
+  TaggedUWord last_valid_sp = TaggedUWord();
+
+  while (true) {  // each pass records at most one frame
+    if (DEBUG_MAIN) {
+      char buf[300];
+      mLog("\n");
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+      SprintfLiteral(
+          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
+          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
+          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
+          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_arm)
+      SprintfLiteral(
+          buf,
+          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
+          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
+          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
+          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
+          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
+          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
+          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
+          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_arm64)
+      SprintfLiteral(
+          buf,
+          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
+          "  sp %d/%llx\n",
+          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
+          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
+          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#elif defined(GP_ARCH_mips64)
+      SprintfLiteral(
+          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
+          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
+          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+#else
+#  error "Unsupported arch"
+#endif
+    }
+
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    TaggedUWord ia = regs.xip;
+    TaggedUWord sp = regs.xsp;
+#elif defined(GP_ARCH_arm)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
+    TaggedUWord sp = regs.r13;
+#elif defined(GP_ARCH_arm64)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
+    TaggedUWord sp = regs.sp;
+#elif defined(GP_ARCH_mips64)
+    TaggedUWord ia = regs.pc;
+    TaggedUWord sp = regs.sp;
+#else
+#  error "Unsupported arch"
+#endif
+
+    if (*aFramesUsed >= aFramesAvail) {  // the output arrays are full
+      break;
+    }
+
+    // If we don't have a valid value for the PC, give up.
+    if (!ia.Valid()) {
+      break;
+    }
+
+    // If this is the innermost frame, record the SP value, which
+    // presumably is valid.  If this isn't the innermost frame, and we
+    // have a valid SP value, check that its SP value isn't less than
+    // the one we've seen so far, so as to catch potential SP value
+    // cycles.
+    if (*aFramesUsed == 0) {
+      last_valid_sp = sp;
+    } else {
+      MOZ_ASSERT(last_valid_sp.Valid());
+      if (sp.Valid()) {
+        if (sp.Value() < last_valid_sp.Value()) {
+          // Hmm, SP going in the wrong direction.  Let's stop.
+          break;
+        }
+        // Remember where we got to.
+        last_valid_sp = sp;
+      }
+    }
+
+    // For the innermost frame, the IA value is what we need.  For all
+    // other frames, it's actually the return address, so back up one
+    // byte so as to get it into the calling instruction.
+    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
+    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
+    (*aFramesUsed)++;
+
+    // Find the RuleSet for the current IA, if any.  This will also
+    // query the backing (secondary) maps if it isn't found in the
+    // thread-local cache.
+
+    // If this isn't the innermost frame, back up into the calling insn.
+    if (*aFramesUsed > 1) {  // the count was already bumped just above
+      ia = ia + TaggedUWord((uintptr_t)(-1));
+    }
+
+    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
+        mPriMap->Lookup(ia.Value());
+    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
+    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
+
+    if (DEBUG_MAIN) {
+      char buf[100];
+      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
+                     (unsigned long long int)ia.Value(), ruleset);
+      buf[sizeof(buf) - 1] = 0;
+      mLog(buf);
+    }
+
+#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+    /////////////////////////////////////////////
+    ////
+    // On 32 bit x86-linux, syscalls are often done via the VDSO
+    // function __kernel_vsyscall, which doesn't have a corresponding
+    // object that we can read debuginfo from.  That effectively kills
+    // off all stack traces for threads blocked in syscalls.  Hence
+    // special-case by looking at the code surrounding the program
+    // counter.
+    //
+    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
+    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
+    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
+    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
+    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
+    // 0xf7757427 <__kernel_vsyscall+7>:	nop
+    // 0xf7757428 <__kernel_vsyscall+8>:	nop
+    // 0xf7757429 <__kernel_vsyscall+9>:	nop
+    // 0xf775742a <__kernel_vsyscall+10>:	nop
+    // 0xf775742b <__kernel_vsyscall+11>:	nop
+    // 0xf775742c <__kernel_vsyscall+12>:	nop
+    // 0xf775742d <__kernel_vsyscall+13>:	nop
+    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
+    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
+    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
+    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
+    // 0xf7757433 <__kernel_vsyscall+19>:	ret
+    //
+    // In cases where the sampled thread is blocked in a syscall, its
+    // program counter will point at "pop %ebp".  Hence we look for
+    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
+    // the corresponding register-recovery actions are:
+    //    new_ebp = *(old_esp + 0)
+    //    new eip = *(old_esp + 12)
+    //    new_esp = old_esp + 16
+    //
+    // It may also be the case that the program counter points two
+    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
+    // the case where the syscall has been restarted but the thread
+    // hasn't been rescheduled.  The code below doesn't handle that;
+    // it could easily be made to.
+    //
+    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
+      uintptr_t insns_min, insns_max;
+      uintptr_t eip = ia.Value();
+      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
+      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
+        uint8_t* eipC = (uint8_t*)eip;  // reads live code bytes around eip
+        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
+            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
+          TaggedUWord sp_plus_0 = sp;
+          TaggedUWord sp_plus_12 = sp;
+          TaggedUWord sp_plus_16 = sp;
+          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
+          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
+          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
+          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
+          TaggedUWord new_esp = sp_plus_16;
+          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
+            regs.xbp = new_ebp;
+            regs.xip = new_eip;
+            regs.xsp = new_esp;
+            continue;  // carry on unwinding from the recovered registers
+          }
+        }
+      }
+    }
+    ////
+    /////////////////////////////////////////////
+#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
+
+    // So, do we have a ruleset for this address?  If so, use it now.
+    if (ruleset) {
+      if (DEBUG_MAIN) {
+        ruleset->Print(mLog);
+        mLog("\n");
+      }
+      // Use the RuleSet to compute the registers for the previous
+      // frame.  |regs| is modified in-place.
+      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
+      continue;
+    }
+
+#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
+    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
+    defined(GP_PLAT_amd64_freebsd)
+    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
+    // it's possible to recover the caller's frame by using the frame pointer.
+
+    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
+    // and assume the following layout:
+    //
+    //                 <--- new_SP
+    //   +----------+
+    //   |  new_IP  |  (return address)
+    //   +----------+
+    //   |  new_BP  |  <--- old_BP
+    //   +----------+
+    //   |   ....   |
+    //   |   ....   |
+    //   |   ....   |
+    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)
+
+    const size_t wordSzB = sizeof(uintptr_t);
+    TaggedUWord old_xsp = regs.xsp;
+
+    // points at new_BP ?
+    TaggedUWord old_xbp = regs.xbp;
+    // points at new_IP ?
+    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
+    // is the new_SP ?
+    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
+
+    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
+        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
+      // We don't need to do any range, alignment or validity checks for
+      // addresses passed to DerefTUW, since that performs them itself, and
+      // returns an invalid value on failure.  Any such value will poison
+      // subsequent uses, and we do a final check for validity before putting
+      // the computed values into |regs|.
+      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
+      if (new_xbp.Valid() && new_xbp.IsAligned() &&
+          old_xbp.Value() < new_xbp.Value()) {
+        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
+        TaggedUWord new_xsp = old_xbp_plus2;
+        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
+          regs.xbp = new_xbp;
+          regs.xip = new_xip;
+          regs.xsp = new_xsp;
+          (*aFramePointerFramesAcquired)++;
+          continue;
+        }
+      }
+    }
+#elif defined(GP_ARCH_arm64)
+    // Here is an example of generated code for prologue and epilogue..
+    //
+    // stp     x29, x30, [sp, #-16]!
+    // mov     x29, sp
+    // ...
+    // ldp     x29, x30, [sp], #16
+    // ret
+    //
+    // Next is another example of generated code.
+    //
+    // stp     x20, x19, [sp, #-32]!
+    // stp     x29, x30, [sp, #16]
+    // add     x29, sp, #0x10
+    // ...
+    // ldp     x29, x30, [sp, #16]
+    // ldp     x20, x19, [sp], #32
+    // ret
+    //
+    // Previous x29 and x30 register are stored in the address of x29 register.
+    // But since sp register value depends on local variables, we cannot compute
+    // previous sp register from current sp/fp/lr register and there is no
+    // regular rule for sp register in prologue. But since return address is lr
+    // register, if x29 is valid, we will get return address without sp
+    // register.
+    //
+    // So we assume the following layout that if no rule set. x29 is frame
+    // pointer, so we will be able to compute x29 and x30 .
+    //
+    //   +----------+  <--- new_sp (cannot compute)
+    //   |   ....   |
+    //   +----------+
+    //   |  new_lr  |  (return address)
+    //   +----------+
+    //   |  new_fp  |  <--- old_fp
+    //   +----------+
+    //   |   ....   |
+    //   |   ....   |
+    //   +----------+  <---- old_sp (arbitrary, but unused)
+
+    TaggedUWord old_fp = regs.x29;
+    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
+        last_valid_sp.Value() <= old_fp.Value()) {
+      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
+      if (new_fp.Valid() && new_fp.IsAligned() &&
+          old_fp.Value() < new_fp.Value()) {
+        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
+        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
+        if (new_lr.Valid()) {
+          regs.x29 = new_fp;
+          regs.x30 = new_lr;
+          // When using frame pointer to walk stack, we cannot compute sp
+          // register since we cannot compute sp register from fp/lr/sp
+          // register, and there is no regular rule to compute previous sp
+          // register. So mark as invalid.
+          regs.sp = TaggedUWord();
+          (*aFramePointerFramesAcquired)++;
+          continue;
+        }
+      }
+    }
+#endif  // frame-pointer fallbacks: amd64/x86 (linux, android), amd64 freebsd,
+        // and arm64
+
+    // We failed to recover a frame either using CFI or FP chasing, and we
+    // have no other ways to recover the frame.  So we have to give up.
+    break;
+
+  }  // top level unwind loop
+
+  // END UNWIND
+  /////////////////////////////////////////////////////////
+}
+
+////////////////////////////////////////////////////////////////
+// LUL Unit Testing                                           //
+////////////////////////////////////////////////////////////////
+
+static const int LUL_UNIT_TEST_STACK_SIZE = 32768;  // bytes of stack shown to LUL
+
+#if defined(GP_ARCH_mips64)  // helper for the mips64 path of the unit test
+static __attribute__((noinline)) unsigned long __getpc(void) {
+  unsigned long rtaddr;  // receives the contents of register $31
+  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
+  return rtaddr;  // NOTE(review): presumably this call's return address
+}
+#endif
+
+// Innermost function of the test call sequence.  Snapshots the current
+// registers and the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack
+// (starting at the stack pointer), has |aLUL| unwind them, and checks
+// the resulting PCs against the call sequence spelled out by the
+// director string |dstring| (digits '1' .. '8', one per TestFn call).
+// The stack frame here must stay smallish, because the snapshot
+// unavoidably includes this function's own frame.
+//
+// This function must not be inlined into its callers.  Doing so will
+// cause the expected-vs-actual backtrace consistency checking to
+// fail.  Prints summary results to |aLUL|'s logging sink and also
+// returns a boolean indicating whether or not the test passed.
+static __attribute__((noinline)) bool GetAndCheckStackTrace(
+    LUL* aLUL, const char* dstring) {
+  // Get hold of the current unwind-start registers.
+  UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+#if defined(GP_ARCH_amd64)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "leaq 0(%%rip), %%r15"
+      "\n\t"
+      "movq %%r15, 0(%0)"
+      "\n\t"
+      "movq %%rsp, 8(%0)"
+      "\n\t"
+      "movq %%rbp, 16(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r15");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 128;
+  uintptr_t start = block[1] - REDZONE_SIZE;  // block[1] holds rsp
+#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 12);
+  __asm__ __volatile__(
+      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
+      "\n\t"
+      "popl %%edi"
+      "\n\t"
+      "movl %%edi, 0(%0)"
+      "\n\t"
+      "movl %%esp, 4(%0)"
+      "\n\t"
+      "movl %%ebp, 8(%0)"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "edi");
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;  // block[1] holds esp
+#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
+  volatile uintptr_t block[6];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "mov r0, r15"
+      "\n\t"
+      "str r0,  [%0, #0]"
+      "\n\t"
+      "str r14, [%0, #4]"
+      "\n\t"
+      "str r13, [%0, #8]"
+      "\n\t"
+      "str r12, [%0, #12]"
+      "\n\t"
+      "str r11, [%0, #16]"
+      "\n\t"
+      "str r7,  [%0, #20]"
+      "\n"
+      :
+      : "r"(&block[0])
+      : "memory", "r0");
+  startRegs.r15 = TaggedUWord(block[0]);
+  startRegs.r14 = TaggedUWord(block[1]);
+  startRegs.r13 = TaggedUWord(block[2]);
+  startRegs.r12 = TaggedUWord(block[3]);
+  startRegs.r11 = TaggedUWord(block[4]);
+  startRegs.r7 = TaggedUWord(block[5]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[2] - REDZONE_SIZE;  // block[2] holds r13 (the SP)
+#elif defined(GP_ARCH_arm64)
+  volatile uintptr_t block[4];
+  MOZ_ASSERT(sizeof(block) == 32);
+  __asm__ __volatile__(
+      "adr x0, . \n\t"
+      "str x0, [%0, #0] \n\t"
+      "str x29, [%0, #8] \n\t"
+      "str x30, [%0, #16] \n\t"
+      "mov x0, sp \n\t"
+      "str x0, [%0, #24] \n\t"
+      :
+      : "r"(&block[0])
+      : "memory", "x0");
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.x29 = TaggedUWord(block[1]);
+  startRegs.x30 = TaggedUWord(block[2]);
+  startRegs.sp = TaggedUWord(block[3]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[3] - REDZONE_SIZE;  // block[3] holds sp
+#elif defined(GP_ARCH_mips64)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+      "sd $29, 8(%0)     \n"
+      "sd $30, 16(%0)    \n"
+      :
+      : "r"(block)
+      : "memory");
+  block[0] = __getpc();
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.sp = TaggedUWord(block[1]);
+  startRegs.fp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;  // block[1] holds $29 (the sp)
+#else
+#  error "Unsupported platform"
+#endif
+
+  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
+  // stack.
+  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
+  uintptr_t ws = sizeof(void*);
+  start &= ~(ws - 1);
+  end &= ~(ws - 1);
+  uintptr_t nToCopy = end - start;
+  if (nToCopy > lul::N_STACK_BYTES) {
+    nToCopy = lul::N_STACK_BYTES;
+  }
+  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+  StackImage* stackImg = new StackImage();
+  stackImg->mLen = nToCopy;
+  stackImg->mStartAvma = start;
+  if (nToCopy > 0) {
+    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
+    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
+  }
+
+  // Unwind it.
+  const int MAX_TEST_FRAMES = 64;
+  uintptr_t framePCs[MAX_TEST_FRAMES];
+  uintptr_t frameSPs[MAX_TEST_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed = 0;
+  size_t framePointerFramesAcquired = 0;
+  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
+               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);
+
+  delete stackImg;
+
+  // if (0) {
+  //  // Show what we have.
+  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
+  //  for (size_t i = 0; i < framesUsed; i++) {
+  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
+  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
+  //  }
+  //  fprintf(stderr, "\n");
+  //}
+
+  // Check to see if there's a consistent binding between digits in
+  // the director string ('1' .. '8') and the PC values acquired by
+  // the unwind.  If there isn't, the unwinding has failed somehow.
+  uintptr_t binding[8];  // binding for '1' .. binding for '8'
+  memset((void*)binding, 0, sizeof(binding));
+
+  // The general plan is to work backwards along the director string
+  // and forwards along the framePCs array.  Doing so corresponds to
+  // working outwards from the innermost frame of the recursive test set.
+  const char* cursor = dstring;
+
+  // Find the end.  This leaves |cursor| two bytes past the first
+  // character we want to look at -- see comment below.
+  while (*cursor) cursor++;
+
+  // Counts the number of consistent frames.
+  size_t nConsistent = 0;
+
+  // Iterate back to the start of the director string.  The starting
+  // points are a bit complex.  We can't use framePCs[0] because that
+  // contains the PC in this frame (above).  We can't use framePCs[1]
+  // because that will contain the PC at return point in the recursive
+  // test group (TestFn[1-8]) for their call "out" to this function,
+  // GetAndCheckStackTrace.  Although LUL will compute a correct
+  // return address, that will not be the same return address as for a
+  // recursive call out of the function to another function in the
+  // group.  Hence we can only start consistency checking at
+  // framePCs[2].
+  //
+  // To be consistent, then, we must ignore the last element in the
+  // director string as that corresponds to framePCs[1].  Hence the
+  // start points are: framePCs[2] and the director string 2 bytes
+  // before the terminating zero.
+  //
+  // Also as a result of this, the number of consistent frames counted
+  // will always be one less than the length of the director string
+  // (not including its terminating zero).
+  size_t frameIx;
+  for (cursor = cursor - 2, frameIx = 2;
+       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
+    char c = *cursor;
+    uintptr_t pc = framePCs[frameIx];
+    // If this doesn't hold, the director string is ill-formed.
+    MOZ_ASSERT(c >= '1' && c <= '8');
+    int n = ((int)c) - ((int)'1');
+    // Without assertions, still refuse to index outside |binding|.
+    if (n < 0 || n >= (int)mozilla::ArrayLength(binding)) break;
+    if (binding[n] == 0) {
+      // There's no binding for |c| yet, so install |pc| and carry on.
+      binding[n] = pc;
+      nConsistent++;
+      continue;
+    }
+    // There's a pre-existing binding for |c|.  Check it's consistent.
+    if (binding[n] != pc) {
+      break;  // Not consistent.  Give up now.
+    }
+    nConsistent++;  // Consistent.  Keep going.
+  }
+
+  // So, did we succeed?
+  bool passed = nConsistent + 1 == strlen(dstring);
+
+  // Show the results.
+  char buf[200];
+  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
+                 (int)nConsistent, (int)strlen(dstring),
+                 passed ? "PASS" : "FAIL");
+  buf[sizeof(buf) - 1] = 0;
+  aLUL->mLog(buf);
+
+  return passed;
+}
+
+// Macro magic to create a set of 8 mutually recursive functions with
+// varying frame sizes.  These will recurse amongst themselves as
+// specified by |strP|, the director string, and call
+// GetAndCheckStackTrace when the string becomes empty, passing it the
+// original value of the string.  This checks the result, printing
+// results on |aLUL|'s logging sink, and also returns a boolean
+// indicating whether or not the results are acceptable (correct).
+
+#define DECL_TEST_FN(NAME) /* forward declaration of one test function */ \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
+
+#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
+    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
+    /* 8 functions created by this macro offer some variation in frame */     \
+    /* sizes.  This isn't as simple as it might seem, since a clever */       \
+    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
+    /* and removes it.  We try to defeat this by passing it to a function */  \
+    /* in a different compilation unit, and hoping that clang does not */     \
+    /* notice that the call is a no-op. */                                    \
+    char space[FRAMESIZE];                                                    \
+    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
+                                                                              \
+    if (*strP == '\0') {                                                      \
+      /* We've come to the end of the director string. */                     \
+      /* Take a stack snapshot. */                                            \
+      return GetAndCheckStackTrace(aLUL, strPorig);                           \
+    } else {                                                                  \
+      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
+      /* thing to do here is call onwards directly, from within the */        \
+      /* arms of the case statement.  That gives a problem in that */         \
+      /* there will be multiple return points inside each function when */    \
+      /* unwinding, so it will be difficult to check for consistency */       \
+      /* against the director string.  Instead, we make an indirect */        \
+      /* call, so as to guarantee that there is only one call site */         \
+      /* within each function.  This does assume that the compiler */         \
+      /* won't transform it back to the simple direct-call form. */           \
+      /* To discourage it from doing so, the call is bracketed with */        \
+      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
+      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
+      switch (*strP) {                                                        \
+        case '1':                                                             \
+          nextFn = TestFn1;                                                   \
+          break;                                                              \
+        case '2':                                                             \
+          nextFn = TestFn2;                                                   \
+          break;                                                              \
+        case '3':                                                             \
+          nextFn = TestFn3;                                                   \
+          break;                                                              \
+        case '4':                                                             \
+          nextFn = TestFn4;                                                   \
+          break;                                                              \
+        case '5':                                                             \
+          nextFn = TestFn5;                                                   \
+          break;                                                              \
+        case '6':                                                             \
+          nextFn = TestFn6;                                                   \
+          break;                                                              \
+        case '7':                                                             \
+          nextFn = TestFn7;                                                   \
+          break;                                                              \
+        case '8':                                                             \
+          nextFn = TestFn8;                                                   \
+          break;                                                              \
+        default:                                                              \
+          nextFn = TestFn8;                                                   \
+          break;                                                              \
+      }                                                                       \
+      /* "use" |space| immediately after the recursive call, */               \
+      /* so as to dissuade clang from deallocating the space while */         \
+      /* the call is active, or otherwise messing with the stack frame. */    \
+      __asm__ __volatile__("" ::: "cc", "memory");                            \
+      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
+      Unused << write(1, space, 0);                                           \
+      __asm__ __volatile__("" ::: "cc", "memory");                            \
+      return passed;                                                          \
+    }                                                                         \
+  }
+
+// The test functions call each other (each can dispatch to any of
+// TestFn1 .. TestFn8), so all must be declared before any is defined.
+DECL_TEST_FN(TestFn1)
+DECL_TEST_FN(TestFn2)
+DECL_TEST_FN(TestFn3)
+DECL_TEST_FN(TestFn4)
+DECL_TEST_FN(TestFn5)
+DECL_TEST_FN(TestFn6)
+DECL_TEST_FN(TestFn7)
+DECL_TEST_FN(TestFn8)
+
+GEN_TEST_FN(TestFn1, 123)
+GEN_TEST_FN(TestFn2, 456)
+GEN_TEST_FN(TestFn3, 789)
+GEN_TEST_FN(TestFn4, 23)
+GEN_TEST_FN(TestFn5, 47)
+GEN_TEST_FN(TestFn6, 117)
+GEN_TEST_FN(TestFn7, 1)
+GEN_TEST_FN(TestFn8, 99)
+
+// This starts the test sequence going.  Call here to generate a
+// sequence of calls as directed by the string |dstring|.  The call
+// sequence will, from its innermost frame, finish by calling
+// GetAndCheckStackTrace() and passing it |dstring|.
+// GetAndCheckStackTrace() will unwind the stack, check consistency
+// of those results against |dstring|, and print a pass/fail message
+// to aLUL's logging sink.  It also updates the counters in *aNTests
+// and *aNTestsPassed.
+__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
+                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
+                                       const char* dstring) {
+  // Ensure that the stack has at least this much space on it.  This
+  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
+  // and hand it to LUL.  Safe in the sense that no segfault can
+  // happen because the stack is at least this big.  This is all
+  // somewhat dubious in the sense that a sufficiently clever compiler
+  // (clang, for one) can figure out that space[] is unused and delete
+  // it from the frame.  Hence the somewhat elaborate hoop jumping to
+  // fill it up before the call and to at least appear to use the
+  // values afterwards.
+  int i;
+  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
+  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
+    space[i] = (char)(i & 0x7F);
+  }
+
+  // Really run the test: args are (LUL, whole string, current position).
+  bool passed = TestFn1(aLUL, dstring, dstring);
+
+  // Appear to use space[], by visiting its values to compute some kind
+  // of checksum, and then (apparently) using the checksum.
+  int sum = 0;
+  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
+    // If this doesn't fool LLVM, I don't know what will.
+    sum += space[i] - 3 * i;
+  }
+  __asm__ __volatile__("" : : "r"(sum));
+
+  // Update the counters.
+  (*aNTests)++;
+  if (passed) {
+    (*aNTestsPassed)++;
+  }
+}
+
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+                     LUL* aLUL) {
+  aLUL->mLog(":\n");
+  aLUL->mLog("LULUnitTest: BEGIN\n");
+  *aNTests = *aNTestsPassed = 0;  // Start both counters from zero.
+  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");  // Digit k: call TestFnk
+  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
+  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
+  TestUnw(aNTests, aNTestsPassed, aLUL,
+          "123456781122334455667788777777777777777777777");
+  aLUL->mLog("LULUnitTest: END\n");
+  aLUL->mLog(":\n");
+}
+
+}  // namespace lul
diff --git a/mozglue/baseprofiler/lul/LulMain.h b/mozglue/baseprofiler/lul/LulMain.h
new file mode 100644
index 0000000000..b0cb7f4f1e
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMain.h
@@ -0,0 +1,377 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMain_h
+#define LulMain_h
+
+#include "PlatformMacros.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/MemoryReporting.h"
+
+// LUL: A Lightweight Unwind Library.
+// This file provides the end-user (external) interface for LUL.
+
+// Some comments about naming in the implementation.  These are safe
+// to ignore if you are merely using LUL, but are important if you
+// hack on its internals.
+//
+// Debuginfo readers in general have tended to use the word "address"
+// to mean several different things.  This sometimes makes them
+// difficult to understand and maintain.  LUL tries hard to avoid
+// using the word "address" and instead uses the following more
+// precise terms:
+//
+// * SVMA ("Stated Virtual Memory Address"): this is an address of a
+//   symbol (etc) as it is stated in the symbol table, or other
+//   metadata, of an object.  Such values are typically small and
+//   start from zero or thereabouts, unless the object has been
+//   prelinked.
+//
+// * AVMA ("Actual Virtual Memory Address"): this is the address of a
+//   symbol (etc) in a running process, that is, once the associated
+//   object has been mapped into a process.  Such values are typically
+//   much larger than SVMAs, since objects can get mapped arbitrarily
+//   far along the address space.
+//
+// * "Bias": the difference between AVMA and SVMA for a given symbol
+//   (specifically, AVMA - SVMA).  The bias is always an integral
+//   number of pages.  Once we know the bias for a given object's
+//   text section (for example), we can compute the AVMAs of all of
+//   its text symbols by adding the bias to their SVMAs.
+//
+// * "Image address": typically, to read debuginfo from an object we
+//   will temporarily mmap in the file so as to read symbol tables
+//   etc.  Addresses in this temporary mapping are called "Image
+//   addresses".  Note that the temporary mapping is entirely
+//   unrelated to the mappings of the file that the dynamic linker
+//   must perform merely in order to get the program to run.  Hence
+//   image addresses are unrelated to either SVMAs or AVMAs.
+
+namespace lul {
+
+// A machine word plus validity tag.
+class TaggedUWord {
+ public:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Construct a valid one.
+  explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Construct an invalid one (its value is zero).
+  TaggedUWord() : mValue(0), mValid(false) {}
+
+  // RUNS IN NO-MALLOC CONTEXT.  Sum; invalid if either side is invalid.
+  TaggedUWord operator+(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  Difference; invalid if either side is invalid.
+  TaggedUWord operator-(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  AND; invalid if either side is invalid.
+  TaggedUWord operator&(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  OR; invalid if either side is invalid.
+  TaggedUWord operator|(TaggedUWord rhs) const {
+    return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value())
+                                    : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  Signed >=: yields 1 or 0, or invalid.
+  TaggedUWord CmpGEs(TaggedUWord rhs) const {
+    if (Valid() && rhs.Valid()) {
+      intptr_t s1 = (intptr_t)Value();
+      intptr_t s2 = (intptr_t)rhs.Value();
+      return TaggedUWord(s1 >= s2 ? 1 : 0);
+    }
+    return TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  Invalid if shift >= word size in bits.
+  TaggedUWord operator<<(TaggedUWord rhs) const {
+    if (Valid() && rhs.Valid()) {
+      uintptr_t shift = rhs.Value();
+      if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift);
+    }
+    return TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Is equal?  Note: non-validity on either side gives non-equality.
+  bool operator==(TaggedUWord other) const {
+    return (mValid && other.Valid()) ? (mValue == other.Value()) : false;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Is it valid and word-aligned?
+  bool IsAligned() const {
+    return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  Raw word; callers should check Valid().
+  uintptr_t Value() const { return mValue; }
+
+  // RUNS IN NO-MALLOC CONTEXT.  The validity tag.
+  bool Valid() const { return mValid; }
+
+ private:
+  uintptr_t mValue;  // The word itself (zero when default-constructed)
+  bool mValid;       // The validity tag
+};
+
+// The registers, with validity tags, that will be unwound.
+
+struct UnwindRegs {
+#if defined(GP_ARCH_arm)
+  TaggedUWord r7;
+  TaggedUWord r11;
+  TaggedUWord r12;
+  TaggedUWord r13;  // stack pointer
+  TaggedUWord r14;  // link register
+  TaggedUWord r15;  // program counter
+#elif defined(GP_ARCH_arm64)
+  TaggedUWord x29;  // frame pointer register
+  TaggedUWord x30;  // link register
+  TaggedUWord sp;
+  TaggedUWord pc;
+#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  TaggedUWord xbp;  // ebp (x86) or rbp (amd64)
+  TaggedUWord xsp;  // esp (x86) or rsp (amd64)
+  TaggedUWord xip;  // eip (x86) or rip (amd64)
+#elif defined(GP_ARCH_mips64)
+  TaggedUWord sp;
+  TaggedUWord fp;
+  TaggedUWord pc;
+#else
+#  error "Unknown plat"
+#endif
+};
+
+// The maximum number of bytes in a stack snapshot.  This value can be increased
+// if necessary, but testing showed that 160k is enough to obtain good
+// backtraces on x86_64 Linux.  Most backtraces fit comfortably into 4-8k of
+// stack space, but we do have some very deep stacks occasionally.  Please see
+// the comments in DoNativeBacktrace as to why it's OK to have this value be so
+// large.
+static const size_t N_STACK_BYTES = 160 * 1024;
+
+// The stack chunk image that will be unwound.
+struct StackImage {
+  // [mStartAvma, +mLen) specify the address range in the buffer.
+  // Obviously we require 0 <= mLen <= N_STACK_BYTES.
+  uintptr_t mStartAvma;
+  size_t mLen;
+  uint8_t mContents[N_STACK_BYTES];
+};
+
+// Statistics collection for the unwinder.  T is the type of each counter.
+template <typename T>
+class LULStats {
+ public:
+  LULStats() : mContext(0), mCFI(0), mFP(0) {}  // All counters start at zero.
+
+  template <typename S>
+  explicit LULStats(const LULStats<S>& aOther)
+      : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {}
+
+  template <typename S>
+  LULStats<T>& operator=(const LULStats<S>& aOther) {
+    mContext = aOther.mContext;
+    mCFI = aOther.mCFI;
+    mFP = aOther.mFP;
+    return *this;
+  }
+
+  template <typename S>
+  uint32_t operator-(const LULStats<S>& aOther) {  // Sum of field deltas.
+    return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) +
+           (mFP - aOther.mFP);
+  }
+
+  T mContext;  // Number of context frames
+  T mCFI;      // Number of CFI/EXIDX frames
+  T mFP;       // Number of frame-pointer recovered frames
+};
+
+// The core unwinder library class.  Just one of these is needed, and
+// it can be shared by multiple unwinder threads.
+//
+// The library operates in one of two modes.
+//
+// * Admin mode.  The library is in this state after creation.  In Admin
+//   mode, no unwinding may be performed.  It is however allowable to
+//   perform administrative tasks -- primarily, loading of unwind info
+//   -- in this mode.  In particular, it is safe for the library to
+//   perform dynamic memory allocation in this mode.  Safe in the
+//   sense that there is no risk of deadlock against unwinding threads
+//   that might -- because of where they have been sampled -- hold the
+//   system's malloc lock.
+//
+// * Unwind mode.  In this mode, calls to ::Unwind may be made, but
+//   nothing else.  ::Unwind guarantees not to make any dynamic memory
+//   requests, so as to guarantee that the calling thread won't
+//   deadlock in the case where it already holds the system's malloc lock.
+//
+// The library is created in Admin mode.  After debuginfo is loaded,
+// the caller must switch it into Unwind mode by calling
+// ::EnableUnwinding.  There is no way to switch it back to Admin mode
+// after that.  To safely switch back to Admin mode would require the
+// caller (or other external agent) to guarantee that there are no
+// pending ::Unwind calls.
+
+class PriMap;
+class SegArray;
+class UniqueStringUniverse;
+
+class LUL {
+ public:
+  // Create; supply a logging sink.  Sets the object in Admin mode.
+  explicit LUL(void (*aLog)(const char*));
+
+  // Destroy.  Caller is responsible for ensuring that no other
+  // threads are in Unwind calls.  All resources are freed and all
+  // registered unwinder threads are deregistered.  Can be called
+  // either in Admin or Unwind mode.
+  ~LUL();
+
+  // Notify the library that unwinding is now allowed and so
+  // admin-mode calls are no longer allowed.  The object is initially
+  // created in admin mode.  The only possible transition is
+  // admin->unwinding, therefore.
+  void EnableUnwinding();
+
+  // Notify of a new r-x mapping, and load the associated unwind info.
+  // The filename is strdup'd and used for debug printing.  If
+  // aMappedImage is NULL, this function will mmap/munmap the file
+  // itself, so as to be able to read the unwind info.  If
+  // aMappedImage is non-NULL then it is assumed to point to a
+  // caller-supplied and caller-managed mapped image of the file.
+  // May only be called in Admin mode.
+  void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+                      const void* aMappedImage);
+
+  // In rare cases we know an executable area exists but don't know
+  // what the associated file is.  This call notifies LUL of such
+  // areas.  This is important for correct functioning of stack
+  // scanning and of the x86-{linux,android} special-case
+  // __kernel_syscall function handling.
+  // This must be called only after the code area in
+  // question really has been mapped.
+  // May only be called in Admin mode.
+  void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+  // Notify that a mapped area has been unmapped; discard any
+  // associated unwind info.  Acquires mRWlock for writing.  Note that
+  // to avoid segfaulting the stack-scan unwinder, which inspects code
+  // areas, this must be called before the code area in question is
+  // really unmapped.  Note that, unlike NotifyAfterMap(), this
+  // function takes the start and end addresses of the range to be
+  // unmapped, rather than a start and a length parameter.  This is so
+  // as to make it possible to notify an unmap for the entire address
+  // space using a single call.
+  // May only be called in Admin mode.
+  void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
+
+  // Apply NotifyBeforeUnmap to the entire address space.  This causes
+  // LUL to discard all unwind and executable-area information for the
+  // entire address space.
+  // May only be called in Admin mode.
+  void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); }
+
+  // Returns the number of mappings currently registered.
+  // May only be called in Admin mode.
+  size_t CountMappings();
+
+  // Unwind |aStackImg| starting with the context in |aStartRegs|.
+  // Write the number of frames recovered in *aFramesUsed.  Put
+  // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
+  // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
+  // |aFramesAvail| is the size of the two output arrays and hence the
+  // largest possible value of *aFramesUsed.  PC values are always
+  // valid, and the unwind will stop when the PC becomes invalid, but
+  // the SP values might be invalid, in which case the value zero will
+  // be written in the relevant aFrameSPs[] slot.
+  //
+  // This function assumes that the SP values increase as it unwinds
+  // away from the innermost frame -- that is, that the stack grows
+  // down.  It monitors SP values as it unwinds to check they do not
+  // decrease, so as to avoid looping on corrupted stacks.
+  //
+  // May only be called in Unwind mode.  Multiple threads may unwind
+  // at once.  LUL user is responsible for ensuring that no thread makes
+  // any Admin calls whilst in Unwind mode.
+  // MOZ_CRASHes if the calling thread is not registered for unwinding.
+  //
+  // The calling thread must previously have been registered via a call to
+  // RegisterSampledThread.
+  void Unwind(/*OUT*/ uintptr_t* aFramePCs,
+              /*OUT*/ uintptr_t* aFrameSPs,
+              /*OUT*/ size_t* aFramesUsed,
+              /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail,
+              UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+  // The logging sink.  Call to send debug strings to the caller-
+  // specified destination.  Can only be called by the Admin thread.
+  void (*mLog)(const char*);
+
+  // Statistics relating to unwinding.  These have to be atomic since
+  // unwinding can occur on different threads simultaneously.
+  LULStats<mozilla::Atomic<uint32_t>> mStats;
+
+  // Possibly show the statistics.  This may not be called from any
+  // registered sampling thread, since it involves I/O.
+  void MaybeShowStats();
+
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const;
+
+ private:
+  // The statistics counters at the point where they were last printed.
+  LULStats<uint32_t> mStatsPrevious;
+
+  // Are we in admin mode?  Initially |true| but changes to |false|
+  // once unwinding begins.
+  bool mAdminMode;
+
+  // The thread ID associated with admin mode.  This is the only thread
+  // that is allowed to perform non-Unwind calls on this object.  Conversely,
+  // no registered Unwinding thread may be the admin thread.  This is so
+  // as to clearly partition the one thread that may do dynamic memory
+  // allocation from the threads that are being sampled, since the latter
+  // absolutely may not do dynamic memory allocation.
+  int mAdminThreadId;
+
+  // The top level mapping from code address ranges to postprocessed
+  // unwind info.  Basically a sorted array of (addr, len, info)
+  // records.  This field is updated by NotifyAfterMap and NotifyBeforeUnmap.
+  PriMap* mPriMap;
+
+  // An auxiliary structure that records which address ranges are
+  // mapped r-x, for the benefit of the stack scanner.
+  SegArray* mSegArray;
+
+  // A UniqueStringUniverse that holds all the strdup'd strings created
+  // whilst reading unwind information.  This is included so as to make
+  // it possible to free them in ~LUL.
+  UniqueStringUniverse* mUSU;
+};
+
+// Run unit tests on an initialised, loaded-up LUL instance, and print
+// summary results on |aLUL|'s logging sink.  Also return the number
+// of tests run in *aNTests and the number that passed in
+// *aNTestsPassed.
+void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
+                     LUL* aLUL);
+
+}  // namespace lul
+
+#endif  // LulMain_h
diff --git a/mozglue/baseprofiler/lul/LulMainInt.h b/mozglue/baseprofiler/lul/LulMainInt.h
new file mode 100644
index 0000000000..c2ee45d73d
--- /dev/null
+++ b/mozglue/baseprofiler/lul/LulMainInt.h
@@ -0,0 +1,420 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMainInt_h
+#define LulMainInt_h
+
+#include "PlatformMacros.h"
+#include "LulMain.h"  // for TaggedUWord
+
+#include "mozilla/Assertions.h"
+
+#include <string>
+#include <vector>
+
+// This file provides the internal interface inside LUL.  If you are an
+// end-user of LUL, do not include it in your code.  The end-user
+// interface is in LulMain.h.
+
+namespace lul {
+
+using std::vector;
+
+////////////////////////////////////////////////////////////////
+// DW_REG_ constants                                          //
+////////////////////////////////////////////////////////////////
+
+// These are the Dwarf CFI register numbers, as (presumably) defined
+// in the ELF ABI supplements for each architecture.
+
+enum DW_REG_NUMBER {
+  // No real register has this number.  It's convenient to be able to
+  // treat the CFA (Canonical Frame Address) as "just another
+  // register", though.
+  DW_REG_CFA = -1,
+#if defined(GP_ARCH_arm)
+  // ARM registers
+  DW_REG_ARM_R7 = 7,
+  DW_REG_ARM_R11 = 11,
+  DW_REG_ARM_R12 = 12,
+  DW_REG_ARM_R13 = 13,
+  DW_REG_ARM_R14 = 14,
+  DW_REG_ARM_R15 = 15,
+#elif defined(GP_ARCH_arm64)
+  // aarch64 registers
+  DW_REG_AARCH64_X29 = 29,
+  DW_REG_AARCH64_X30 = 30,
+  DW_REG_AARCH64_SP = 31,
+#elif defined(GP_ARCH_amd64)
+  // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are
+  // combined, a merged set of register constants is needed.
+  DW_REG_INTEL_XBP = 6,   // rbp
+  DW_REG_INTEL_XSP = 7,   // rsp
+  DW_REG_INTEL_XIP = 16,  // rip (return address)
+#elif defined(GP_ARCH_x86)
+  DW_REG_INTEL_XBP = 5,  // ebp
+  DW_REG_INTEL_XSP = 4,  // esp
+  DW_REG_INTEL_XIP = 8,  // eip (return address)
+#elif defined(GP_ARCH_mips64)
+  DW_REG_MIPS_SP = 29,
+  DW_REG_MIPS_FP = 30,
+  DW_REG_MIPS_PC = 34,
+#else
+#  error "Unknown arch"
+#endif
+};
+
+////////////////////////////////////////////////////////////////
+// PfxExpr                                                    //
+////////////////////////////////////////////////////////////////
+
+enum PfxExprOp {  // Opcodes of the prefix-expression stack machine
+  //             meaning of mOperand     effect on stack
+  PX_Start,   // bool start-with-CFA?    start, with CFA on stack, or not
+  PX_End,     // none                    stop; result is at top of stack
+  PX_SImm32,  // int32                   push signed int32
+  PX_DwReg,   // DW_REG_NUMBER           push value of the specified reg
+  PX_Deref,   // none                    pop X ; push *X
+  PX_Add,     // none                    pop X ; pop Y ; push Y + X
+  PX_Sub,     // none                    pop X ; pop Y ; push Y - X
+  PX_And,     // none                    pop X ; pop Y ; push Y & X
+  PX_Or,      // none                    pop X ; pop Y ; push Y | X
+  PX_CmpGES,  // none                    pop X ; pop Y ; push (Y >=s X) ? 1 : 0
+  PX_Shl      // none                    pop X ; pop Y ; push Y << X
+};
+
+struct PfxInstr {  // One instruction: an opcode plus its operand
+  PfxInstr(PfxExprOp opcode, int32_t operand)
+      : mOpcode(opcode), mOperand(operand) {}
+  explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {}
+  bool operator==(const PfxInstr& other) const {
+    return mOpcode == other.mOpcode && mOperand == other.mOperand;
+  }
+  PfxExprOp mOpcode;
+  int32_t mOperand;  // Meaning depends on mOpcode; see PfxExprOp
+};
+
+static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly");
+
+// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start].
+// In the case of any mishap (stack over/underflow, running off the end of
+// the instruction vector, obviously malformed sequences),
+// return an invalid TaggedUWord.
+// RUNS IN NO-MALLOC CONTEXT
+TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs);
+
+////////////////////////////////////////////////////////////////
+// LExpr                                                      //
+////////////////////////////////////////////////////////////////
+
+// An expression -- very primitive.  Denotes either "register +
+// offset", a dereferenced version of the same, or a reference to a
+// prefix expression stored elsewhere.  So as to allow convenient
+// handling of Dwarf-derived unwind info, the register may also denote
+// the CFA.  A large number of these need to be stored, so we ensure
+// it fits into 8 bytes.  See comment below on RuleSet to see how
+// expressions fit into the bigger picture.
+
+enum LExprHow {  // How the value of an LExpr is obtained
+  UNKNOWN = 0,  // This LExpr denotes no value.
+  NODEREF,      // Value is  (mReg + mOffset).
+  DEREF,        // Value is *(mReg + mOffset).
+  PFXEXPR       // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset])
+};
+
+inline static const char* NameOf_LExprHow(LExprHow how) {
+  switch (how) {  // Map each enumerator to its spelling
+    case UNKNOWN:
+      return "UNKNOWN";
+    case NODEREF:
+      return "NODEREF";
+    case DEREF:
+      return "DEREF";
+    case PFXEXPR:
+      return "PFXEXPR";
+    default:
+      return "LExpr-??";  // Not a known LExprHow value
+  }
+}
+
+struct LExpr {
+  // Denotes an expression with no value.
+  LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {}
+
+  // Denotes any expressible expression.  Asserts per-|how| constraints.
+  LExpr(LExprHow how, int16_t reg, int32_t offset)
+      : mHow(how), mReg(reg), mOffset(offset) {
+    switch (how) {
+      case UNKNOWN:
+        MOZ_ASSERT(reg == 0 && offset == 0);
+        break;
+      case NODEREF:
+        break;
+      case DEREF:
+        break;
+      case PFXEXPR:
+        MOZ_ASSERT(reg == 0 && offset >= 0);
+        break;
+      default:
+        MOZ_ASSERT(0, "LExpr::LExpr: invalid how");
+    }
+  }
+
+  // Add |delta| to the offset of a NODEREF expression (a reg + offset).
+  LExpr add_delta(long delta) {
+    MOZ_ASSERT(mHow == NODEREF);
+    // If this is a non-debug build and the above assertion would have
+    // failed, at least return LExpr() so that the machinery that uses
+    // the resulting expression fails in a repeatable way.
+    return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset + delta)
+                             : LExpr();  // Gone bad
+  }
+
+  // Dereference an expression that denotes a memory address.
+  LExpr deref() {
+    MOZ_ASSERT(mHow == NODEREF);
+    // Same rationale as for add_delta().
+    return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset)
+                             : LExpr();  // Gone bad
+  }
+
+  // Print a rule for recovery of |aNewReg| whose recovered value
+  // is this LExpr.
+  std::string ShowRule(const char* aNewReg) const;
+
+  // Evaluate this expression, producing a TaggedUWord.  |aOldRegs|
+  // holds register values that may be referred to by the expression.
+  // |aCFA| holds the CFA value, if any, that applies.  |aStackImg|
+  // contains a chunk of stack that will be consulted if the expression
+  // references memory.  |aPfxInstrs| holds the vector of PfxInstrs
+  // that will be consulted if this is a PFXEXPR.
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+                           const StackImage* aStackImg,
+                           const vector<PfxInstr>* aPfxInstrs) const;
+
+  // Representation of expressions.  If |mReg| is DW_REG_CFA (-1) then
+  // it denotes the CFA.  All other allowed values for |mReg| are
+  // nonnegative and are DW_REG_ values.
+  LExprHow mHow : 8;
+  int16_t mReg;     // A DW_REG_ value
+  int32_t mOffset;  // 32-bit signed offset should be more than enough.
+};
+
+static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly");
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+// This is platform-dependent.  For some address range, describes how
+// to recover the CFA and then how to recover the registers for the
+// previous frame.
+//
+// The set of LExprs contained in a given RuleSet describe a DAG which
+// says how to compute the caller's registers ("new registers") from
+// the callee's registers ("old registers").  The DAG can contain a
+// single internal node, which is the value of the CFA for the callee.
+// It would be possible to construct a DAG that omits the CFA, but
+// including it makes the summarisers simpler, and the Dwarf CFI spec
+// has the CFA as a central concept.
+//
+// For this to make sense, |mCfaExpr| can't have
+// |mReg| == DW_REG_CFA since we have no previous value for the CFA.
+// All of the other |Expr| fields can -- and usually do -- specify
+// |mReg| == DW_REG_CFA.
+//
+// With that in place, the unwind algorithm proceeds as follows.
+//
+// (0) Initially: we have values for the old registers, and a memory
+//     image.
+//
+// (1) Compute the CFA by evaluating |mCfaExpr|.  Add the computed
+//     value to the set of "old registers".
+//
+// (2) Compute values for the registers by evaluating all of the other
+//     |Expr| fields in the RuleSet.  These can depend on both the old
+//     register values and the just-computed CFA.
+//
+// If we are unwinding without computing a CFA, perhaps because the
+// RuleSets are derived from EXIDX instead of Dwarf, then
+// |mCfaExpr.mHow| will be UNKNOWN, so the computed value will be
+// invalid -- that is, TaggedUWord() -- and so any attempt to use
+// it will produce an invalid value too.  But that's OK because the
+// RuleSet would make no sense if it depended on the CFA but specified
+// no way to compute it.
+//
+// A RuleSet is not allowed to cover zero address range.  Having zero
+// length would break binary searching in SecMaps and PriMaps.
+
+class RuleSet {
+ public:
+  RuleSet();
+  void Print(void (*aLog)(const char*)) const;
+
+  // Find the LExpr* for a given DW_REG_ value in this class.
+  LExpr* ExprForRegno(DW_REG_NUMBER aRegno);
+
+  uintptr_t mAddr;  // Start of the address range covered
+  uintptr_t mLen;   // Length of that range; must not be zero
+  // How to compute the CFA.
+  LExpr mCfaExpr;
+  // How to compute caller register values.  These may reference the
+  // value defined by |mCfaExpr|.
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+  LExpr mXipExpr;  // return address
+  LExpr mXspExpr;
+  LExpr mXbpExpr;
+#elif defined(GP_ARCH_arm)
+  LExpr mR15expr;  // return address
+  LExpr mR14expr;
+  LExpr mR13expr;
+  LExpr mR12expr;
+  LExpr mR11expr;
+  LExpr mR7expr;
+#elif defined(GP_ARCH_arm64)
+  LExpr mX29expr;  // frame pointer register
+  LExpr mX30expr;  // link register
+  LExpr mSPexpr;
+#elif defined(GP_ARCH_mips64)
+  LExpr mPCexpr;
+  LExpr mFPexpr;
+  LExpr mSPexpr;
+#else
+#  error "Unknown arch"
+#endif
+};
+
+// Returns |true| for Dwarf register numbers which are members
+// of the set of registers that LUL unwinds on this target.
+static inline bool registerIsTracked(DW_REG_NUMBER reg) {
+  switch (reg) {
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
+    case DW_REG_INTEL_XBP:
+    case DW_REG_INTEL_XSP:
+    case DW_REG_INTEL_XIP:
+      return true;
+#elif defined(GP_ARCH_arm)
+    case DW_REG_ARM_R7:
+    case DW_REG_ARM_R11:
+    case DW_REG_ARM_R12:
+    case DW_REG_ARM_R13:
+    case DW_REG_ARM_R14:
+    case DW_REG_ARM_R15:
+      return true;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:
+    case DW_REG_AARCH64_X30:
+    case DW_REG_AARCH64_SP:
+      return true;
+#elif defined(GP_ARCH_mips64)
+    case DW_REG_MIPS_FP:
+    case DW_REG_MIPS_SP:
+    case DW_REG_MIPS_PC:
+      return true;
+#else
+#  error "Unknown arch"
+#endif
+    default:
+      return false;  // DW_REG_CFA and any other register number
+  }
+}
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// A SecMap may have zero address range, temporarily, whilst RuleSets
+// are being added to it.  But adding a zero-range SecMap to a PriMap
+// will make it impossible to maintain the total order of the PriMap
+// entries, and so that can't be allowed to happen.
+
+class SecMap {  // holds RuleSets and the PfxInstrs they refer to
+ public:
+  // mSummary{Min,Max}Addr summarise the contained mRuleSets, in that they
+  // give exactly the lowest and highest addresses that any of the entries
+  // in this SecMap cover.  Hence invariants:
+  //
+  // mRuleSets is nonempty
+  //    <=> mSummaryMinAddr <= mSummaryMaxAddr
+  //        && mSummaryMinAddr == mRuleSets[0].mAddr
+  //        && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr
+  //                              + mRuleSets[#rulesets-1].mLen - 1;
+  //
+  // This requires that no RuleSet has zero length.
+  //
+  // mRuleSets is empty
+  //    <=> mSummaryMinAddr > mSummaryMaxAddr
+  //
+  // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely,
+  // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote
+  // this case.
+
+  explicit SecMap(void (*aLog)(const char*));  // presumably sets mLog
+  ~SecMap();
+
+  // Binary search mRuleSets to find one that brackets |ia|, or nullptr
+  // if none is found.  It's not allowable to do this until PrepareRuleSets
+  // has been called first.
+  RuleSet* FindRuleSet(uintptr_t ia);
+
+  // Add a RuleSet to the collection.  The rule is copied in.  Calling
+  // this makes the map non-searchable.
+  void AddRuleSet(const RuleSet* rs);
+
+  // Add a PfxInstr to the vector of such instrs, and return the index
+  // in the vector.  Calling this makes the map non-searchable.
+  uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Returns the entire vector of PfxInstrs.
+  const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; }
+
+  // Prepare the map for searching.  Also, remove any rules for code
+  // address ranges which don't fall inside [start, +len).  |len| may
+  // not be zero.
+  void PrepareRuleSets(uintptr_t start, size_t len);
+
+  bool IsEmpty();  // NOTE(review): presumably "mRuleSets is empty" -- confirm
+
+  size_t Size() { return mRuleSets.size(); }  // number of RuleSets held
+
+  size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+  // The min and max addresses of the addresses in the contained
+  // RuleSets.  See comment above for invariants.
+  uintptr_t mSummaryMinAddr;
+  uintptr_t mSummaryMaxAddr;
+
+ private:
+  // False whilst adding entries; true once it is safe to call FindRuleSet.
+  // Transition (false->true) is caused by calling PrepareRuleSets().
+  bool mUsable;
+
+  // A vector of RuleSets, sorted, nonoverlapping (post Prepare()).
+  vector<RuleSet> mRuleSets;
+
+  // A vector of PfxInstrs, which are referred to by the RuleSets.
+  // These are provided as a representation of Dwarf expressions
+  // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression),
+  // are relatively expensive to evaluate, and are therefore
+  // expected to be used only occasionally.
+  //
+  // The vector holds a bunch of separate PfxInstr programs, each one
+  // starting with a PX_Start and terminated by a PX_End, all
+  // concatenated together.  When a RuleSet can't recover a value
+  // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is the
+  // index in this vector of the start of the necessary PfxInstr program.
+  vector<PfxInstr> mPfxInstrs;
+
+  // A logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+}  // namespace lul
+
+#endif  // ndef LulMainInt_h
diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.cpp b/mozglue/baseprofiler/lul/platform-linux-lul.cpp
new file mode 100644
index 0000000000..a9ee65858d
--- /dev/null
+++ b/mozglue/baseprofiler/lul/platform-linux-lul.cpp
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "AutoObjectMapper.h"
+#include "BaseProfiler.h"
+#include "BaseProfilerSharedLibraries.h"
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "LulMain.h"
+
+// Contains miscellaneous helpers that are used to connect the Gecko Profiler
+// and LUL.
+
+// Find out, in a platform-dependent way, where the code modules got mapped in
+// the process' virtual address space, and get |aLUL| to load unwind info for
+// them.  |aLUL| must not have any mappings registered yet (asserted below).
+void read_procmaps(lul::LUL* aLUL) {
+  MOZ_ASSERT(aLUL->CountMappings() == 0);
+
+#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
+  SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
+
+  for (size_t i = 0; i < info.GetSize(); i++) {
+    const SharedLibrary& lib = info.GetEntry(i);
+
+    std::string nativePath = lib.GetDebugPath();  // the file handed to |mapper|
+
+    // We can use the standard POSIX-based mapper.
+    AutoObjectMapperPOSIX mapper(aLUL->mLog);
+
+    // Ask |mapper| to map the object and hand the mapping to NotifyAfterMap().
+    // Objects matching neither branch below are silently skipped.
+    void* image = nullptr;
+    size_t size = 0;
+    bool ok = mapper.Map(&image, &size, nativePath);
+    if (ok && image && size > 0) {
+      aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(),
+                           nativePath.c_str(), image);
+    } else if (!ok && lib.GetDebugName().empty()) {
+      // The object has no name and (as a consequence) the mapper failed to map
+      // it.  This happens on Linux, where GetInfoForSelf() produces such a
+      // mapping for the VDSO.  This is a problem on x86-{linux,android} because
+      // lack of knowledge about the mapped area inhibits LUL's special
+      // __kernel_syscall handling.  Hence notify |aLUL| at least of the
+      // mapping, even though it can't read any unwind information for the area.
+      aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart());
+    }
+
+    // |mapper| goes out of scope at this point and so its destructor
+    // unmaps the object.
+  }
+
+#else
+#  error "Unknown platform"
+#endif
+}
+
+// LUL needs a callback for its logging sink.  Currently a no-op (see FIXME).
+void logging_sink_for_LUL(const char* str) {
+  // Messages are meant to be printed only when Verbose logging is enabled (e.g.
+  // with MOZ_BASE_PROFILER_VERBOSE_LOGGING=1). This is because LUL's logging is
+  // much more verbose than the rest of the profiler's logging, which occurs at
+  // the Info (3) and Debug (4) levels.
+  // FIXME: The call below is disabled, so |str| is dropped: enabling it causes
+  // a build failure in memory/replace/dmd/test/SmokeDMD (!) and other places,
+  // because they don't link the implementation in platform.cpp.
+  // VERBOSE_LOG("[%d] %s", profiler_current_process_id(), str);
+}
diff --git a/mozglue/baseprofiler/lul/platform-linux-lul.h b/mozglue/baseprofiler/lul/platform-linux-lul.h
new file mode 100644
index 0000000000..b54e80edcf
--- /dev/null
+++ b/mozglue/baseprofiler/lul/platform-linux-lul.h
@@ -0,0 +1,21 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PLATFORM_LINUX_LUL_H
+#define MOZ_PLATFORM_LINUX_LUL_H
+
+#include "platform.h"
+
+#include "BaseProfiler.h"
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void read_procmaps(lul::LUL* aLUL);
+
+// LUL needs a callback for its logging sink.
+void logging_sink_for_LUL(const char* str);
+
+#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */
diff --git a/mozglue/baseprofiler/moz.build b/mozglue/baseprofiler/moz.build
new file mode 100644
index 0000000000..db2949863f
--- /dev/null
+++ b/mozglue/baseprofiler/moz.build
@@ -0,0 +1,118 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This is pretty much a copy from tools/profiler, cut down to exclude anything
+# that cannot work in mozglue (because they are totally dependent on libxul-
+# specific code).
+# All exported headers now prefixed with "Base" to avoid #include name clashes.
+
+if CONFIG["MOZ_GECKO_PROFILER"]:
+    DEFINES["IMPL_MFBT"] = True
+    EXPORTS += [
+        "public/BaseProfilerSharedLibraries.h",
+        "public/BaseProfilingCategory.h",
+        "public/BaseProfilingStack.h",
+        "public/ProfilingCategoryList.h",
+    ]
+    UNIFIED_SOURCES += [
+        "core/PageInformation.cpp",
+        "core/platform.cpp",
+        "core/ProfileBuffer.cpp",
+        "core/ProfileBufferEntry.cpp",
+        "core/ProfiledThreadData.cpp",
+        "core/ProfileJSONWriter.cpp",
+        "core/ProfilerBacktrace.cpp",
+        "core/ProfilerMarkers.cpp",
+        "core/ProfilingCategory.cpp",
+        "core/ProfilingStack.cpp",
+        "core/RegisteredThread.cpp",
+    ]
+
+    if CONFIG["OS_TARGET"] in ("Android", "Linux", "FreeBSD"):
+        if CONFIG["CPU_ARCH"] in ("arm", "aarch64", "x86", "x86_64", "mips64"):
+            UNIFIED_SOURCES += [  # LUL is only built for the CPUs listed above
+                "lul/AutoObjectMapper.cpp",
+                "lul/LulCommon.cpp",
+                "lul/LulDwarf.cpp",
+                "lul/LulDwarfSummariser.cpp",
+                "lul/LulElf.cpp",
+                "lul/LulMain.cpp",
+                "lul/platform-linux-lul.cpp",
+            ]
+        # Built non-unified (SOURCES): name clashes with mozglue headers on Android.
+        SOURCES += [
+            "core/shared-libraries-linux.cc",
+        ]
+        if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] != "FreeBSD":
+            SOURCES += [  # 32-bit ARM only, and not on FreeBSD
+                "core/EHABIStackWalk.cpp",
+            ]
+    elif CONFIG["OS_TARGET"] == "Darwin":
+        UNIFIED_SOURCES += [
+            "core/shared-libraries-macos.cc",
+        ]
+    elif CONFIG["OS_TARGET"] == "WINNT":
+        SOURCES += [
+            "core/shared-libraries-win32.cc",
+        ]
+
+
+    LOCAL_INCLUDES += [
+        "/mozglue/baseprofiler/core/",
+        "/mozglue/linker",
+    ]
+
+    if CONFIG["OS_TARGET"] == "Android":
+        DEFINES["ANDROID_NDK_MAJOR_VERSION"] = CONFIG["ANDROID_NDK_MAJOR_VERSION"]
+        DEFINES["ANDROID_NDK_MINOR_VERSION"] = CONFIG["ANDROID_NDK_MINOR_VERSION"]
+
+    FINAL_LIBRARY = "mozglue"
+
+# Many of the headers in this directory are usable in non-MOZ_GECKO_PROFILER
+# builds.
+# BaseProfiler.h and BaseProfilerCounts.h only contain no-op macros in that
+# case.
+EXPORTS += [
+    "public/BaseProfiler.h",
+]
+
+EXPORTS.mozilla += [
+    "public/BaseProfileJSONWriter.h",
+    "public/BaseProfilerCounts.h",
+    "public/BaseProfilerDetail.h",
+    "public/BaseProfilerMarkers.h",
+    "public/BaseProfilerMarkersDetail.h",
+    "public/BaseProfilerMarkersPrerequisites.h",
+    "public/BaseProfilerMarkerTypes.h",
+    "public/BlocksRingBuffer.h",
+    "public/leb128iterator.h",
+    "public/ModuloBuffer.h",
+    "public/PowerOfTwo.h",
+    "public/ProfileBufferChunk.h",
+    "public/ProfileBufferChunkManager.h",
+    "public/ProfileBufferChunkManagerSingle.h",
+    "public/ProfileBufferChunkManagerWithLocalLimit.h",
+    "public/ProfileBufferControlledChunkManager.h",
+    "public/ProfileBufferEntryKinds.h",
+    "public/ProfileBufferEntrySerialization.h",
+    "public/ProfileBufferIndex.h",
+    "public/ProfileChunkedBuffer.h",
+]
+
+if CONFIG["MOZ_VTUNE"]:
+    DEFINES["MOZ_VTUNE_INSTRUMENTATION"] = True
+    UNIFIED_SOURCES += [
+        "core/VTuneProfiler.cpp",
+    ]
+
+
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += [
+        "-Wno-error=shadow",
+        "-Wno-ignored-qualifiers",  # due to use of breakpad headers
+    ]
+
+with Files("**"):
+    BUG_COMPONENT = ("Core", "Gecko Profiler")
diff --git a/mozglue/baseprofiler/public/BaseProfileJSONWriter.h b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
new file mode 100644
index 0000000000..5dcf06f3f3
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfileJSONWriter.h
@@ -0,0 +1,388 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASEPROFILEJSONWRITER_H
+#define BASEPROFILEJSONWRITER_H
+
+#include "mozilla/HashFunctions.h"
+#include "mozilla/HashTable.h"
+#include "mozilla/JSONWriter.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#include <functional>
+#include <ostream>
+#include <string_view>
+
+namespace mozilla {
+namespace baseprofiler {
+
+class SpliceableJSONWriter;
+
+// On average, profile JSONs are large enough such that we want to avoid
+// reallocating its buffer when expanding. Additionally, the contents of the
+// profile are not accessed until the profile is entirely written. For these
+// reasons we use a chunked writer that keeps an array of chunks, which is
+// concatenated together after writing is finished.
+class ChunkedJSONWriteFunc final : public JSONWriteFunc {
+ public:
+  friend class SpliceableJSONWriter;  // it reads and clears the chunk lists
+
+  ChunkedJSONWriteFunc() : mChunkPtr{nullptr}, mChunkEnd{nullptr} {
+    AllocChunk(kChunkSize);  // always start with one empty, terminated chunk
+  }
+
+  bool IsEmpty() const {  // true only while no chunk at all is held
+    MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && mChunkList.length() == 0 &&
+                                  mChunkLengths.length() == 0);
+    return !mChunkPtr;
+  }
+
+  void Write(const Span<const char>& aStr) override {  // append aStr
+    MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd);
+    MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back());
+    MOZ_ASSERT(*mChunkPtr == '\0');
+
+    // Most strings to be written are small, but subprocess profiles (e.g.,
+    // from the content process in e10s) may be huge. If the string is larger
+    // than a chunk, allocate its own chunk.
+    char* newPtr;
+    if (aStr.size() >= kChunkSize) {
+      AllocChunk(aStr.size() + 1);  // +1 for the terminating '\0'
+      newPtr = mChunkPtr + aStr.size();
+    } else {
+      newPtr = mChunkPtr + aStr.size();
+      if (newPtr >= mChunkEnd) {  // >=: the '\0' at newPtr must fit as well
+        AllocChunk(kChunkSize);
+        newPtr = mChunkPtr + aStr.size();
+      }
+    }
+
+    memcpy(mChunkPtr, aStr.data(), aStr.size());
+    *newPtr = '\0';
+    mChunkPtr = newPtr;
+    mChunkLengths.back() += aStr.size();
+  }
+  void CopyDataIntoLazilyAllocatedBuffer(
+      const std::function<char*(size_t)>& aAllocator) const {
+    // Request a buffer for the full content plus a null terminator.
+    MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+    size_t totalLen = 1;  // 1 for the final '\0'
+    for (size_t i = 0; i < mChunkLengths.length(); i++) {
+      MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]);
+      totalLen += mChunkLengths[i];
+    }
+    char* ptr = aAllocator(totalLen);  // the only call to aAllocator
+
+    if (!ptr) {
+      // Failed to allocate memory.
+      return;
+    }
+
+    for (size_t i = 0; i < mChunkList.length(); i++) {
+      size_t len = mChunkLengths[i];
+      memcpy(ptr, mChunkList[i].get(), len);
+      ptr += len;
+    }
+    *ptr = '\0';
+  }
+  UniquePtr<char[]> CopyData() const {  // one terminated copy of all chunks
+    UniquePtr<char[]> c;
+    CopyDataIntoLazilyAllocatedBuffer([&](size_t allocationSize) {
+      c = MakeUnique<char[]>(allocationSize);
+      return c.get();
+    });
+    return c;
+  }
+  void Take(ChunkedJSONWriteFunc&& aOther) {  // adopt aOther's chunks, no copy
+    for (size_t i = 0; i < aOther.mChunkList.length(); i++) {
+      MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i]));
+      MOZ_ALWAYS_TRUE(mChunkList.append(std::move(aOther.mChunkList[i])));
+    }  // NOTE(review): back() below assumes at least one chunk -- confirm
+    mChunkPtr = mChunkList.back().get() + mChunkLengths.back();
+    mChunkEnd = mChunkPtr;  // no spare room assumed: next Write() allocates
+    aOther.mChunkPtr = nullptr;
+    aOther.mChunkEnd = nullptr;
+    aOther.mChunkList.clear();
+    aOther.mChunkLengths.clear();
+  }
+
+ private:
+  void AllocChunk(size_t aChunkSize) {  // start a new, empty current chunk
+    MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+    UniquePtr<char[]> newChunk = MakeUnique<char[]>(aChunkSize);
+    mChunkPtr = newChunk.get();
+    mChunkEnd = mChunkPtr + aChunkSize;
+    *mChunkPtr = '\0';
+    MOZ_ALWAYS_TRUE(mChunkLengths.append(0));
+    MOZ_ALWAYS_TRUE(mChunkList.append(std::move(newChunk)));
+  }
+
+  static const size_t kChunkSize = 4096 * 512;  // 2 MiB
+
+  // Pointer for writing inside the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkList.back() <= mChunkPtr <= mChunkEnd.
+  char* mChunkPtr;
+
+  // Pointer to the end of the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkEnd >= mChunkList.back() + mChunkLengths.back().
+  char* mChunkEnd;
+
+  // List of chunks and their lengths.
+  //
+  // For all i, the length of the string in mChunkList[i] is
+  // mChunkLengths[i].
+  Vector<UniquePtr<char[]>> mChunkList;
+  Vector<size_t> mChunkLengths;
+};
+
+struct OStreamJSONWriteFunc final : public JSONWriteFunc {  // writes to aStream
+  explicit OStreamJSONWriteFunc(std::ostream& aStream) : mStream(aStream) {}
+
+  void Write(const Span<const char>& aStr) override {
+    std::string_view sv(aStr.data(), aStr.size());  // view only, no copy
+    mStream << sv;
+  }
+
+  std::ostream& mStream;  // held by reference; must outlive this object
+};
+
+class UniqueJSONStrings;
+
+class SpliceableJSONWriter : public JSONWriter {  // adds raw-JSON splicing
+ public:
+  explicit SpliceableJSONWriter(UniquePtr<JSONWriteFunc> aWriter)
+      : JSONWriter(std::move(aWriter)) {}
+
+  void StartBareList(CollectionStyle aStyle = MultiLineStyle) {
+    StartCollection(scEmptyString, scEmptyString, aStyle);
+  }
+
+  void EndBareList() { EndCollection(scEmptyString); }
+
+  // This function must be used to correctly stream timestamps in profiles.
+  // Null timestamps don't output anything.
+  void TimeProperty(const Span<const char>& aName, const TimeStamp& aTime) {
+    if (!aTime.IsNull()) {
+      DoubleProperty(aName,  // value: milliseconds since process creation
+                     (aTime - TimeStamp::ProcessCreation()).ToMilliseconds());
+    }
+  }
+
+  void NullElements(uint32_t aCount) {  // writes aCount null elements
+    for (uint32_t i = 0; i < aCount; i++) {
+      NullElement();
+    }
+  }
+
+  void Splice(const Span<const char>& aStr) {  // aStr is written verbatim
+    Separator();
+    WriteFunc()->Write(aStr);
+    mNeedComma[mDepth] = true;
+  }
+
+  void Splice(const char* aStr, size_t aLen) {  // same, from pointer + length
+    Separator();
+    WriteFunc()->Write(Span<const char>(aStr, aLen));
+    mNeedComma[mDepth] = true;
+  }
+
+  // Splice the given JSON directly in, without quoting.
+  void SplicedJSONProperty(const Span<const char>& aMaybePropertyName,
+                           const Span<const char>& aJsonValue) {
+    Scalar(aMaybePropertyName, aJsonValue);
+  }
+
+  void CopyAndSplice(const ChunkedJSONWriteFunc& aFunc) {  // aFunc is kept
+    Separator();
+    for (size_t i = 0; i < aFunc.mChunkList.length(); i++) {
+      WriteFunc()->Write(
+          Span<const char>(aFunc.mChunkList[i].get(), aFunc.mChunkLengths[i]));
+    }
+    mNeedComma[mDepth] = true;
+  }
+
+  // Takes the chunks from aFunc and writes them. If move is not possible
+  // (e.g., using OStreamJSONWriteFunc), aFunc's chunks are copied and its
+  // storage cleared.
+  virtual void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) {
+    Separator();
+    for (size_t i = 0; i < aFunc.mChunkList.length(); i++) {
+      WriteFunc()->Write(
+          Span<const char>(aFunc.mChunkList[i].get(), aFunc.mChunkLengths[i]));
+    }
+    aFunc.mChunkPtr = nullptr;
+    aFunc.mChunkEnd = nullptr;
+    aFunc.mChunkList.clear();
+    aFunc.mChunkLengths.clear();
+    mNeedComma[mDepth] = true;
+  }
+
+  // Set the pointer to a UniqueJSONStrings; it must currently be unset.
+  void SetUniqueStrings(UniqueJSONStrings& aUniqueStrings) {
+    MOZ_RELEASE_ASSERT(!mUniqueStrings);
+    mUniqueStrings = &aUniqueStrings;
+  }
+
+  // Clear the pointer to a UniqueJSONStrings; it must currently be set.
+  void ResetUniqueStrings() {
+    MOZ_RELEASE_ASSERT(mUniqueStrings);
+    mUniqueStrings = nullptr;
+  }
+
+  // Add `aStr` to the unique-strings list (if not already there), and write its
+  // index as a named object property.
+  inline void UniqueStringProperty(const Span<const char>& aName,
+                                   const Span<const char>& aStr);
+
+  // Add `aStr` to the unique-strings list (if not already there), and write its
+  // index as an array element.
+  inline void UniqueStringElement(const Span<const char>& aStr);
+
+ private:
+  UniqueJSONStrings* mUniqueStrings = nullptr;  // not owned; null when unset
+};
+
+class SpliceableChunkedJSONWriter final : public SpliceableJSONWriter {
+ public:
+  explicit SpliceableChunkedJSONWriter()
+      : SpliceableJSONWriter(MakeUnique<ChunkedJSONWriteFunc>()) {}
+
+  // Access the ChunkedJSONWriteFunc as reference-to-const, usually to copy data
+  // out.
+  const ChunkedJSONWriteFunc& ChunkedWriteFunc() const {
+    MOZ_ASSERT(!mTaken);
+    // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the
+    // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*.
+    return *static_cast<const ChunkedJSONWriteFunc*>(WriteFunc());
+  }
+
+  // Access the ChunkedJSONWriteFunc as rvalue-reference, usually to take its
+  // data out. This writer shouldn't be used anymore after this.
+  ChunkedJSONWriteFunc&& TakeChunkedWriteFunc() {
+#ifdef DEBUG
+    MOZ_ASSERT(!mTaken);  // may only be taken once
+    mTaken = true;
+#endif  // DEBUG
+    // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the
+    // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*.
+    return std::move(*static_cast<ChunkedJSONWriteFunc*>(WriteFunc()));
+  }
+
+  // Adopts the chunks from aFunc without copying.
+  void TakeAndSplice(ChunkedJSONWriteFunc&& aFunc) override {
+    MOZ_ASSERT(!mTaken);
+    Separator();
+    // The WriteFunc was non-fallibly allocated as a ChunkedJSONWriteFunc in the
+    // only constructor above, so it's safe to cast to ChunkedJSONWriteFunc*.
+    static_cast<ChunkedJSONWriteFunc*>(WriteFunc())->Take(std::move(aFunc));
+    mNeedComma[mDepth] = true;
+  }
+
+#ifdef DEBUG
+ private:
+  bool mTaken = false;  // set by TakeChunkedWriteFunc(); DEBUG builds only
+#endif  // DEBUG
+};
+
+class JSONSchemaWriter {  // scoped writer of a "schema" object property
+  JSONWriter& mWriter;  // held by reference; must outlive this object
+  uint32_t mIndex;      // index that the next WriteField() will emit
+
+ public:
+  explicit JSONSchemaWriter(JSONWriter& aWriter) : mWriter(aWriter), mIndex(0) {
+    aWriter.StartObjectProperty("schema",  // closed again by the destructor
+                                SpliceableJSONWriter::SingleLineStyle);
+  }
+
+  void WriteField(const Span<const char>& aName) {
+    mWriter.IntProperty(aName, mIndex++);  // fields are numbered 0, 1, 2, ...
+  }
+
+  template <size_t Np1>  // Np1: array length, i.e. string length plus one
+  void WriteField(const char (&aName)[Np1]) {
+    WriteField(Span<const char>(aName, Np1 - 1));  // drop the trailing '\0'
+  }
+
+  ~JSONSchemaWriter() { mWriter.EndObject(); }
+};
+
+// This class helps create an indexed list of unique strings, and inserts the
+// index as a JSON value. The collected list of unique strings can later be
+// inserted as a JSON array.
+// This can be useful for elements/properties with many repeated strings.
+//
+// With only JSONWriter w,
+// `w.WriteElement("a"); w.WriteElement("b"); w.WriteElement("a");`
+// when done inside a JSON array, will generate:
+// `["a", "b", "a"]`
+//
+// With UniqueStrings u,
+// `u.WriteElement(w, "a"); u.WriteElement(w, "b"); u.WriteElement(w, "a");`
+// when done inside a JSON array, will generate:
+// `[0, 1, 0]`
+// and later, `u.SpliceStringTableElements(w)` (inside a JSON array), will
+// output the corresponding indexed list of unique strings:
+// `["a", "b"]`
+class UniqueJSONStrings {
+ public:
+  // Start an empty list of unique strings.
+  MFBT_API explicit UniqueJSONStrings(
+      JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle);
+
+  // Start with a copy of the strings from another list.
+  MFBT_API explicit UniqueJSONStrings(
+      const UniqueJSONStrings& aOther,
+      JSONWriter::CollectionStyle aStyle = JSONWriter::MultiLineStyle);
+
+  MFBT_API ~UniqueJSONStrings();
+
+  // Add `aStr` to the list (if not already there), and write its index as a
+  // named object property.
+  void WriteProperty(JSONWriter& aWriter, const Span<const char>& aName,
+                     const Span<const char>& aStr) {
+    aWriter.IntProperty(aName, GetOrAddIndex(aStr));
+  }
+
+  // Add `aStr` to the list (if not already there), and write its index as an
+  // array element.
+  void WriteElement(JSONWriter& aWriter, const Span<const char>& aStr) {
+    aWriter.IntElement(GetOrAddIndex(aStr));
+  }
+
+  // Splice all collected unique strings into an array. This should only be done
+  // once, and then this UniqueStrings shouldn't be used anymore.
+  MFBT_API void SpliceStringTableElements(SpliceableJSONWriter& aWriter);
+
+ private:
+  // If `aStr` is already listed, return its index.
+  // Otherwise add it to the list and return the new index.
+  MFBT_API uint32_t GetOrAddIndex(const Span<const char>& aStr);
+
+  SpliceableChunkedJSONWriter mStringTableWriter;  // presumably the string list
+  // NOTE(review): keyed by hash only -- confirm how collisions are handled.
+  HashMap<HashNumber, uint32_t> mStringHashToIndexMap;
+};
+
+void SpliceableJSONWriter::UniqueStringProperty(const Span<const char>& aName,
+                                                const Span<const char>& aStr) {
+  MOZ_RELEASE_ASSERT(mUniqueStrings);  // SetUniqueStrings() must come first
+  mUniqueStrings->WriteProperty(*this, aName, aStr);  // writes aStr's index
+}
+
+// Add `aStr` to the unique-strings list (if not already there), and write its
+// index as an array element.
+void SpliceableJSONWriter::UniqueStringElement(const Span<const char>& aStr) {
+  MOZ_RELEASE_ASSERT(mUniqueStrings);  // SetUniqueStrings() must come first
+  mUniqueStrings->WriteElement(*this, aStr);
+}
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // BASEPROFILEJSONWRITER_H
diff --git a/mozglue/baseprofiler/public/BaseProfiler.h b/mozglue/baseprofiler/public/BaseProfiler.h
new file mode 100644
index 0000000000..4bf1705041
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfiler.h
@@ -0,0 +1,964 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The Gecko Profiler is an always-on profiler that takes fast and low overhead
+// samples of the program execution using only userspace functionality for
+// portability. The goal of this module is to provide performance data in a
+// generic cross-platform way without requiring custom tools or kernel support.
+//
+// Samples are collected to form a timeline with optional timeline events
+// (markers) used for filtering. The samples include both native stacks and
+// platform-independent "label stack" frames.
+
+#ifndef BaseProfiler_h
+#define BaseProfiler_h
+
+// This file is safe to include unconditionally, and only defines
+// empty macros if MOZ_GECKO_PROFILER is not set.
+
+// These headers are also safe to include unconditionally, with empty macros if
+// MOZ_GECKO_PROFILER is not set.
+#include "mozilla/BaseProfilerCounts.h"
+
+// BaseProfilerMarkers.h is #included in the middle of this header!
+// #include "mozilla/BaseProfilerMarkers.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+#  include "mozilla/BaseProfilerMarkers.h"
+#  include "mozilla/UniquePtr.h"
+
+// This file can be #included unconditionally. However, everything within this
+// file must be guarded by a #ifdef MOZ_GECKO_PROFILER, *except* for the
+// following macros and functions, which encapsulate the most common operations
+// and thus avoid the need for many #ifdefs.
+
+#  define AUTO_BASE_PROFILER_INIT
+
+#  define BASE_PROFILER_REGISTER_THREAD(name)
+#  define BASE_PROFILER_UNREGISTER_THREAD()
+#  define AUTO_BASE_PROFILER_REGISTER_THREAD(name)
+
+#  define AUTO_BASE_PROFILER_THREAD_SLEEP
+#  define AUTO_BASE_PROFILER_THREAD_WAKE
+
+#  define AUTO_BASE_PROFILER_LABEL(label, categoryPair)
+#  define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str)
+#  define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx)
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString, \
+                                                categoryPair, ctx, flags)
+
+#  define AUTO_PROFILER_STATS(name)
+
+// Function stubs for when MOZ_GECKO_PROFILER is not defined.
+
+namespace mozilla {
+// This won't be used, it's just there to allow the empty definition of
+// `profiler_capture_backtrace`.
+class ProfileChunkedBuffer {};
+
+namespace baseprofiler {
+// This won't be used, it's just there to allow the empty definition of
+// `profiler_get_backtrace`.
+struct ProfilerBacktrace {};
+using UniqueProfilerBacktrace = UniquePtr<ProfilerBacktrace>;
+
+// The backtrace getters/capturers below always fail (nullptr or false); the
+// result is expected to be passed on to another empty macro or stub anyway.
+
+static inline UniqueProfilerBacktrace profiler_get_backtrace() {
+  return UniqueProfilerBacktrace(nullptr);
+}
+
+static inline bool profiler_capture_backtrace_into(
+    ProfileChunkedBuffer& aChunkedBuffer) {
+  return false;
+}
+
+static inline UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
+  return UniquePtr<ProfileChunkedBuffer>(nullptr);
+}
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#else  // !MOZ_GECKO_PROFILER
+
+#  include "BaseProfilingStack.h"
+
+#  include "mozilla/Assertions.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/Attributes.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/Sprintf.h"
+#  include "mozilla/ThreadLocal.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/UniquePtr.h"
+
+#  include <functional>
+#  include <stdint.h>
+#  include <string>
+
+namespace mozilla {
+
+class MallocAllocPolicy;
+class ProfileChunkedBuffer;
+template <class T, size_t MinInlineCapacity, class AllocPolicy>
+class Vector;
+
+namespace baseprofiler {
+
+class ProfilerBacktrace;
+class SpliceableJSONWriter;
+
+// Helpers for the RAII macros below: build a per-line name `raiiObject<line>`.
+#  define BASE_PROFILER_RAII_PASTE(id, line) id##line
+#  define BASE_PROFILER_RAII_EXPAND(id, line) BASE_PROFILER_RAII_PASTE(id, line)
+#  define BASE_PROFILER_RAII BASE_PROFILER_RAII_EXPAND(raiiObject, __LINE__)
+
+//---------------------------------------------------------------------------
+// Profiler features
+//---------------------------------------------------------------------------
+
+// Higher-order macro listing every feature as MACRO(bit index, string name,
+// identifier, description). The indices are internal and may be renumbered,
+// but must stay below 29: detail::RacyFeatures reserves bits 29-31. Any
+// changes to this list should also be applied to the feature list in
+// toolkit/components/extensions/schemas/geckoProfiler.json.
+#  define BASE_PROFILER_FOR_EACH_FEATURE(MACRO)                                \
+    MACRO(0, "java", Java, "Profile Java code, Android only")                  \
+                                                                               \
+    MACRO(1, "js", JS,                                                         \
+          "Get the JS engine to expose the JS stack to the profiler")          \
+                                                                               \
+    /* The DevTools profiler doesn't want the native addresses. */             \
+    MACRO(2, "leaf", Leaf, "Include the C++ leaf node if not stackwalking")    \
+                                                                               \
+    MACRO(3, "mainthreadio", MainThreadIO, "Add main thread file I/O")         \
+                                                                               \
+    MACRO(4, "fileio", FileIO,                                                 \
+          "Add file I/O from all profiled threads, implies mainthreadio")      \
+                                                                               \
+    MACRO(5, "fileioall", FileIOAll,                                           \
+          "Add file I/O from all threads, implies fileio")                     \
+                                                                               \
+    MACRO(6, "noiostacks", NoIOStacks,                                         \
+          "File I/O markers do not capture stacks, to reduce overhead")        \
+                                                                               \
+    MACRO(7, "screenshots", Screenshots,                                       \
+          "Take a snapshot of the window on every composition")                \
+                                                                               \
+    MACRO(8, "seqstyle", SequentialStyle,                                      \
+          "Disable parallel traversal in styling")                             \
+                                                                               \
+    MACRO(9, "stackwalk", StackWalk,                                           \
+          "Walk the C++ stack, not available on all platforms")                \
+                                                                               \
+    MACRO(10, "tasktracer", TaskTracer,                                        \
+          "Start profiling with feature TaskTracer")                           \
+                                                                               \
+    MACRO(11, "threads", Threads, "Profile the registered secondary threads")  \
+                                                                               \
+    MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine") \
+                                                                               \
+    MACRO(13, "jsallocations", JSAllocations,                                  \
+          "Have the JavaScript engine track allocations")                      \
+                                                                               \
+    MACRO(14, "nostacksampling", NoStackSampling,                              \
+          "Disable all stack sampling: Cancels \"js\", \"leaf\", "             \
+          "\"stackwalk\" and labels")                                          \
+                                                                               \
+    MACRO(15, "preferencereads", PreferenceReads,                              \
+          "Track when preferences are read")                                   \
+                                                                               \
+    MACRO(16, "nativeallocations", NativeAllocations,                          \
+          "Collect the stacks from a smaller subset of all native "            \
+          "allocations, biasing towards collecting larger allocations")        \
+                                                                               \
+    MACRO(17, "ipcmessages", IPCMessages,                                      \
+          "Have the IPC layer track cross-process messages")                   \
+                                                                               \
+    MACRO(18, "audiocallbacktracing", AudioCallbackTracing,                    \
+          "Audio callback tracing")                                            \
+                                                                               \
+    MACRO(19, "cpu", CPUUtilization, "CPU utilization")
+
+struct ProfilerFeature {
+#  define DECLARE_FEATURE(n_, str_, Name_, desc_)             \
+    static constexpr uint32_t Name_ = (1u << n_);             \
+    static constexpr bool Has##Name_(uint32_t aFeatures) {    \
+      return (aFeatures & Name_) != 0;                        \
+    }                                                         \
+    static constexpr void Set##Name_(uint32_t& aFeatures) {   \
+      aFeatures = aFeatures | Name_;                          \
+    }                                                         \
+    static constexpr void Clear##Name_(uint32_t& aFeatures) { \
+      aFeatures = aFeatures & ~Name_;                         \
+    }
+
+  // Per feature: a bit constant plus Has/Set/Clear helpers for a feature set.
+  BASE_PROFILER_FOR_EACH_FEATURE(DECLARE_FEATURE)
+
+#  undef DECLARE_FEATURE
+};
+
+namespace detail {
+
+// RacyFeatures is only defined in this header file so that its methods can
+// be inlined into profiler_is_active(). Please do not use anything from the
+// detail namespace outside the profiler.
+
+// Within the profiler's code, the preferred way to check profiler activeness
+// and features is via ActivePS(). However, that requires locking gPSMutex.
+// There are some hot operations where absolute precision isn't required, so we
+// duplicate the activeness/feature state in a lock-free manner in this class.
+class RacyFeatures {
+ public:
+  MFBT_API static void SetActive(uint32_t aFeatures);
+
+  MFBT_API static void SetInactive();
+
+  MFBT_API static void SetPaused();
+
+  MFBT_API static void SetUnpaused();
+
+  MFBT_API static void SetSamplingPaused();
+
+  MFBT_API static void SetSamplingUnpaused();
+
+  MFBT_API static bool IsActive();
+
+  MFBT_API static bool IsActiveWithFeature(uint32_t aFeature);
+
+  // True if profiler is active, and not fully paused.
+  // Note that periodic sampling *could* be paused!
+  MFBT_API static bool IsActiveAndUnpaused();
+
+  // True if profiler is active, and sampling is not paused (through generic
+  // `SetPaused()` or specific `SetSamplingPaused()`).
+  MFBT_API static bool IsActiveAndSamplingUnpaused();
+
+ private:
+  static constexpr uint32_t Active = 1u << 31;
+  static constexpr uint32_t Paused = 1u << 30;
+  static constexpr uint32_t SamplingPaused = 1u << 29;
+
+// Every feature bit must lie below the reserved state bits (29, 30 and 31).
+#  define NO_OVERLAP(n_, str_, Name_, desc_)               \
+    static_assert(ProfilerFeature::Name_ < SamplingPaused, \
+                  "bad feature value");
+
+  BASE_PROFILER_FOR_EACH_FEATURE(NO_OVERLAP);
+
+#  undef NO_OVERLAP
+
+  // We combine the active bit with the feature bits so they can be read or
+  // written in a single atomic operation.
+  // TODO: Could this be MFBT_DATA for better inlining optimization?
+  static Atomic<uint32_t, MemoryOrdering::Relaxed> sActiveAndFeatures;
+};
+
+MFBT_API bool IsThreadBeingProfiled();
+
+}  // namespace detail
+
+//---------------------------------------------------------------------------
+// Start and stop the profiler
+//---------------------------------------------------------------------------
+
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_ENTRIES =
+#  if !defined(GP_PLAT_arm_android)
+    MakePowerOfTwo32<1024 * 1024>();  // 1M entries = 8MB
+#  else
+    MakePowerOfTwo32<128 * 1024>();  // 128k entries = 1MB
+#  endif
+
+// Startup profiling usually needs to capture more data, especially on slow
+// systems.
+static constexpr PowerOfTwo32 BASE_PROFILER_DEFAULT_STARTUP_ENTRIES =
+#  if !defined(GP_PLAT_arm_android)
+    MakePowerOfTwo32<4 * 1024 * 1024>();  // 4M entries = 32MB
+#  else
+    MakePowerOfTwo32<256 * 1024>();  // 256k entries = 2MB
+#  endif
+
+#  define BASE_PROFILER_DEFAULT_DURATION 20
+#  define BASE_PROFILER_DEFAULT_INTERVAL 1
+
+// Initialize the profiler. If MOZ_PROFILER_STARTUP is set the profiler will
+// also be started. This call must happen before any other profiler calls
+// (except profiler_start(), which will call profiler_init() if it hasn't
+// already run).
+MFBT_API void profiler_init(void* stackTop);
+
+#  define AUTO_BASE_PROFILER_INIT \
+    ::mozilla::baseprofiler::AutoProfilerInit BASE_PROFILER_RAII
+
+// Clean up the profiler module, stopping it if required. This function may
+// also save a shutdown profile if requested. No profiler calls should happen
+// after this point and all profiling stack labels should have been popped.
+MFBT_API void profiler_shutdown();
+
+// Start the profiler -- initializing it first if necessary -- with the
+// selected options. Stops and restarts the profiler if it is already active.
+// After starting the profiler is "active". The samples will be recorded in a
+// circular buffer.
+//   "aCapacity" is the maximum number of 8-byte entries in the profiler's
+//               circular buffer.
+//   "aInterval" is the sampling interval, measured in milliseconds.
+//   "aFeatures" is the feature set. Features unsupported by this
+//               platform/configuration are ignored.
+//   "aFilters" is the list of thread filters. Threads that do not match any
+//              of the filters are not profiled. A filter matches a thread if
+//              (a) the thread name contains the filter as a case-insensitive
+//                  substring, or
+//              (b) the filter is of the form "pid:<n>" where n is the process
+//                  id of the process that the thread is running in.
+//   "aDuration" is the duration of entries in the profiler's circular buffer.
+MFBT_API void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
+                             uint32_t aFeatures, const char** aFilters,
+                             uint32_t aFilterCount,
+                             const Maybe<double>& aDuration = Nothing());
+
+// Stop the profiler and discard the profile without saving it. A no-op if the
+// profiler is inactive. After stopping the profiler is "inactive".
+MFBT_API void profiler_stop();
+
+// If the profiler is inactive, start it. If it's already active, restart it if
+// the requested settings differ from the current settings. Both the check and
+// the state change are performed while the profiler state is locked.
+// The only difference to profiler_start is that the current buffer contents are
+// not discarded if the profiler is already running with the requested settings.
+MFBT_API void profiler_ensure_started(
+    PowerOfTwo32 aCapacity, double aInterval, uint32_t aFeatures,
+    const char** aFilters, uint32_t aFilterCount,
+    const Maybe<double>& aDuration = Nothing());
+
+//---------------------------------------------------------------------------
+// Control the profiler
+//---------------------------------------------------------------------------
+
+// Register/unregister threads with the profiler (same behavior whether it is
+// active or not). The macro passes a local's address as the stack-top guess.
+#  define BASE_PROFILER_REGISTER_THREAD(name)                             \
+    do {                                                                  \
+      char stackTop;                                                      \
+      ::mozilla::baseprofiler::profiler_register_thread(name, &stackTop); \
+    } while (0)
+#  define BASE_PROFILER_UNREGISTER_THREAD() \
+    ::mozilla::baseprofiler::profiler_unregister_thread()
+MFBT_API ProfilingStack* profiler_register_thread(const char* name,
+                                                  void* guessStackTop);
+MFBT_API void profiler_unregister_thread();
+
+// Registers a DOM Window (the JS global `window`) with the profiler. Each
+// Window _roughly_ corresponds to a single document loaded within a
+// BrowsingContext. The unique IDs for both the Window and BrowsingContext are
+// recorded to allow correlating different Windows loaded within the same tab or
+// frame element.
+//
+// We register a page for each navigation, but we do not register
+// history.pushState or history.replaceState since they correspond to the same
+// Inner Window ID. When a Browsing context is first loaded, the first URL
+// loaded in it will be about:blank. Because of that, this call keeps the first
+// non-about:blank registration of window and discards the previous one.
+//
+//   "aBrowsingContextID"     is the ID of the browsing context that document
+//                            belongs to. That's used to determine the tab of
+//                            that page.
+//   "aInnerWindowID"         is the ID of the `window` global object of that
+//                            document.
+//   "aUrl"                   is the URL of the page.
+//   "aEmbedderInnerWindowID" is the inner window id of embedder. It's used to
+//                            determine sub documents of a page.
+MFBT_API void profiler_register_page(uint64_t aBrowsingContextID,
+                                     uint64_t aInnerWindowID,
+                                     const std::string& aUrl,
+                                     uint64_t aEmbedderInnerWindowID);
+// Unregister a page with the profiler.
+//
+// Takes an Inner Window ID and unregisters the page entry that has that ID.
+MFBT_API void profiler_unregister_page(uint64_t aRegisteredInnerWindowID);
+
+// Remove all registered and unregistered pages in the profiler.
+void profiler_clear_all_pages();
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+// Register and unregister a thread within a scope.
+#  define AUTO_BASE_PROFILER_REGISTER_THREAD(name) \
+    ::mozilla::baseprofiler::AutoProfilerRegisterThread BASE_PROFILER_RAII(name)
+
+// Pause and resume the profiler. No-ops if the profiler is inactive. While
+// paused the profiler will not take any samples and will not record any data
+// into its buffers. The profiler remains fully initialized in this state.
+// This feature will keep JavaScript profiling enabled, thus allowing toggling
+// the profiler without invalidating the JIT.
+MFBT_API void profiler_pause();
+MFBT_API void profiler_resume();
+
+// Only pause and resume the periodic sampling loop, including stack sampling,
+// counters, and profiling overheads.
+MFBT_API void profiler_pause_sampling();
+MFBT_API void profiler_resume_sampling();
+
+// These functions tell the profiler that a thread went to sleep so that we can
+// avoid sampling it while it's sleeping. Calling profiler_thread_sleep()
+// twice without an intervening profiler_thread_wake() is an error. All three
+// functions operate the same whether the profiler is active or inactive.
+MFBT_API void profiler_thread_sleep();
+MFBT_API void profiler_thread_wake();
+
+// Mark a thread as asleep/awake within a scope.
+#  define AUTO_BASE_PROFILER_THREAD_SLEEP \
+    ::mozilla::baseprofiler::AutoProfilerThreadSleep BASE_PROFILER_RAII
+#  define AUTO_BASE_PROFILER_THREAD_WAKE \
+    ::mozilla::baseprofiler::AutoProfilerThreadWake BASE_PROFILER_RAII
+
+//---------------------------------------------------------------------------
+// Get information from the profiler
+//---------------------------------------------------------------------------
+
+// Is the profiler active? Note: the return value of this function can become
+// immediately out-of-date. E.g. the profiler might be active but then
+// profiler_stop() is called immediately afterward. One common and reasonable
+// pattern of usage is the following:
+//
+//   if (profiler_is_active()) {
+//     ExpensiveData expensiveData = CreateExpensiveData();
+//     PROFILER_OPERATION(expensiveData);
+//   }
+//
+// where PROFILER_OPERATION is a no-op if the profiler is inactive. In this
+// case the profiler_is_active() check is just an optimization -- it prevents
+// us calling CreateExpensiveData() unnecessarily in most cases, but the
+// expensive data will end up being created but not used if another thread
+// stops the profiler between the CreateExpensiveData() and PROFILER_OPERATION
+// calls.
+inline bool profiler_is_active() {
+  return detail::RacyFeatures::IsActive();
+}
+
+// Same as profiler_is_active(), but with the same extra checks that determine
+// if the profiler would currently store markers. So this should be used before
+// doing some potentially-expensive work that's used in a marker. E.g.:
+//
+//   if (profiler_can_accept_markers()) {
+//     BASE_PROFILER_MARKER(name, OTHER, SomeMarkerType, expensivePayload);
+//   }
+inline bool profiler_can_accept_markers() {
+  return detail::RacyFeatures::IsActiveAndUnpaused();
+}
+
+// Is the profiler active, and is the current thread being profiled?
+// (Same caveats and recommended usage as profiler_is_active().)
+inline bool profiler_thread_is_being_profiled() {
+  return profiler_is_active() ? detail::IsThreadBeingProfiled() : false;
+}
+
+// Is the profiler active and paused? Returns false if the profiler is inactive.
+MFBT_API bool profiler_is_paused();
+
+// Is the profiler active and sampling is paused? Returns false if the profiler
+// is inactive.
+MFBT_API bool profiler_is_sampling_paused();
+
+// Is the current thread sleeping?
+MFBT_API bool profiler_thread_is_sleeping();
+
+// Get all the features supported by the profiler that are accepted by
+// profiler_start(). The result is the same whether the profiler is active or
+// not.
+MFBT_API uint32_t profiler_get_available_features();
+
+// Check if a profiler feature (specified via the ProfilerFeature type) is
+// active. Returns false if the profiler is inactive. Note: the return value
+// can become immediately out-of-date, much like the return value of
+// profiler_is_active().
+MFBT_API bool profiler_feature_active(uint32_t aFeature);
+
+// Get the params used to start the profiler. Returns 0 and an empty vector
+// (via outparams) if the profiler is inactive. It's possible that the features
+// returned may be slightly different to those requested due to required
+// adjustments.
+MFBT_API void profiler_get_start_params(
+    int* aEntrySize, Maybe<double>* aDuration, double* aInterval,
+    uint32_t* aFeatures, Vector<const char*, 0, MallocAllocPolicy>* aFilters);
+
+// The number of milliseconds since the process started. Operates the same
+// whether the profiler is active or inactive.
+MFBT_API double profiler_time();
+
+// Get the current process's ID.
+MFBT_API int profiler_current_process_id();
+
+// Get the current thread's ID.
+MFBT_API int profiler_current_thread_id();
+
+// Statically initialized to 0, then set once from profiler_init(), which should
+// be called from the main thread before any other use of the profiler.
+extern MFBT_DATA int scProfilerMainThreadId;
+
+inline int profiler_main_thread_id() { return scProfilerMainThreadId; }
+
+inline bool profiler_is_main_thread() {
+  return profiler_current_thread_id() == scProfilerMainThreadId;
+}
+
+// An object of this class is passed to profiler_suspend_and_sample_thread().
+// For each stack frame, one of the Collect methods will be called.
+class ProfilerStackCollector {
+ public:
+  // Some collectors need to worry about possibly overwriting previous
+  // generations of data. When that is not an issue, these two methods can
+  // return Nothing(), which is their default behaviour.
+  virtual Maybe<uint64_t> SamplePositionInBuffer() { return Nothing(); }
+  virtual Maybe<uint64_t> BufferRangeStart() { return Nothing(); }
+
+  // This method will be called once if the thread being suspended is the main
+  // thread. The default implementation does nothing.
+  virtual void SetIsMainThread() {}
+
+  // WARNING: The target thread is suspended while the Collect methods below
+  // are called. Do not try to allocate or acquire any locks, or you could
+  // deadlock. The target thread will have resumed by the time the sampling
+  // call (presumably profiler_suspend_and_sample_thread()) returns.
+
+  virtual void CollectNativeLeafAddr(void* aAddr) = 0;
+
+  virtual void CollectProfilingStackFrame(
+      const ProfilingStackFrame& aFrame) = 0;
+};
+
+// This method suspends the thread identified by aThreadId, samples its
+// profiling stack, JS stack, and (optionally) native stack, passing the
+// collected frames into aCollector. aFeatures dictates which profiler features
+// are used. |Leaf| is the only relevant one.
+MFBT_API void profiler_suspend_and_sample_thread(
+    int aThreadId, uint32_t aFeatures, ProfilerStackCollector& aCollector,
+    bool aSampleNative = true);
+
+struct ProfilerBacktraceDestructor {
+  MFBT_API void operator()(ProfilerBacktrace*);
+};
+
+using UniqueProfilerBacktrace =
+    UniquePtr<ProfilerBacktrace, ProfilerBacktraceDestructor>;
+
+// Immediately capture the current thread's call stack, store it in the provided
+// buffer (usually to avoid allocations if you can construct the buffer on the
+// stack). Returns false if unsuccessful, or if the profiler is inactive.
+MFBT_API bool profiler_capture_backtrace_into(
+    ProfileChunkedBuffer& aChunkedBuffer);
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfileChunkedBuffer (usually for later use in MarkerStack::TakeBacktrace()).
+// May be null if unsuccessful, or if the profiler is inactive.
+MFBT_API UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace();
+
+// Immediately capture the current thread's call stack, and return it in a
+// ProfilerBacktrace (usually for later use in marker functions that take a
+// ProfilerBacktrace). May be null if unsuccessful, or if the profiler is
+// inactive.
+MFBT_API UniqueProfilerBacktrace profiler_get_backtrace();
+
+// Running count, sum and extremes of a series of recorded values.
+struct ProfilerStats {
+  unsigned n = 0;  // Number of values recorded so far.
+  double sum = 0;  // Total of all recorded values.
+  // Smallest/largest recorded value; `min` starts at the largest finite double
+  // so that any smaller recorded value replaces it.
+  double min = std::numeric_limits<double>::max();
+  double max = 0;
+  // Records one value `v`, folding it into the statistics above.
+  void Count(double v) {
+    n += 1;
+    sum = sum + v;
+    min = (v < min) ? v : min;
+    max = (v > max) ? v : max;
+  }
+};
+
+struct ProfilerBufferInfo {
+  // Index of the oldest entry. (Integers default to 0, never indeterminate.)
+  uint64_t mRangeStart = 0;
+  // Index of the newest entry.
+  uint64_t mRangeEnd = 0;
+  // Buffer capacity in number of 8-byte entries.
+  uint32_t mEntryCount = 0;
+  // Sampling stats: Interval (us) between successive samplings.
+  ProfilerStats mIntervalsUs;
+  // Sampling stats: Total duration (us) of each sampling. (Split detail below.)
+  ProfilerStats mOverheadsUs;
+  // Sampling stats: Time (us) to acquire the lock before sampling.
+  ProfilerStats mLockingsUs;
+  // Sampling stats: Time (us) to discard expired data.
+  ProfilerStats mCleaningsUs;
+  // Sampling stats: Time (us) to collect counter data.
+  ProfilerStats mCountersUs;
+  // Sampling stats: Time (us) to sample thread stacks.
+  ProfilerStats mThreadsUs;
+};
+
+// Get information about the current buffer status.
+// Returns Nothing() if the profiler is inactive.
+//
+// This information may be useful to a user-interface displaying the current
+// status of the profiler, allowing the user to get a sense for how fast the
+// buffer is being written to, and how much data is visible.
+MFBT_API Maybe<ProfilerBufferInfo> profiler_get_buffer_info();
+
+// Uncomment the following line to display profiler runtime statistics at
+// shutdown.
+// #  define PROFILER_RUNTIME_STATS
+
+#  ifdef PROFILER_RUNTIME_STATS
+// This class gathers durations and displays some basic stats when destroyed.
+// It is intended to be used as a static variable (see `AUTO_PROFILER_STATS`
+// below), to display stats at the end of the program.
+class StaticBaseProfilerStats {
+ public:
+  explicit StaticBaseProfilerStats(const char* aName) : mName(aName) {}
+
+  ~StaticBaseProfilerStats() {
+    // Everything is widened to unsigned long long to match the %llu formats.
+    using ULL = unsigned long long;
+    const ULL count = static_cast<ULL>(mNumberDurations);
+    if (count == 0) {
+      printf("[%d] Profiler stats `%s`: (nothing)\n",
+             profiler_current_process_id(), mName);
+      return;
+    }
+    const ULL totalNs = static_cast<ULL>(mSumDurationsNs);
+    printf(
+        "[%d] Profiler stats `%s`: %llu ns / %llu = %llu ns, max %llu ns\n",
+        profiler_current_process_id(), mName, totalNs, count, totalNs / count,
+        static_cast<ULL>(mLongestDurationNs));
+  }
+
+  void AddDurationFrom(TimeStamp aStart) {
+    // Time elapsed since `aStart`, rounded to the nearest nanosecond.
+    const auto elapsed = TimeStamp::NowUnfuzzed() - aStart;
+    const DurationNs duration =
+        static_cast<DurationNs>(elapsed.ToMicroseconds() * 1000 + 0.5);
+    mSumDurationsNs += duration;
+    ++mNumberDurations;
+    // Raise mLongestDurationNs to this duration if it is a new maximum.
+    for (;;) {
+      const DurationNs seen = mLongestDurationNs;
+      if (MOZ_LIKELY(duration <= seen)) {
+        break;  // Not a new maximum, nothing to do.
+      }
+      if (MOZ_LIKELY(mLongestDurationNs.compareExchange(seen, duration))) {
+        break;  // The new maximum has been stored.
+      }
+      // The exchange failed because the stored value no longer matched the
+      // one read above: loop to re-check against the current value.
+    }
+  }
+
+ private:
+  using DurationNs = uint64_t;
+  using Count = uint32_t;
+
+  Atomic<DurationNs> mSumDurationsNs{0};
+  Atomic<DurationNs> mLongestDurationNs{0};
+  Atomic<Count> mNumberDurations{0};
+  const char* mName;
+};
+
+// RAII object that measures how long it stays in scope, and reports that
+// duration to a `StaticBaseProfilerStats` when destroyed.
+class MOZ_RAII AutoProfilerStats {
+ public:
+  explicit AutoProfilerStats(StaticBaseProfilerStats& aStats)
+      : mStats(aStats) {}
+
+  ~AutoProfilerStats() { mStats.AddDurationFrom(mStart); }
+
+ private:
+  StaticBaseProfilerStats& mStats;
+  TimeStamp mStart = TimeStamp::NowUnfuzzed();  // Taken at construction.
+};
+
+// Macro that should be used to collect basic statistics from measurements of
+// block durations, from where this macro is, until the end of its enclosing
+// scope. The name is used in the static variable name and when displaying stats
+// at the end of the program; another location could use the same name but their
+// stats will not be combined, so use different names if these locations should
+// be distinguished.
+#    define AUTO_PROFILER_STATS(name)                                      \
+      static ::mozilla::baseprofiler::StaticBaseProfilerStats sStat##name( \
+          #name);                                                          \
+      ::mozilla::baseprofiler::AutoProfilerStats autoStat##name(sStat##name);
+
+#  else  // PROFILER_RUNTIME_STATS
+
+#    define AUTO_PROFILER_STATS(name)
+
+#  endif  // PROFILER_RUNTIME_STATS else
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+// BaseProfilerMarkers.h requires some stuff from this header.
+// TODO: Move common stuff to shared header, and move this #include to the top.
+#  include "mozilla/BaseProfilerMarkers.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+//---------------------------------------------------------------------------
+// Put profiling data into the profiler (labels and markers)
+//---------------------------------------------------------------------------
+
+// Insert an RAII object in this scope to enter a label stack frame. Any
+// samples collected in this scope will contain this label in their stack.
+// The label argument must be a static C string. It is usually of the
+// form "ClassName::FunctionName". (Ideally we'd use the compiler to provide
+// that for us, but __func__ gives us the function name without the class
+// name.) If the label applies to only part of a function, you can qualify it
+// like this: "ClassName::FunctionName:PartName".
+//
+// Use AUTO_BASE_PROFILER_LABEL_DYNAMIC_* if you want to add additional /
+// dynamic information to the label stack frame.
+#  define AUTO_BASE_PROFILER_LABEL(label, categoryPair)            \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \
+        label, /* aDynamicString = */ nullptr,                     \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with only one argument: the category
+// pair. The label string is taken from the category pair. This is convenient
+// for labels like
+// AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(GRAPHICS_LayerBuilding) which would
+// otherwise just repeat the string.
+#  define AUTO_BASE_PROFILER_LABEL_CATEGORY_PAIR(categoryPair)         \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(     \
+        /* aLabel = */ "", /* aDynamicString = */ nullptr,             \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair,  \
+        uint32_t(::mozilla::baseprofiler::ProfilingStackFrame::Flags:: \
+                     LABEL_DETERMINED_BY_CATEGORY_PAIR))
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but with an additional string. The
+// inserted RAII object stores the cStr pointer in a field; it does not copy the
+// string.
+//
+// WARNING: This means that the string you pass to this macro needs to live at
+// least until the end of the current scope. Be careful using this macro with
+// ns[C]String; the other AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros below are
+// preferred because they avoid this problem.
+//
+// If the profiler samples the current thread and walks the label stack while
+// this RAII object is on the stack, it will copy the supplied string into the
+// profile buffer. So there's one string copy operation, and it happens at
+// sample time.
+//
+// Compare this to the plain AUTO_BASE_PROFILER_LABEL macro, which only accepts
+// literal strings: When the label stack frames generated by
+// AUTO_BASE_PROFILER_LABEL are sampled, no string copy needs to be made because
+// the profile buffer can just store the raw pointers to the literal strings.
+// Consequently, AUTO_BASE_PROFILER_LABEL frames take up considerably less space
+// in the profile buffer than AUTO_BASE_PROFILER_LABEL_DYNAMIC_* frames.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR(label, categoryPair, cStr) \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(         \
+        label, /* aDynamicString = */ cStr,                                \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_DYNAMIC_CSTR, but takes an std::string.
+//
+// The Maybe<>s keep the copied string alive for as long as the label, and
+// avoid the string copy unless the profiler is active; consequently, unlike
+// the macros above, no label is pushed/popped when the profiler is inactive.
+// Declares locals `autoStr` and `raiiObjectString`: use at most once per scope.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING(label, categoryPair, str) \
+    ::mozilla::Maybe<std::string> autoStr;                                  \
+    ::mozilla::Maybe<::mozilla::baseprofiler::AutoProfilerLabel>            \
+        raiiObjectString;                                                   \
+    if (::mozilla::baseprofiler::profiler_is_active()) {                    \
+      autoStr.emplace(str);                                                 \
+      raiiObjectString.emplace(                                             \
+          label, autoStr->c_str(),                                          \
+          ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair);    \
+    }
+
+// Similar to AUTO_BASE_PROFILER_LABEL, but passes a JSContext* `ctx` first, so
+// that the ProfilingStack can come from the JS context instead of a TLS lookup.
+// Intended as a near no-op when the profiler is disabled, for functions where
+// nanoseconds of overhead matter. NOTE(review): AutoProfilerLabel, as declared
+// in this header, has no constructor taking `ctx` -- confirm this is usable.
+#  define AUTO_BASE_PROFILER_LABEL_FAST(label, categoryPair, ctx)  \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII( \
+        ctx, label, nullptr,                                       \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair)
+
+// Similar to AUTO_BASE_PROFILER_LABEL_FAST, but also takes a dynamic string and
+// a set of flags (values from the ProfilingStackFrame::Flags enum).
+// NOTE(review): AutoProfilerLabel declares no constructor taking `ctx` here.
+#  define AUTO_BASE_PROFILER_LABEL_DYNAMIC_FAST(label, dynamicString,     \
+                                                categoryPair, ctx, flags) \
+    ::mozilla::baseprofiler::AutoProfilerLabel BASE_PROFILER_RAII(        \
+        ctx, label, dynamicString,                                        \
+        ::mozilla::baseprofiler::ProfilingCategoryPair::categoryPair, flags)
+
+MFBT_API void profiler_add_js_marker(const char* aMarkerName,
+                                     const char* aMarkerText);
+
+// Returns true if any of the profiler mutexes are currently locked *on the
+// current thread*. This may be used by re-entrant code that may call profiler
+// functions while the same or a different profiler mutex is locked, which could
+// deadlock.
+bool profiler_is_locked_on_current_thread();
+
+//---------------------------------------------------------------------------
+// Output profiles
+//---------------------------------------------------------------------------
+
+// Set a user-friendly process name, used in JSON stream.
+MFBT_API void profiler_set_process_name(const std::string& aProcessName,
+                                        const std::string* aETLDplus1);
+
+// Get the profile encoded as a JSON string. A no-op (returning nullptr) if the
+// profiler is inactive.
+// If aIsShuttingDown is true, the current time is included as the process
+// shutdown time in the JSON's "meta" object.
+MFBT_API UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0,
+                                                bool aIsShuttingDown = false,
+                                                bool aOnlyThreads = false);
+
+// Write the profile for this process (excluding subprocesses) into aWriter.
+// Returns false if the profiler is inactive.
+MFBT_API bool profiler_stream_json_for_this_process(
+    SpliceableJSONWriter& aWriter, double aSinceTime = 0,
+    bool aIsShuttingDown = false, bool aOnlyThreads = false);
+
+// Get the profile and write it into a file. A no-op if the profiler is
+// inactive.
+MFBT_API void profiler_save_profile_to_file(const char* aFilename);
+
+//---------------------------------------------------------------------------
+// RAII classes
+//---------------------------------------------------------------------------
+
+// Scoped profiler lifetime: construction calls `profiler_init(this)` and
+// destruction calls `profiler_shutdown()`.
+class MOZ_RAII AutoProfilerInit {
+ public:
+  explicit AutoProfilerInit() { profiler_init(this); }
+
+  ~AutoProfilerInit() { profiler_shutdown(); }
+};
+
+// Registers a thread with the profiler as `aName` while in scope, and
+// unregisters it afterwards. Must be the first object on the thread's stack.
+class MOZ_RAII AutoProfilerRegisterThread final {
+ public:
+  explicit AutoProfilerRegisterThread(const char* aName) {
+    profiler_register_thread(aName, this);
+  }
+
+  // Not copyable.
+  AutoProfilerRegisterThread(const AutoProfilerRegisterThread&) = delete;
+  AutoProfilerRegisterThread& operator=(const AutoProfilerRegisterThread&) =
+      delete;
+
+  ~AutoProfilerRegisterThread() { profiler_unregister_thread(); }
+};
+
+// Calls `profiler_thread_sleep()` when constructed and
+// `profiler_thread_wake()` when destroyed.
+class MOZ_RAII AutoProfilerThreadSleep {
+ public:
+  explicit AutoProfilerThreadSleep() { profiler_thread_sleep(); }
+
+  ~AutoProfilerThreadSleep() { profiler_thread_wake(); }
+};
+
+// Temporarily wakes up the profiling of a sleeping thread while servicing
+// events such as Asynchronous Procedure Calls (APCs); no-op if already awake.
+class MOZ_RAII AutoProfilerThreadWake {
+  // True if the constructor issued the wake that the destructor must undo.
+  bool mIssuedWake;
+
+ public:
+  explicit AutoProfilerThreadWake()
+      : mIssuedWake(profiler_thread_is_sleeping()) {
+    if (mIssuedWake) {
+      profiler_thread_wake();
+    }
+  }
+
+  ~AutoProfilerThreadWake() {
+    if (mIssuedWake) {
+      MOZ_ASSERT(!profiler_thread_is_sleeping());
+      profiler_thread_sleep();
+    }
+  }
+};
+
+// Scoped label frame: pushes a label frame on a ProfilingStack when
+// constructed and pops it when destroyed. The ProfilingStack is referenced, not
+// owned. Objects of this class are stack-allocated, so they are bounded by the
+// lifetime of their thread and cannot outlive its ProfilingStack.
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  // Constructor used by AUTO_BASE_PROFILER_LABEL and the
+  // AUTO_BASE_PROFILER_LABEL_DYNAMIC_* macros.
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString,
+                    ProfilingCategoryPair aCategoryPair, uint32_t aFlags = 0) {
+    // The ProfilingStack comes from TLS.
+    Push(GetProfilingStack(), aLabel, aDynamicString, aCategoryPair, aFlags);
+  }
+
+  // Remembers `aProfilingStack` and, if non-null, pushes the label frame on it.
+  // May run both on and off the main thread.
+  void Push(ProfilingStack* aProfilingStack, const char* aLabel,
+            const char* aDynamicString, ProfilingCategoryPair aCategoryPair,
+            uint32_t aFlags = 0) {
+    mProfilingStack = aProfilingStack;
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pushLabelFrame(aLabel, aDynamicString, this,
+                                    aCategoryPair, aFlags);
+  }
+
+  // Pops the frame pushed at construction, if any (on or off the main thread).
+  ~AutoProfilerLabel() {
+    if (!mProfilingStack) {
+      return;
+    }
+    mProfilingStack->pop();
+  }
+
+  MFBT_API static ProfilingStack* GetProfilingStack();
+
+ private:
+  // Saved by Push() so that the destructor doesn't redo the TLS lookup.
+  ProfilingStack* mProfilingStack;
+
+ public:
+  // See the comment on the definition in platform.cpp for details about this.
+  static MOZ_THREAD_LOCAL(ProfilingStack*) sProfilingStack;
+};
+
+// Get the MOZ_PROFILER_STARTUP* environment variables that should be
+// supplied to a child process that is about to be launched, in order
+// to make that child process start with the same profiler settings as
+// in the current process.  The given function is invoked once for
+// each variable to be set.
+MFBT_API void GetProfilerEnvVarsForChildProcess(
+    std::function<void(const char* key, const char* value)>&& aSetEnv);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // BaseProfiler_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerCounts.h b/mozglue/baseprofiler/public/BaseProfilerCounts.h
new file mode 100644
index 0000000000..fbcc713744
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerCounts.h
@@ -0,0 +1,280 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerCounts_h
+#define BaseProfilerCounts_h
+
+#ifndef MOZ_GECKO_PROFILER
+// No-op stubs; keep each parameter list in sync with the real macro below.
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category, description)
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)
+#  define AUTO_BASE_PROFILER_COUNT(label, count)
+#  define AUTO_BASE_PROFILER_STATIC_COUNT(label, count)
+#  define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label)
+
+#else
+
+#  include "mozilla/Atomics.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+class BaseProfilerCount;
+MFBT_API void profiler_add_sampled_counter(BaseProfilerCount* aCounter);
+MFBT_API void profiler_remove_sampled_counter(BaseProfilerCount* aCounter);
+
+using ProfilerAtomicSigned = Atomic<int64_t, MemoryOrdering::Relaxed>;
+using ProfilerAtomicUnsigned = Atomic<uint64_t, MemoryOrdering::Relaxed>;
+
+// Counter support
+// There are two types of counters:
+// 1) a simple counter which can be added to or subtracted from.  This could
+// track the number of objects of a type, the number of calls to something
+// (reflow, JIT, etc).
+// 2) a combined counter which has the above, plus a number-of-calls counter
+// that is incremented by 1 for each call to modify the count.  This provides
+// an optional source for a 'heatmap' of access.  This can be used (for
+// example) to track the amount of memory allocated, and provide a heatmap of
+// memory operations (allocs/frees).
+//
+// Counters are sampled by the profiler once per sample-period.  At this time,
+// all counters are global to the process.  In the future, there might be more
+// versions with per-thread or other discriminators.
+//
+// Typical usage:
+// There are two ways to use counters: With heap-created counter objects,
+// or using macros.  Note: the macros use statics, and will be slightly
+// faster/smaller, and you need to care about creating them before using
+// them.  They're similar to the use-pattern for the other AUTO_PROFILER*
+// macros, but they do need the PROFILER_DEFINE* to be used to instantiate
+// the statics.
+//
+// PROFILER_DEFINE_COUNT(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() { ... AUTO_PROFILER_COUNT(mything, number_of_bytes_used); ... }
+//
+// or (to also get a heatmap)
+//
+// PROFILER_DEFINE_COUNT_TOTAL(mything, "JIT", "Some JIT byte count")
+// ...
+// void foo() {
+//   ...
+//   AUTO_PROFILER_COUNT_TOTAL(mything, number_of_bytes_generated);
+//   ...
+// }
+//
+// To use without statics/macros:
+//
+// UniquePtr<ProfilerCounter> myCounter;
+// ...
+// myCounter =
+//   MakeUnique<ProfilerCounter>("mything", "JIT", "Some JIT byte count");
+// ...
+// void foo() { ... myCounter->Add(number_of_bytes_generated); ... }
+
+class BaseProfilerCount {  // Sampled view of externally-stored counter atomics.
+ public:
+  BaseProfilerCount(const char* aLabel, ProfilerAtomicSigned* aCounter,
+                    ProfilerAtomicUnsigned* aNumber, const char* aCategory,
+                    const char* aDescription)
+      : mLabel(aLabel),
+        mCategory(aCategory),
+        mDescription(aDescription),
+        mCounter(aCounter),
+        mNumber(aNumber) {
+#  define COUNTER_CANARY 0xDEADBEEF
+#  ifdef DEBUG
+    mCanary = COUNTER_CANARY;  // checked by Sample(), cleared by the destructor
+    mPrevNumber = 0;
+#  endif
+    // Can't call profiler_* here since this may be non-xul-library
+  }
+#  ifdef DEBUG
+  ~BaseProfilerCount() { mCanary = 0; }
+#  endif
+  // Copies the current counter value (and number, or 0) into the out-params.
+  void Sample(int64_t& aCounter, uint64_t& aNumber) {
+    MOZ_ASSERT(mCanary == COUNTER_CANARY);
+
+    aCounter = *mCounter;  // mCounter is dereferenced unconditionally
+    aNumber = mNumber ? *mNumber : 0;  // reports 0 when there is no mNumber
+#  ifdef DEBUG
+    MOZ_ASSERT(aNumber >= mPrevNumber);  // the number must never go backwards
+    mPrevNumber = aNumber;
+#  endif
+  }
+
+  // We don't define ++ and Add() here, since the static defines directly
+  // increment the atomic counters, and the subclasses implement ++ and
+  // Add() directly.
+
+  // These typically are static strings (for example if you use the macros
+  // below)
+  const char* mLabel;
+  const char* mCategory;
+  const char* mDescription;
+  // We're ok with these being un-ordered in race conditions.  These are
+  // pointers because we want to be able to use statics and increment them
+  // directly.  Otherwise we could just have them inline, and not need the
+  // constructor args.
+  // These can be static globals (using the macros below), though they
+  // don't have to be - their lifetime must be longer than the use of them
+  // by the profiler (see profiler_add/remove_sampled_counter()).  If you're
+  // using a lot of these, they probably should be allocated at runtime (see
+  // class ProfilerCountOnly below).
+  ProfilerAtomicSigned* mCounter;
+  ProfilerAtomicUnsigned* mNumber;  // may be null
+
+#  ifdef DEBUG
+  uint32_t mCanary;
+  uint64_t mPrevNumber;  // value of number from the last Sample()
+#  endif
+};
+
+// Counter designed to be allocated dynamically; it adds itself to the sampled
+// counters when constructed. Bump it with `++(*obj)` or `obj->Add(n)`.
+class ProfilerCounter final : public BaseProfilerCount {
+ public:
+  ProfilerAtomicSigned mCounter;
+
+  ProfilerCounter(const char* aLabel, const char* aCategory,
+                  const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, nullptr, aCategory, aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  virtual ~ProfilerCounter() { profiler_remove_sampled_counter(this); }
+
+  void Add(int64_t aNumber) { mCounter += aNumber; }
+
+  BaseProfilerCount& operator++() {
+    Add(1);
+    return *this;
+  }
+};
+
+// Like ProfilerCounter, plus a heatmap: `mNumber` counts calls to ++/Add().
+class ProfilerCounterTotal final : public BaseProfilerCount {
+ public:
+  ProfilerAtomicSigned mCounter;
+  ProfilerAtomicUnsigned mNumber;
+
+  ProfilerCounterTotal(const char* aLabel, const char* aCategory,
+                       const char* aDescription)
+      : BaseProfilerCount(aLabel, &mCounter, &mNumber, aCategory,
+                          aDescription) {
+    // Assume we're in libxul
+    profiler_add_sampled_counter(this);
+  }
+
+  virtual ~ProfilerCounterTotal() { profiler_remove_sampled_counter(this); }
+
+  void Add(int64_t aNumber) {
+    mCounter += aNumber;
+    mNumber++;
+  }
+
+  BaseProfilerCount& operator++() {
+    Add(1);
+    return *this;
+  }
+};
+
+// Defines a counter sampled on each profiler tick: a running count (signed)
+// and a number-of-uses. These are two independent Atomics, so a sample may
+// see the last call in the number but not yet in the count; that is accepted.
+#  define BASE_PROFILER_DEFINE_COUNT_TOTAL(label, category, description)     \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0); \
+    ::mozilla::baseprofiler::ProfilerAtomicUnsigned                          \
+        profiler_number_##label(0);                                          \
+    const char profiler_category_##label[] = category;                       \
+    const char profiler_description_##label[] = description;                 \
+    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>         \
+        AutoCount_##label;
+
+// Defines a counter for AUTO_BASE_PROFILER_COUNT(); no number-of-calls kept.
+#  define BASE_PROFILER_DEFINE_COUNT(label, category, description)           \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0); \
+    const char profiler_category_##label[] = category;                       \
+    const char profiler_description_##label[] = description;                 \
+    ::mozilla::UniquePtr<::mozilla::baseprofiler::BaseProfilerCount>         \
+        AutoCount_##label;
+
+// Avoids a possible allocation, but creates a static initializer if used.
+#  define BASE_PROFILER_DEFINE_STATIC_COUNT_TOTAL(label, category,           \
+                                                  description)               \
+    ::mozilla::baseprofiler::ProfilerAtomicSigned profiler_count_##label(0); \
+    ::mozilla::baseprofiler::ProfilerAtomicUnsigned                          \
+        profiler_number_##label(0);                                          \
+    ::mozilla::baseprofiler::BaseProfilerCount AutoCount_##label(            \
+        #label, &profiler_count_##label, &profiler_number_##label, category, \
+        description);
+
+// If we didn't care about static initializers, we could avoid the need for
+// a ptr to the BaseProfilerCount object.
+
+// Bumps the call count and adds `count` to the total for `label`, lazily
+// allocating and registering its BaseProfilerCount. XXX: the if() and the
+// (theoretical) race to set the UniquePtr (i.e. possible leak) are not ideal.
+#  define AUTO_BASE_PROFILER_COUNT_TOTAL(label, count)                      \
+    do {                                                                    \
+      profiler_number_##label++; /* do this first*/                         \
+      profiler_count_##label += count;                                      \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore the theoretical double call here, and the leak. */        \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, &profiler_number_##label,  \
+                profiler_category_##label, profiler_description_##label));  \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(              \
+            AutoCount_##label.get());                                       \
+      }                                                                     \
+    } while (0)
+
+// Adds `count` to the counter `label`; allocates/registers it on first use.
+#  define AUTO_BASE_PROFILER_COUNT(label, count)                            \
+    do {                                                                    \
+      profiler_count_##label += count; /* do this first*/                   \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore the theoretical double call here, and the leak. */        \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, /* aNumber = */ nullptr,   \
+                profiler_category_##label, profiler_description_##label));  \
+        ::mozilla::baseprofiler::profiler_add_sampled_counter(              \
+            AutoCount_##label.get());                                       \
+      }                                                                     \
+    } while (0)
+
+#  define AUTO_BASE_PROFILER_STATIC_COUNT(label, count)                 \
+    do {                                                                \
+      profiler_number_##label++; /* first, so it may lead the count */  \
+      profiler_count_##label += count;                                  \
+    } while (0)
+
+// Forces the allocation of the BaseProfilerCount for `label`, if not done yet.
+#  define AUTO_BASE_PROFILER_FORCE_ALLOCATION(label)                        \
+    do {                                                                    \
+      if (!AutoCount_##label) {                                             \
+        /* Ignore that we could call this twice in theory, and that we leak \
+         * them. NOTE(review): the new counter is not registered with       \
+         * profiler_add_sampled_counter() here -- confirm intent. */        \
+        AutoCount_##label.reset(                                            \
+            new ::mozilla::baseprofiler::BaseProfilerCount(                 \
+                #label, &profiler_count_##label, &profiler_number_##label,  \
+                profiler_category_##label, profiler_description_##label));  \
+      }                                                                     \
+    } while (0)
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // !MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerCounts_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerDetail.h b/mozglue/baseprofiler/public/BaseProfilerDetail.h
new file mode 100644
index 0000000000..9027f32bc7
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerDetail.h
@@ -0,0 +1,189 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Internal Base Profiler utilities.
+
+#ifndef BaseProfilerDetail_h
+#define BaseProfilerDetail_h
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/PlatformMutex.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+namespace mozilla {
+namespace baseprofiler {
+
+// Implemented in platform.cpp
+MFBT_API int profiler_current_thread_id();
+
+namespace detail {
+
+// Thin shell around mozglue PlatformMutex that also tracks its owning thread.
+class BaseProfilerMutex : private ::mozilla::detail::MutexImpl {
+ public:
+  BaseProfilerMutex() : ::mozilla::detail::MutexImpl() {}
+  explicit BaseProfilerMutex(const char* aName)
+      : ::mozilla::detail::MutexImpl(), mName(aName) {}
+
+  BaseProfilerMutex(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex& operator=(const BaseProfilerMutex&) = delete;
+  BaseProfilerMutex(BaseProfilerMutex&&) = delete;
+  BaseProfilerMutex& operator=(BaseProfilerMutex&&) = delete;
+
+#ifdef DEBUG
+  ~BaseProfilerMutex() { MOZ_ASSERT(mOwningThreadId == 0); }  // not locked
+#endif  // DEBUG
+
+  [[nodiscard]] bool IsLockedOnCurrentThread() const {
+    return mOwningThreadId == baseprofiler::profiler_current_thread_id();
+  }
+
+  void AssertCurrentThreadOwns() const {
+    MOZ_ASSERT(IsLockedOnCurrentThread());
+  }
+
+  void Lock() {  // Takes the mutex, then records this thread as its owner.
+    const int tid = baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(tid != 0);  // 0 is reserved for "no owner".
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    ::mozilla::detail::MutexImpl::lock();
+    MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly");
+    mOwningThreadId = tid;
+  }
+
+  [[nodiscard]] bool TryLock() {  // Like Lock(), but false if not acquired.
+    const int tid = baseprofiler::profiler_current_thread_id();
+    MOZ_ASSERT(tid != 0);  // 0 is reserved for "no owner".
+    MOZ_ASSERT(!IsLockedOnCurrentThread(), "Recursive locking");
+    if (!::mozilla::detail::MutexImpl::tryLock()) {
+      // Failed to lock, nothing more to do.
+      return false;
+    }
+    MOZ_ASSERT(mOwningThreadId == 0, "Not unlocked properly");
+    mOwningThreadId = tid;
+    return true;
+  }
+
+  void Unlock() {  // Must be called on the thread that holds the lock.
+    MOZ_ASSERT(IsLockedOnCurrentThread(), "Unlocking when not locked here");
+    // We're still holding the mutex here, so it's safe to just reset
+    // `mOwningThreadId`.
+    mOwningThreadId = 0;
+    ::mozilla::detail::MutexImpl::unlock();
+  }
+
+  const char* GetName() const { return mName; }  // null if none was given
+
+ private:
+  // Thread currently owning the lock, or 0.
+  // Atomic because it may be read at any time independent of the mutex.
+  // Relaxed because threads only need to know if they own it already, so:
+  // - If it's their id, only *they* wrote that value with a locked mutex.
+  // - If it's different from their thread id it doesn't matter what other
+  //   number it is (0 or another id) and that it can change again at any time.
+  Atomic<int, MemoryOrdering::Relaxed> mOwningThreadId{0};
+
+  const char* mName = nullptr;
+};
+
+// Scoped lock: locks the given BaseProfilerMutex when constructed, and unlocks
+// it when destroyed.
+class MOZ_RAII BaseProfilerAutoLock {
+  BaseProfilerMutex& mMutex;
+
+ public:
+  explicit BaseProfilerAutoLock(BaseProfilerMutex& aMutex) : mMutex(aMutex) {
+    mMutex.Lock();
+  }
+
+  ~BaseProfilerAutoLock() { mMutex.Unlock(); }
+
+  BaseProfilerAutoLock(const BaseProfilerAutoLock&) = delete;
+  BaseProfilerAutoLock& operator=(const BaseProfilerAutoLock&) = delete;
+  BaseProfilerAutoLock(BaseProfilerAutoLock&&) = delete;
+  BaseProfilerAutoLock& operator=(BaseProfilerAutoLock&&) = delete;
+};
+
+// Optional BaseProfilerMutex, for Base Profiler internal use: the mutex only
+// exists if activated at construction time; otherwise Lock/Unlock do nothing.
+class BaseProfilerMaybeMutex {
+ public:
+  explicit BaseProfilerMaybeMutex(bool aActivate) {
+    if (aActivate) {
+      mMaybeMutex.emplace();
+    }
+  }
+
+  BaseProfilerMaybeMutex(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex& operator=(const BaseProfilerMaybeMutex&) = delete;
+  BaseProfilerMaybeMutex(BaseProfilerMaybeMutex&&) = delete;
+  BaseProfilerMaybeMutex& operator=(BaseProfilerMaybeMutex&&) = delete;
+
+  ~BaseProfilerMaybeMutex() = default;
+
+  bool IsActivated() const { return mMaybeMutex.isSome(); }
+
+  [[nodiscard]] bool IsActivatedAndLockedOnCurrentThread() const {
+    if (!IsActivated()) {
+      // Not activated, so we can never be locked.
+      return false;
+    }
+    return mMaybeMutex->IsLockedOnCurrentThread();
+  }
+
+  void AssertCurrentThreadOwns() const {  // Only checks when activated.
+#ifdef DEBUG
+    if (IsActivated()) {
+      mMaybeMutex->AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+  }
+
+  void Lock() {  // No-op when not activated.
+    if (IsActivated()) {
+      mMaybeMutex->Lock();
+    }
+  }
+
+  void Unlock() {  // No-op when not activated.
+    if (IsActivated()) {
+      mMaybeMutex->Unlock();
+    }
+  }
+
+ private:
+  Maybe<BaseProfilerMutex> mMaybeMutex;  // Nothing when not activated.
+};
+
+// Scoped lock for a BaseProfilerMaybeMutex: calls Lock() when constructed and
+// Unlock() when destroyed (both do nothing if the mutex is not activated).
+class MOZ_RAII BaseProfilerMaybeAutoLock {
+  BaseProfilerMaybeMutex& mMaybeMutex;
+
+ public:
+  explicit BaseProfilerMaybeAutoLock(BaseProfilerMaybeMutex& aMaybeMutex)
+      : mMaybeMutex(aMaybeMutex) {
+    mMaybeMutex.Lock();
+  }
+
+  ~BaseProfilerMaybeAutoLock() { mMaybeMutex.Unlock(); }
+
+  BaseProfilerMaybeAutoLock(const BaseProfilerMaybeAutoLock&) = delete;
+  BaseProfilerMaybeAutoLock& operator=(const BaseProfilerMaybeAutoLock&) =
+      delete;
+  BaseProfilerMaybeAutoLock(BaseProfilerMaybeAutoLock&&) = delete;
+  BaseProfilerMaybeAutoLock& operator=(BaseProfilerMaybeAutoLock&&) = delete;
+};
+
+}  // namespace detail
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif  // BaseProfilerDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
new file mode 100644
index 0000000000..1556b7a272
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkerTypes.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkerTypes_h
+#define BaseProfilerMarkerTypes_h
+
+// This header contains common marker type definitions.
+//
+// It #include's "mozilla/BaseProfilerMarkers.h", see that file for how to
+// define other marker types, and how to add markers to the profiler buffers.
+//
+// If you don't need to use these common types, #include
+// "mozilla/BaseProfilerMarkers.h" instead.
+//
+// Types in this file can be defined without relying on xpcom.
+// Others are defined in "ProfilerMarkerTypes.h".
+
+// !!!                       /!\ WORK IN PROGRESS /!\                       !!!
+// This file contains draft marker definitions, but most are not used yet.
+// Further work is needed to complete these definitions, and use them to convert
+// existing PROFILER_ADD_MARKER calls. See meta bug 1661394.
+
+#include "mozilla/BaseProfilerMarkers.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+namespace mozilla::baseprofiler::markers {
+
+// Marker type for a media sample; its payload is the sample's start and end
+// times, both streamed as integers and displayed as microseconds.
+struct MediaSampleMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("MediaSample");
+  }
+
+  // Streams the two payload values as integer JSON properties.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   int64_t aSampleStartTimeUs,
+                                   int64_t aSampleEndTimeUs) {
+    aWriter.IntProperty("sampleStartTimeUs", aSampleStartTimeUs);
+    aWriter.IntProperty("sampleEndTimeUs", aSampleEndTimeUs);
+  }
+
+  // Display schema: marker chart and marker table, with one labelled field per
+  // streamed property.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::markerChart,
+                               MarkerSchema::Location::markerTable};
+    displaySchema.AddKeyLabelFormat("sampleStartTimeUs", "Sample start time",
+                                    MarkerSchema::Format::microseconds);
+    displaySchema.AddKeyLabelFormat("sampleEndTimeUs", "Sample end time",
+                                    MarkerSchema::Format::microseconds);
+    return displaySchema;
+  }
+};
+
+// Payload-less marker type named "CONTENT_FULL_PAINT_TIME".
+struct ContentBuildMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("CONTENT_FULL_PAINT_TIME");
+  }
+
+  // There is no payload, so nothing is streamed.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter) {}
+
+  // Display schema: marker chart and marker table, without any extra field.
+  static MarkerSchema MarkerTypeDisplay() {
+    return MarkerSchema{MarkerSchema::Location::markerChart,
+                        MarkerSchema::Location::markerTable};
+  }
+};
+
+}  // namespace mozilla::baseprofiler::markers
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkerTypes_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkers.h b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
new file mode 100644
index 0000000000..c63b018f95
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkers.h
@@ -0,0 +1,242 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Markers are useful to delimit something important happening such as the first
+// paint. Unlike labels, which are only recorded in the profile buffer if a
+// sample is collected while the label is on the label stack, markers will
+// always be recorded in the profile buffer.
+//
+// This header contains basic definitions necessary to create marker types, and
+// to add markers to the profiler buffers.
+//
+// If basic marker types are needed, #include
+// "mozilla/BaseProfilerMarkerTypes.h" instead.
+//
+// But if you want to create your own marker type locally, you can #include this
+// header only; look at mozilla/BaseProfilerMarkerTypes.h for examples of how to
+// define types, and mozilla/BaseProfilerMarkersPrerequisites.h for some
+// supporting types.
+//
+// To then record markers:
+// - Use `baseprofiler::AddMarker(...)` from mozglue or other libraries that
+//   are outside of xul, especially if they may happen outside of xpcom's
+//   lifetime (typically startup, shutdown, or tests).
+// - Otherwise #include "ProfilerMarkers.h" instead, and use
+//   `profiler_add_marker(...)`.
+// See these functions for more details.
+
+#ifndef BaseProfilerMarkers_h
+#define BaseProfilerMarkers_h
+
+#include "mozilla/BaseProfilerMarkersDetail.h"
+
+#ifndef MOZ_GECKO_PROFILER
+
+// MOZ_GECKO_PROFILER is not #defined: all marker macros expand to nothing, so
+// their arguments are not evaluated.
+#  define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...)
+#  define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \
+                               ...)
+#  define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text)
+#  define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, \
+                                         text)
+
+#else  // ndef MOZ_GECKO_PROFILER
+
+#  include "mozilla/ProfileChunkedBuffer.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/Unused.h"
+
+#  include <functional>
+#  include <string>
+#  include <utility>
+
+namespace mozilla::baseprofiler {
+
+// Add a marker to a given buffer. `AddMarker()` and related macros should be
+// used in most cases, see below for more information about them and the
+// parameters; this function may be useful when markers need to be recorded in a
+// local buffer outside of the main profiler buffer.
+// This forwards to the implementation in `base_profiler_markers_detail`,
+// providing the Base Profiler's backtrace-capturing function, and returns the
+// `ProfileBufferBlockIndex` produced by that implementation.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    MarkerType aMarkerType, const PayloadArguments&... aPayloadArguments) {
+  Unused << aMarkerType;  // Only the empty object type is useful.
+  return base_profiler_markers_detail::AddMarkerToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions),
+      ::mozilla::baseprofiler::profiler_capture_backtrace_into,
+      aPayloadArguments...);
+}
+
+// Add a marker (without payload) to a given buffer.
+// Convenience overload that forwards to the templated `AddMarkerToBuffer()`
+// with the `markers::NoPayload` marker type; `aOptions` defaults to
+// default-constructed options.
+inline ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions = {}) {
+  return AddMarkerToBuffer(aBuffer, aName, aCategory, std::move(aOptions),
+                           markers::NoPayload{});
+}
+
+// Add a marker to the Base Profiler buffer.
+// - aName: Main name of this marker.
+// - aCategory: Category for this marker.
+// - aOptions: Optional settings (such as timing, inner window id,
+//   backtrace...), see `MarkerOptions` for details.
+// - aMarkerType: Empty object that specifies the type of marker.
+// - aPayloadArguments: Arguments expected by this marker type's
+//   `StreamJSONMarkerData` function.
+// Returns the index produced by `AddMarkerToBuffer()`, or a
+// default-constructed index if the profiler cannot accept markers.
+template <typename MarkerType, typename... PayloadArguments>
+ProfileBufferBlockIndex AddMarker(
+    const ProfilerString8View& aName, const MarkerCategory& aCategory,
+    MarkerOptions&& aOptions, MarkerType aMarkerType,
+    const PayloadArguments&... aPayloadArguments) {
+  if (baseprofiler::profiler_can_accept_markers()) {
+    // Record into the (cached) Base Profiler core buffer.
+    return ::mozilla::baseprofiler::AddMarkerToBuffer(
+        base_profiler_markers_detail::CachedBaseCoreBuffer(), aName, aCategory,
+        std::move(aOptions), aMarkerType, aPayloadArguments...);
+  }
+  // Markers are not accepted at the moment, nothing is recorded.
+  return {};
+}
+
+// Add a marker (without payload) to the Base Profiler buffer.
+// Convenience overload that forwards to the templated `AddMarker()` with the
+// `markers::NoPayload` marker type; `aOptions` defaults to default-constructed
+// options.
+inline ProfileBufferBlockIndex AddMarker(const ProfilerString8View& aName,
+                                         const MarkerCategory& aCategory,
+                                         MarkerOptions&& aOptions = {}) {
+  return AddMarker(aName, aCategory, std::move(aOptions), markers::NoPayload{});
+}
+
+}  // namespace mozilla::baseprofiler
+
+// Same as `AddMarker()` (without payload). This macro is safe to use even if
+// MOZ_GECKO_PROFILER is not #defined.
+// `categoryName` is a bare identifier that is looked up in
+// `::mozilla::baseprofiler::category`. Any extra arguments are forwarded to
+// `AddMarker()` after the category; `##__VA_ARGS__` drops the preceding comma
+// when there are none. The `do { ... } while (false)` wrapper makes the
+// expansion a single statement.
+#  define BASE_PROFILER_MARKER_UNTYPED(markerName, categoryName, ...)  \
+    do {                                                               \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_UNTYPED);               \
+      ::mozilla::baseprofiler::AddMarker(                              \
+          markerName, ::mozilla::baseprofiler::category::categoryName, \
+          ##__VA_ARGS__);                                              \
+    } while (false)
+
+// Same as `AddMarker()` (with payload). This macro is safe to use even if
+// MOZ_GECKO_PROFILER is not #defined.
+// `categoryName` and `MarkerType` are bare identifiers that are looked up in
+// `::mozilla::baseprofiler::category` and `::mozilla::baseprofiler::markers`
+// respectively; a `MarkerType{}` object is passed to `AddMarker()`. The
+// variadic arguments are forwarded as the marker's payload arguments.
+#  define BASE_PROFILER_MARKER(markerName, categoryName, options, MarkerType, \
+                               ...)                                           \
+    do {                                                                      \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_with_##MarkerType);            \
+      ::mozilla::baseprofiler::AddMarker(                                     \
+          markerName, ::mozilla::baseprofiler::category::categoryName,        \
+          options, ::mozilla::baseprofiler::markers::MarkerType{},            \
+          ##__VA_ARGS__);                                                     \
+    } while (false)
+
+namespace mozilla::baseprofiler::markers {
+// Most common marker type. Others are in BaseProfilerMarkerTypes.h.
+// Its payload is a single string, streamed as the "name" property.
+struct TextMarker {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("Text");
+  }
+
+  // Streams the text payload as the "name" string property.
+  static void StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aText) {
+    aWriter.StringProperty("name", aText);
+  }
+
+  // Display schema: marker chart and marker table, with labels built from the
+  // marker name and the streamed "name" property.
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::markerChart,
+                               MarkerSchema::Location::markerTable};
+    displaySchema.SetChartLabel("{marker.data.name}");
+    displaySchema.SetTableLabel("{marker.name} - {marker.data.name}");
+    displaySchema.AddKeyLabelFormat("name", "Details",
+                                    MarkerSchema::Format::string);
+    return displaySchema;
+  }
+};
+
+// Marker type named "tracing"; its payload is an optional category string.
+struct Tracing {
+  static constexpr Span<const char> MarkerTypeName() {
+    return MakeStringSpan("tracing");
+  }
+
+  // Streams the category as the "category" string property, unless it is
+  // empty, in which case nothing is streamed.
+  static void StreamJSONMarkerData(SpliceableJSONWriter& aWriter,
+                                   const ProfilerString8View& aCategory) {
+    if (aCategory.Length() == 0) {
+      return;
+    }
+    aWriter.StringProperty("category", aCategory);
+  }
+
+  // Display schema: marker chart, marker table, and timeline overview, with
+  // the category labelled "Type".
+  static MarkerSchema MarkerTypeDisplay() {
+    MarkerSchema displaySchema{MarkerSchema::Location::markerChart,
+                               MarkerSchema::Location::markerTable,
+                               MarkerSchema::Location::timelineOverview};
+    displaySchema.AddKeyLabelFormat("category", "Type",
+                                    MarkerSchema::Format::string);
+    return displaySchema;
+  }
+};
+}  // namespace mozilla::baseprofiler::markers
+
+// Add a text marker. This macro is safe to use even if MOZ_GECKO_PROFILER is
+// not #defined.
+// `categoryName` is a bare identifier that is looked up in
+// `::mozilla::baseprofiler::category`; `text` is passed to `AddMarker()` as the
+// payload of a `markers::TextMarker`.
+#  define BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options, text) \
+    do {                                                                     \
+      AUTO_PROFILER_STATS(BASE_PROFILER_MARKER_TEXT);                        \
+      ::mozilla::baseprofiler::AddMarker(                                    \
+          markerName, ::mozilla::baseprofiler::category::categoryName,       \
+          options, ::mozilla::baseprofiler::markers::TextMarker{}, text);    \
+    } while (false)
+
+namespace mozilla::baseprofiler {
+
+// Scoped object that records a text marker (same type as
+// BASE_PROFILER_MARKER_TEXT) from its destructor. The marker's timing spans
+// from construction -- or from the start time already present in the given
+// options, if any -- until destruction.
+class MOZ_RAII AutoProfilerTextMarker {
+ public:
+  // The given options must not have an end time. `aMarkerName` is stored as a
+  // pointer and read again at destruction; `aText` is copied.
+  AutoProfilerTextMarker(const char* aMarkerName,
+                         const MarkerCategory& aCategory,
+                         MarkerOptions&& aOptions, const std::string& aText)
+      : mMarkerName(aMarkerName),
+        mCategory(aCategory),
+        mOptions(std::move(aOptions)),
+        mText(aText) {
+    MOZ_ASSERT(mOptions.Timing().EndTime().IsNull(),
+               "AutoProfilerTextMarker options shouldn't have an end time");
+    const bool hasStartTime = !mOptions.Timing().StartTime().IsNull();
+    if (!hasStartTime) {
+      // No start time was provided, the interval starts now.
+      mOptions.Set(MarkerTiming::InstantNow());
+    }
+  }
+
+  ~AutoProfilerTextMarker() {
+    // Close the interval, then record the marker with the stored data.
+    mOptions.TimingRef().SetIntervalEnd();
+    AUTO_PROFILER_STATS(AUTO_BASE_PROFILER_MARKER_TEXT);
+    AddMarker(ProfilerString8View::WrapNullTerminatedString(mMarkerName),
+              mCategory, std::move(mOptions), markers::TextMarker{}, mText);
+  }
+
+ protected:
+  // Data kept from construction until the marker is added at destruction.
+  const char* mMarkerName;
+  MarkerCategory mCategory;
+  MarkerOptions mOptions;
+  std::string mText;
+};
+
+// Explicit instantiation declarations for some `AddMarker()` and
+// `AddMarkerToBuffer()` specializations (text marker with a `std::string`
+// payload, and no-payload marker), so that they are not implicitly
+// instantiated by code that #includes this header.
+// NOTE(review): The matching explicit instantiation definitions are presumably
+// in a Base Profiler source file -- confirm.
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarker(const ProfilerString8View&, const MarkerCategory&, MarkerOptions&&,
+          markers::TextMarker, const std::string&);
+
+extern template MFBT_API ProfileBufferBlockIndex
+AddMarkerToBuffer(ProfileChunkedBuffer&, const ProfilerString8View&,
+                  const MarkerCategory&, MarkerOptions&&, markers::NoPayload);
+
+extern template MFBT_API ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer&, const ProfilerString8View&, const MarkerCategory&,
+    MarkerOptions&&, markers::TextMarker, const std::string&);
+
+}  // namespace mozilla::baseprofiler
+
+// Creates an AutoProfilerTextMarker RAII object.  This macro is safe to use
+// even if MOZ_GECKO_PROFILER is not #defined.
+// `categoryName` is a bare identifier that is looked up in
+// `::mozilla::baseprofiler::category`. The expansion is a declaration (with no
+// trailing semicolon), so the marker is added when the enclosing scope ends.
+#  define AUTO_BASE_PROFILER_MARKER_TEXT(markerName, categoryName, options,   \
+                                         text)                                \
+    ::mozilla::baseprofiler::AutoProfilerTextMarker BASE_PROFILER_RAII(       \
+        markerName, ::mozilla::baseprofiler::category::categoryName, options, \
+        text)
+
+#endif  // ndef MOZ_GECKO_PROFILER else
+
+#endif  // BaseProfilerMarkers_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
new file mode 100644
index 0000000000..b5dbe27343
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersDetail.h
@@ -0,0 +1,674 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilerMarkersDetail_h
+#define BaseProfilerMarkersDetail_h
+
+#ifndef BaseProfilerMarkers_h
+#  error "This header should only be #included by BaseProfilerMarkers.h"
+#endif
+
+#include "mozilla/BaseProfilerMarkersPrerequisites.h"
+
+#ifdef MOZ_GECKO_PROFILER
+
+//                        ~~ HERE BE DRAGONS ~~
+//
+// Everything below is internal implementation detail, you shouldn't need to
+// look at it unless working on the profiler code.
+
+#  include "mozilla/BaseProfileJSONWriter.h"
+#  include "mozilla/ProfileBufferEntryKinds.h"
+
+#  include <limits>
+#  include <tuple>
+
+namespace mozilla::baseprofiler {
+// Implemented in platform.cpp
+MFBT_API ProfileChunkedBuffer& profiler_get_core_buffer();
+}  // namespace mozilla::baseprofiler
+
+namespace mozilla::base_profiler_markers_detail {
+
+// Get the core buffer from the profiler, and cache it in a
+// non-templated-function static reference.
+// `profiler_get_core_buffer()` is only called the first time this function
+// runs; later calls return the same cached reference.
+inline ProfileChunkedBuffer& CachedBaseCoreBuffer() {
+  static ProfileChunkedBuffer& coreBuffer =
+      baseprofiler::profiler_get_core_buffer();
+  return coreBuffer;
+}
+
+// Types and functions used to associate each marker type with a small tag,
+// which is stored in front of the serialized payload and later used to find
+// the functions needed to stream that payload. The static member functions are
+// only declared here.
+struct Streaming {
+  // A `MarkerDataDeserializer` is a free function that can read a serialized
+  // payload from an `EntryReader` and streams it as JSON object properties.
+  using MarkerDataDeserializer = void (*)(ProfileBufferEntryReader&,
+                                          baseprofiler::SpliceableJSONWriter&);
+
+  // A `MarkerTypeNameFunction` is a free function that returns the name of the
+  // marker type.
+  using MarkerTypeNameFunction = Span<const char> (*)();
+
+  // A `MarkerSchemaFunction` is a free function that returns a
+  // `MarkerSchema`, which contains all the information needed to stream
+  // the display schema associated with a marker type.
+  using MarkerSchemaFunction = MarkerSchema (*)();
+
+  // The set of functions stored for one marker type; all pointers default to
+  // null.
+  struct MarkerTypeFunctions {
+    MarkerDataDeserializer mMarkerDataDeserializer = nullptr;
+    MarkerTypeNameFunction mMarkerTypeNameFunction = nullptr;
+    MarkerSchemaFunction mMarkerSchemaFunction = nullptr;
+  };
+
+  // A `DeserializerTag` will be added before the payload, to help select the
+  // correct deserializer when reading back the payload.
+  using DeserializerTag = uint8_t;
+
+  // Store a deserializer (and other marker-type-specific functions) and get its
+  // `DeserializerTag`.
+  // This is intended to be only used once per deserializer when a new marker
+  // type is used for the first time, so it should be called to initialize a
+  // `static const` tag that will be re-used by all markers of the corresponding
+  // payload type -- see use below.
+  MFBT_API static DeserializerTag TagForMarkerTypeFunctions(
+      MarkerDataDeserializer aDeserializer,
+      MarkerTypeNameFunction aMarkerTypeNameFunction,
+      MarkerSchemaFunction aMarkerSchemaFunction);
+
+  // Get the `MarkerDataDeserializer` for a given `DeserializerTag`.
+  MFBT_API static MarkerDataDeserializer DeserializerForTag(
+      DeserializerTag aTag);
+
+  // Retrieve all stored `MarkerTypeFunctions`.
+  MFBT_API static Span<const MarkerTypeFunctions> MarkerTypeFunctionsArray();
+};
+
+// This helper will examine a marker type's `StreamJSONMarkerData` function, see
+// specialization below. The primary template is intentionally left undefined.
+template <typename T>
+struct StreamFunctionTypeHelper;
+
+// Helper specialization that takes the expected
+// `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function and
+// provide information about the `...` parameters.
+template <typename R, typename... As>
+struct StreamFunctionTypeHelper<R(baseprofiler::SpliceableJSONWriter&, As...)> {
+  // Number of parameters after the JSON writer.
+  constexpr static size_t scArity = sizeof...(As);
+  // Tuple of the parameter types after the JSON writer, with references and
+  // cv-qualifiers removed.
+  using TupleType =
+      std::tuple<std::remove_cv_t<std::remove_reference_t<As>>...>;
+
+  // Serialization function that takes the exact same parameter types
+  // (const-ref'd) as `StreamJSONMarkerData`. This has to be inside the helper
+  // because only here can we access the raw parameter pack `As...`.
+  // And because we're using the same argument types through
+  // references-to-const, permitted implicit conversions can happen.
+  // Returns the value returned by `aBuffer.PutObjects()`.
+  static ProfileBufferBlockIndex Serialize(
+      ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+      const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+      Streaming::DeserializerTag aDeserializerTag, const As&... aAs) {
+    // Note that options are first after the entry kind, because they contain
+    // the thread id, which is handled first to filter markers by threads.
+    return aBuffer.PutObjects(ProfileBufferEntryKind::Marker, aOptions, aName,
+                              aCategory, aDeserializerTag, aAs...);
+  }
+};
+
+// Helper for a marker type.
+// A marker type is defined in a `struct` with some expected static member
+// functions. See example in BaseProfilerMarkers.h.
+// `Serialize()` writes a marker of this type into a buffer, and
+// `Deserialize()` reads its payload back and streams it as JSON.
+template <typename MarkerType>
+struct MarkerTypeSerialization {
+  // Definitions to access the expected
+  // `StreamJSONMarkerData(baseprofiler::SpliceableJSONWriter&, ...)` function
+  // and its parameters.
+  using StreamFunctionType =
+      StreamFunctionTypeHelper<decltype(MarkerType::StreamJSONMarkerData)>;
+  constexpr static size_t scStreamFunctionParameterCount =
+      StreamFunctionType::scArity;
+  using StreamFunctionUserParametersTuple =
+      typename StreamFunctionType::TupleType;
+  // Type of the i-th payload parameter (0-based, not counting the writer).
+  template <size_t i>
+  using StreamFunctionParameter =
+      std::tuple_element_t<i, StreamFunctionUserParametersTuple>;
+
+  // Write a marker of type `MarkerType` into `aBuffer`, with `aTs...` as its
+  // payload arguments. Must not be used with `NoPayload`.
+  template <typename... Ts>
+  static ProfileBufferBlockIndex Serialize(ProfileChunkedBuffer& aBuffer,
+                                           const ProfilerString8View& aName,
+                                           const MarkerCategory& aCategory,
+                                           MarkerOptions&& aOptions,
+                                           const Ts&... aTs) {
+    static_assert(!std::is_same_v<MarkerType,
+                                  ::mozilla::baseprofiler::markers::NoPayload>,
+                  "NoPayload should have been handled in the caller.");
+    // Register marker type functions, and get the tag for this deserializer.
+    // Note that the tag is stored in a function-static object, and this
+    // function is static in a templated struct, so there should only be one tag
+    // per MarkerType.
+    // Making the tag class-static may have been more efficient (to avoid a
+    // thread-safe init check at every call), but random global static
+    // initialization order would make it more complex to coordinate with
+    // `Streaming::TagForMarkerTypeFunctions()`, and also would add a (small)
+    // cost for everybody, even the majority of users not using the profiler.
+    static const Streaming::DeserializerTag tag =
+        Streaming::TagForMarkerTypeFunctions(Deserialize,
+                                             MarkerType::MarkerTypeName,
+                                             MarkerType::MarkerTypeDisplay);
+    return StreamFunctionType::Serialize(aBuffer, aName, aCategory,
+                                         std::move(aOptions), tag, aTs...);
+  }
+
+ private:
+  // This templated function will recursively deserialize each argument expected
+  // by `MarkerType::StreamJSONMarkerData()` on the stack, and call it at the
+  // end. E.g., for `StreamJSONMarkerData(int, char)`:
+  // - DeserializeArguments<0>(aER, aWriter) reads an int and calls:
+  // - DeserializeArguments<1>(aER, aWriter, const int&) reads a char and calls:
+  // - MarkerType::StreamJSONMarkerData(aWriter, const int&, const char&).
+  // Prototyping on godbolt showed that clang and gcc can flatten these
+  // recursive calls into one function with successive reads followed by the one
+  // stream call; tested up to 40 arguments: https://godbolt.org/z/5KeeM4
+  template <size_t i = 0, typename... Args>
+  static void DeserializeArguments(ProfileBufferEntryReader& aEntryReader,
+                                   baseprofiler::SpliceableJSONWriter& aWriter,
+                                   const Args&... aArgs) {
+    static_assert(sizeof...(Args) == i,
+                  "We should have collected `i` arguments so far");
+    if constexpr (i < scStreamFunctionParameterCount) {
+      // Deserialize the i-th argument on this stack.
+      auto argument = aEntryReader.ReadObject<StreamFunctionParameter<i>>();
+      // Add our local argument to the next recursive call.
+      DeserializeArguments<i + 1>(aEntryReader, aWriter, aArgs..., argument);
+    } else {
+      // We've read all the arguments, finally call the `StreamJSONMarkerData`
+      // function, which should write the appropriate JSON elements for this
+      // marker type. Note that the MarkerType-specific "type" element is
+      // already written.
+      MarkerType::StreamJSONMarkerData(aWriter, aArgs...);
+    }
+  }
+
+ public:
+  // Function registered as this marker type's `MarkerDataDeserializer`: writes
+  // the "type" property, then reads the payload arguments from `aEntryReader`
+  // and streams them through `MarkerType::StreamJSONMarkerData()`.
+  static void Deserialize(ProfileBufferEntryReader& aEntryReader,
+                          baseprofiler::SpliceableJSONWriter& aWriter) {
+    aWriter.StringProperty("type", MarkerType::MarkerTypeName());
+    DeserializeArguments(aEntryReader, aWriter);
+  }
+};
+
+// Empty specialization for `NoPayload`: it provides no `Serialize()` nor
+// `Deserialize()`, so `NoPayload` markers must be handled by the caller.
+template <>
+struct MarkerTypeSerialization<::mozilla::baseprofiler::markers::NoPayload> {
+  // Nothing! NoPayload has special handling avoiding payload work.
+};
+
+// Write a marker into `aBuffer`, using `aOptions` as given (the caller is
+// expected to have already dealt with any requested stack capture).
+// - For `NoPayload` without stack nor inner window id, the marker is written
+//   directly with deserializer tag 0.
+// - For `NoPayload` with a stack or inner window id, the marker is written as a
+//   local `NoPayloadUserData` marker type instead, see comment below.
+// - For all other marker types, this forwards to
+//   `MarkerTypeSerialization<MarkerType>::Serialize()`.
+template <typename MarkerType, typename... Ts>
+static ProfileBufferBlockIndex AddMarkerWithOptionalStackToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    const Ts&... aTs) {
+  if constexpr (std::is_same_v<MarkerType,
+                               ::mozilla::baseprofiler::markers::NoPayload>) {
+    static_assert(sizeof...(Ts) == 0,
+                  "NoPayload does not accept any payload arguments.");
+    // Special case for NoPayload where there is a stack or inner window id:
+    // Because these options would be stored in the payload 'data' object, but
+    // there is no such object for NoPayload, we convert the marker to another
+    // type (without user fields in the 'data' object), so that the stack and/or
+    // inner window id are not lost.
+    // TODO: Remove this when bug 1646714 lands.
+    if (aOptions.Stack().GetChunkedBuffer() ||
+        !aOptions.InnerWindowId().IsUnspecified()) {
+      // Local marker type with the expected static functions but no payload.
+      struct NoPayloadUserData {
+        static constexpr Span<const char> MarkerTypeName() {
+          return MakeStringSpan("NoPayloadUserData");
+        }
+        static void StreamJSONMarkerData(
+            baseprofiler::SpliceableJSONWriter& aWriter) {
+          // No user payload.
+        }
+        static mozilla::MarkerSchema MarkerTypeDisplay() {
+          using MS = mozilla::MarkerSchema;
+          MS schema{MS::Location::markerChart, MS::Location::markerTable};
+          // No user data to display.
+          return schema;
+        }
+      };
+      return MarkerTypeSerialization<NoPayloadUserData>::Serialize(
+          aBuffer, aName, aCategory, std::move(aOptions));
+    }
+
+    // Note that options are first after the entry kind, because they contain
+    // the thread id, which is handled first to filter markers by threads.
+    // The tag 0 written here is what the reading side checks to know that
+    // there is no payload to stream.
+    return aBuffer.PutObjects(
+        ProfileBufferEntryKind::Marker, std::move(aOptions), aName, aCategory,
+        base_profiler_markers_detail::Streaming::DeserializerTag(0));
+  } else {
+    return MarkerTypeSerialization<MarkerType>::Serialize(
+        aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+  }
+}
+
+// Pointer to a function that can capture a backtrace into the provided
+// `ProfileChunkedBuffer`, and returns true when successful.
+using BacktraceCaptureFunction = bool (*)(ProfileChunkedBuffer&);
+
+// Add a marker with the given name, options, and arguments to the given buffer.
+// Because this may be called from either Base or Gecko Profiler functions, the
+// appropriate backtrace-capturing function must also be provided.
+// Before the marker is written, unspecified options are filled in: the thread
+// id defaults to the current thread, the timing defaults to an instant "now",
+// and a requested stack capture is performed into a temporary local buffer.
+template <typename MarkerType, typename... Ts>
+ProfileBufferBlockIndex AddMarkerToBuffer(
+    ProfileChunkedBuffer& aBuffer, const ProfilerString8View& aName,
+    const MarkerCategory& aCategory, MarkerOptions&& aOptions,
+    BacktraceCaptureFunction aBacktraceCaptureFunction, const Ts&... aTs) {
+  if (aOptions.ThreadId().IsUnspecified()) {
+    // If yet unspecified, set thread to this thread where the marker is added.
+    aOptions.Set(MarkerThreadId::CurrentThread());
+  }
+
+  if (aOptions.IsTimingUnspecified()) {
+    // If yet unspecified, set timing to this instant of adding the marker.
+    aOptions.Set(MarkerTiming::InstantNow());
+  }
+
+  if (aOptions.Stack().IsCaptureNeeded()) {
+    // A capture was requested, let's attempt to do it here&now. This avoids a
+    // lot of allocations that would be necessary if capturing a backtrace
+    // separately.
+    // TODO use a local on-stack byte buffer to remove last allocation.
+    // TODO reduce internal profiler stack levels, see bug 1659872.
+    ProfileBufferChunkManagerSingle chunkManager(
+        ProfileBufferChunkManager::scExpectedMaximumStackSize);
+    ProfileChunkedBuffer chunkedBuffer(
+        ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
+    // If the capture fails, a null pointer is given instead of the buffer.
+    aOptions.StackRef().UseRequestedBacktrace(
+        aBacktraceCaptureFunction(chunkedBuffer) ? &chunkedBuffer : nullptr);
+    // This call must be made from here, while chunkedBuffer is in scope.
+    return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+        aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+  }
+
+  // No capture needed, the options can be used as they are.
+  return AddMarkerWithOptionalStackToBuffer<MarkerType>(
+      aBuffer, aName, aCategory, std::move(aOptions), aTs...);
+}
+
+// Read one marker entry (after its entry kind) from `aEntryReader`, and stream
+// it to `aWriter` as a JSON array element.
+// - aThreadIdOrZero: If non-zero, only a marker whose options contain this
+//   thread id is streamed.
+// - aStackCallback: Called with the `ProfileChunkedBuffer` containing the
+//   marker's stack (if there is one), between the start and end of the "stack"
+//   object property.
+// Returns false if the marker was filtered out by thread id; in this case
+// nothing is written, and only the options have been read from the entry.
+// Returns true after the marker has been streamed.
+template <typename StackCallback>
+[[nodiscard]] bool DeserializeAfterKindAndStream(
+    ProfileBufferEntryReader& aEntryReader,
+    baseprofiler::SpliceableJSONWriter& aWriter, int aThreadIdOrZero,
+    StackCallback&& aStackCallback) {
+  // Each entry is made up of the following:
+  //   ProfileBufferEntry::Kind::Marker, <- already read by caller
+  //   options,                          <- next location in entries
+  //   name,
+  //   category,
+  //   deserializer tag,
+  //   payload                           <- only read if the tag is not 0
+  const MarkerOptions options = aEntryReader.ReadObject<MarkerOptions>();
+  if (aThreadIdOrZero != 0 &&
+      options.ThreadId().ThreadId() != aThreadIdOrZero) {
+    // A specific thread is being read, we're not in it.
+    return false;
+  }
+  // Write the information to JSON with the following schema:
+  // [name, startTime, endTime, phase, category, data]
+  // The `data` object is only written if the deserializer tag is not 0.
+  aWriter.StartArrayElement();
+  {
+    aWriter.UniqueStringElement(aEntryReader.ReadObject<ProfilerString8View>());
+
+    const double startTime = options.Timing().GetStartTime();
+    aWriter.DoubleElement(startTime);
+
+    const double endTime = options.Timing().GetEndTime();
+    aWriter.DoubleElement(endTime);
+
+    aWriter.IntElement(static_cast<int64_t>(options.Timing().MarkerPhase()));
+
+    MarkerCategory category = aEntryReader.ReadObject<MarkerCategory>();
+    aWriter.IntElement(static_cast<int64_t>(category.GetCategory()));
+
+    // A tag of 0 means that there is no payload to stream.
+    if (const auto tag =
+            aEntryReader.ReadObject<mozilla::base_profiler_markers_detail::
+                                        Streaming::DeserializerTag>();
+        tag != 0) {
+      aWriter.StartObjectElement(JSONWriter::SingleLineStyle);
+      {
+        // Stream "common props".
+
+        // TODO: Move this to top-level tuple, when frontend supports it.
+        if (!options.InnerWindowId().IsUnspecified()) {
+          // Here, we are converting uint64_t to double. Both Browsing Context
+          // and Inner Window IDs are created using
+          // `nsContentUtils::GenerateProcessSpecificId`, which is specifically
+          // designed to only use 53 of the 64 bits to be lossless when passed
+          // into and out of JS as a double.
+          aWriter.DoubleProperty(
+              "innerWindowID",
+              static_cast<double>(options.InnerWindowId().Id()));
+        }
+
+        // TODO: Move this to top-level tuple, when frontend supports it.
+        if (ProfileChunkedBuffer* chunkedBuffer =
+                options.Stack().GetChunkedBuffer();
+            chunkedBuffer) {
+          aWriter.StartObjectProperty("stack");
+          { std::forward<StackCallback>(aStackCallback)(*chunkedBuffer); }
+          aWriter.EndObject();
+        }
+
+        // Stream the payload, including the type.
+        mozilla::base_profiler_markers_detail::Streaming::MarkerDataDeserializer
+            deserializer = mozilla::base_profiler_markers_detail::Streaming::
+                DeserializerForTag(tag);
+        MOZ_RELEASE_ASSERT(deserializer);
+        deserializer(aEntryReader, aWriter);
+      }
+      aWriter.EndObject();
+    }
+  }
+  aWriter.EndArray();
+  return true;
+}
+
+}  // namespace mozilla::base_profiler_markers_detail
+
+namespace mozilla {
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: ProfilerStringView<CHAR>
+
+// The serialization starts with a ULEB128 number that encodes both whether the
+// ProfilerStringView is literal (Least Significant Bit = 0) or not (LSB = 1),
+// plus the string length (excluding null terminator) in bytes, shifted left by
+// 1 bit. Following that number:
+// - If literal, the string pointer value.
+// - If non-literal, the contents as bytes (excluding null terminator if any).
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<ProfilerStringView<CHAR>> {
+  // Number of bytes that `Write()` outputs for `aString`.
+  static Length Bytes(const ProfilerStringView<CHAR>& aString) {
+    MOZ_RELEASE_ASSERT(
+        aString.Length() < std::numeric_limits<Length>::max() / 2,
+        "Double the string length doesn't fit in Length type");
+    const Length stringLength = static_cast<Length>(aString.Length());
+    if (!aString.IsLiteral()) {
+      // Non-literal -> Length shifted left and LSB=1, then string size in
+      // bytes.
+      return ULEB128Size((stringLength << 1) | 1u) +
+             stringLength * sizeof(CHAR);
+    }
+    // Literal -> Length shifted left and LSB=0, then pointer.
+    return ULEB128Size((stringLength << 1) | 0u) +
+           static_cast<ProfileChunkedBuffer::Length>(sizeof(const CHAR*));
+  }
+
+  // Output the ULEB128 header, followed by either the string pointer (literal)
+  // or the string contents (non-literal).
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfilerStringView<CHAR>& aString) {
+    MOZ_RELEASE_ASSERT(
+        aString.Length() < std::numeric_limits<Length>::max() / 2,
+        "Double the string length doesn't fit in Length type");
+    const Length stringLength = static_cast<Length>(aString.Length());
+    if (aString.IsLiteral()) {
+      // Literal -> Length shifted left and LSB=0, then pointer.
+      aEW.WriteULEB128((stringLength << 1) | 0u);
+      aEW.WriteObject(WrapProfileBufferRawPointer(aString.Data()));
+    } else {
+      // Non-literal -> Length shifted left and LSB=1, then string contents as
+      // bytes.
+      aEW.WriteULEB128((stringLength << 1) | 1u);
+      aEW.WriteBytes(aString.Data(), stringLength * sizeof(CHAR));
+    }
+  }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<ProfilerStringView<CHAR>> {
+  // Deserialize a string into the existing `aString`.
+  // If `aString` already owns a buffer (from a previous deserialization), that
+  // buffer is released before being replaced, so that it is not leaked.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfilerStringView<CHAR>& aString) {
+    const Length lengthAndIsLiteral = aER.ReadULEB128<Length>();
+    const Length stringLength = lengthAndIsLiteral >> 1;
+    const bool isLiteral = (lengthAndIsLiteral & 1u) == 0u;
+    const CHAR* data = nullptr;
+    if (isLiteral) {
+      // LSB==0 -> Literal string, read the string pointer.
+      data = aER.ReadObject<const CHAR*>();
+    } else {
+      // LSB==1 -> Not a literal string, allocate a buffer to store the string
+      // (plus terminal, for safety), and give it to the ProfilerStringView;
+      // Note that this is a secret use of ProfilerStringView, which is
+      // intended to only be used between deserialization and JSON streaming.
+      CHAR* buffer = new CHAR[stringLength + 1];
+      aER.ReadBytes(buffer, stringLength * sizeof(CHAR));
+      buffer[stringLength] = CHAR(0);
+      data = buffer;
+    }
+    if (aString.mOwnership ==
+        ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView) {
+      // The destination owns an array allocated with `new CHAR[]` (see
+      // above); release it before the view is overwritten.
+      delete[] aString.mStringView.data();
+    }
+    aString.mStringView = std::basic_string_view<CHAR>(data, stringLength);
+    aString.mOwnership =
+        isLiteral ? ProfilerStringView<CHAR>::Ownership::Literal
+                  : ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView;
+  }
+
+  // Deserialize a string and return it as a new ProfilerStringView.
+  static ProfilerStringView<CHAR> Read(ProfileBufferEntryReader& aER) {
+    const Length lengthAndIsLiteral = aER.ReadULEB128<Length>();
+    const Length stringLength = lengthAndIsLiteral >> 1;
+    if ((lengthAndIsLiteral & 1u) == 0u) {
+      // LSB==0 -> Literal string, read the string pointer.
+      return ProfilerStringView<CHAR>(
+          aER.ReadObject<const CHAR*>(), stringLength,
+          ProfilerStringView<CHAR>::Ownership::Literal);
+    }
+    // LSB==1 -> Not a literal string, allocate a buffer to store the string
+    // (plus terminal, for safety), and give it to the ProfilerStringView; Note
+    // that this is a secret use of ProfilerStringView, which is intended to
+    // only be used between deserialization and JSON streaming.
+    CHAR* buffer = new CHAR[stringLength + 1];
+    aER.ReadBytes(buffer, stringLength * sizeof(CHAR));
+    buffer[stringLength] = CHAR(0);
+    return ProfilerStringView<CHAR>(
+        buffer, stringLength,
+        ProfilerStringView<CHAR>::Ownership::OwnedThroughStringView);
+  }
+};
+
+// Serializer, Deserializer: MarkerCategory
+
+// The serialization contains the category pair (which identifies both the
+// category and its sub-category), encoded as a single ULEB128 number.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerCategory> {
+  // Size in bytes of the ULEB128-encoded category pair number.
+  static Length Bytes(const MarkerCategory& aCategory) {
+    const auto pairNumber = static_cast<uint32_t>(aCategory.CategoryPair());
+    return ULEB128Size(pairNumber);
+  }
+
+  // Store the category pair as a 32-bit number in ULEB128 form.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerCategory& aCategory) {
+    const auto pairNumber = static_cast<uint32_t>(aCategory.CategoryPair());
+    aEW.WriteULEB128(pairNumber);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerCategory> {
+  // Read one ULEB128 number and convert it back to a category pair.
+  static MarkerCategory Read(ProfileBufferEntryReader& aER) {
+    const auto pairNumber = aER.ReadULEB128<uint32_t>();
+    return MarkerCategory(
+        static_cast<baseprofiler::ProfilingCategoryPair>(pairNumber));
+  }
+
+  // Same as `Read()`, but assigns the result to an existing object.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       MarkerCategory& aCategory) {
+    aCategory = Read(aER);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerTiming
+
+// The serialization starts with the marker phase, followed by one or two
+// timestamps as needed.
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerTiming> {
+  // Size in bytes of the phase plus the timestamp(s) relevant to that phase.
+  static Length Bytes(const MarkerTiming& aTiming) {
+    MOZ_ASSERT(!aTiming.IsUnspecified());
+    const auto phase = aTiming.MarkerPhase();
+    switch (phase) {
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        // Only the start time is stored for these phases.
+        return SumBytes(phase, aTiming.StartTime());
+      case MarkerTiming::Phase::IntervalEnd:
+        // Only the end time is stored.
+        return SumBytes(phase, aTiming.EndTime());
+      case MarkerTiming::Phase::Interval:
+        // Both times are stored.
+        return SumBytes(phase, aTiming.StartTime(), aTiming.EndTime());
+      default:
+        // Unknown phase, this assertion is expected to fail.
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        return 0;  // Only to avoid build errors.
+    }
+  }
+
+  // Write the phase, followed by the timestamp(s) relevant to that phase.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerTiming& aTiming) {
+    MOZ_ASSERT(!aTiming.IsUnspecified());
+    const auto phase = aTiming.MarkerPhase();
+    switch (phase) {
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        aEW.WriteObjects(phase, aTiming.StartTime());
+        break;
+      case MarkerTiming::Phase::IntervalEnd:
+        aEW.WriteObjects(phase, aTiming.EndTime());
+        break;
+      case MarkerTiming::Phase::Interval:
+        aEW.WriteObjects(phase, aTiming.StartTime(), aTiming.EndTime());
+        break;
+      default:
+        // Unknown phase, this assertion is expected to fail.
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerTiming> {
+  // Read the phase and its timestamp(s) into an existing MarkerTiming. A
+  // timestamp that is not stored for the phase is reset to a null TimeStamp.
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerTiming& aTiming) {
+    aTiming.mPhase = aER.ReadObject<MarkerTiming::Phase>();
+    switch (aTiming.mPhase) {
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        // Only the start time was stored.
+        aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+        aTiming.mEndTime = TimeStamp{};
+        break;
+      case MarkerTiming::Phase::IntervalEnd:
+        // Only the end time was stored.
+        aTiming.mStartTime = TimeStamp{};
+        aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+        break;
+      case MarkerTiming::Phase::Interval:
+        // Both times were stored, start first.
+        aTiming.mStartTime = aER.ReadObject<TimeStamp>();
+        aTiming.mEndTime = aER.ReadObject<TimeStamp>();
+        break;
+      default:
+        // Unknown phase, this assertion is expected to fail.
+        MOZ_RELEASE_ASSERT(aTiming.mPhase == MarkerTiming::Phase::Instant ||
+                           aTiming.mPhase == MarkerTiming::Phase::Interval ||
+                           aTiming.mPhase ==
+                               MarkerTiming::Phase::IntervalStart ||
+                           aTiming.mPhase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+  }
+
+  // Read the phase and its timestamp(s), and build a new MarkerTiming from
+  // them. Timestamps that are not stored for the phase stay null.
+  static MarkerTiming Read(ProfileBufferEntryReader& aER) {
+    TimeStamp start;
+    TimeStamp end;
+    auto phase = aER.ReadObject<MarkerTiming::Phase>();
+    switch (phase) {
+      case MarkerTiming::Phase::Instant:
+      case MarkerTiming::Phase::IntervalStart:
+        start = aER.ReadObject<TimeStamp>();
+        break;
+      case MarkerTiming::Phase::IntervalEnd:
+        end = aER.ReadObject<TimeStamp>();
+        break;
+      case MarkerTiming::Phase::Interval:
+        start = aER.ReadObject<TimeStamp>();
+        end = aER.ReadObject<TimeStamp>();
+        break;
+      default:
+        // Unknown phase, this assertion is expected to fail.
+        MOZ_RELEASE_ASSERT(phase == MarkerTiming::Phase::Instant ||
+                           phase == MarkerTiming::Phase::Interval ||
+                           phase == MarkerTiming::Phase::IntervalStart ||
+                           phase == MarkerTiming::Phase::IntervalEnd);
+        break;
+    }
+    return MarkerTiming(start, end, phase);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerStack
+
+// The serialization only contains the `ProfileChunkedBuffer` from the
+// backtrace; if there is no backtrace or if it's empty, this will implicitly
+// store a nullptr (see
+// `ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*>`).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerStack> {
+  // Size in bytes needed to serialize the (possibly null) backtrace buffer
+  // pointer held by `aStack`.
+  static Length Bytes(const MarkerStack& aStack) {
+    ProfileChunkedBuffer* const backtrace = aStack.GetChunkedBuffer();
+    return SumBytes(backtrace);
+  }
+
+  // Serialize the (possibly null) backtrace buffer pointer held by `aStack`.
+  static void Write(ProfileBufferEntryWriter& aEW, const MarkerStack& aStack) {
+    ProfileChunkedBuffer* const backtrace = aStack.GetChunkedBuffer();
+    aEW.WriteObject(backtrace);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerStack> {
+  // Read a `UniquePtr<ProfileChunkedBuffer>` and hand its ownership over to a
+  // new MarkerStack.
+  static MarkerStack Read(ProfileBufferEntryReader& aER) {
+    auto backtrace = aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>();
+    return MarkerStack(std::move(backtrace));
+  }
+
+  // Same as `Read()`, but move-assigns the result to an existing object.
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerStack& aStack) {
+    aStack = Read(aER);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Serializer, Deserializer: MarkerOptions
+
+// The serialization contains all members (either trivially-copyable, or they
+// provide their specialization above).
+template <>
+struct ProfileBufferEntryWriter::Serializer<MarkerOptions> {
+  // Total size in bytes of the four serialized options, in the order: thread
+  // id, timing, stack, inner window id.
+  static Length Bytes(const MarkerOptions& aOptions) {
+    const auto& threadId = aOptions.ThreadId();
+    const auto& timing = aOptions.Timing();
+    const auto& stack = aOptions.Stack();
+    const auto& innerWindowId = aOptions.InnerWindowId();
+    return SumBytes(threadId, timing, stack, innerWindowId);
+  }
+
+  // Write the four options, in the same order as counted by `Bytes()`.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const MarkerOptions& aOptions) {
+    const auto& threadId = aOptions.ThreadId();
+    const auto& timing = aOptions.Timing();
+    const auto& stack = aOptions.Stack();
+    const auto& innerWindowId = aOptions.InnerWindowId();
+    aEW.WriteObjects(threadId, timing, stack, innerWindowId);
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<MarkerOptions> {
+  // Read the four options directly into the members of `aOptions`, in the
+  // order: thread id, timing, stack, inner window id.
+  static void ReadInto(ProfileBufferEntryReader& aER, MarkerOptions& aOptions) {
+    aER.ReadIntoObjects(aOptions.mThreadId, aOptions.mTiming, aOptions.mStack,
+                        aOptions.mInnerWindowId);
+  }
+
+  // Read the four options into a default-constructed MarkerOptions, and return
+  // it.
+  static MarkerOptions Read(ProfileBufferEntryReader& aER) {
+    MarkerOptions result;
+    aER.ReadIntoObjects(result.mThreadId, result.mTiming, result.mStack,
+                        result.mInnerWindowId);
+    return result;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkersDetail_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
new file mode 100644
index 0000000000..aa85b41896
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerMarkersPrerequisites.h
@@ -0,0 +1,866 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This header contains basic definitions required to create marker types, and
+// to add markers to the profiler buffers.
+//
+// In most cases, #include "mozilla/BaseProfilerMarkers.h" instead, or
+// #include "mozilla/BaseProfilerMarkerTypes.h" for common marker types.
+
+#ifndef BaseProfilerMarkersPrerequisites_h
+#define BaseProfilerMarkersPrerequisites_h
+
+#ifdef MOZ_GECKO_PROFILER
+
+#  include "BaseProfilingCategory.h"
+#  include "mozilla/Maybe.h"
+#  include "mozilla/ProfileChunkedBuffer.h"
+#  include "mozilla/TimeStamp.h"
+#  include "mozilla/UniquePtr.h"
+#  include "mozilla/Variant.h"
+
+#  include <initializer_list>
+#  include <string_view>
+#  include <string>
+#  include <type_traits>
+#  include <utility>
+#  include <vector>
+
+// TODO: Move common stuff to shared header instead.
+#  include "BaseProfiler.h"
+
+namespace mozilla {
+
+// Return a `const CHAR*` pointing at the literal empty string (`""` or `u""`,
+// depending on CHAR).
+template <typename CHAR>
+constexpr const CHAR* LiteralEmptyStringPointer() {
+  static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>,
+                "Only char and char16_t are supported in Firefox");
+  if constexpr (std::is_same_v<CHAR, char16_t>) {
+    return u"";
+  } else {
+    // The static_assert above only leaves `char` for this branch.
+    return "";
+  }
+}
+
+// Return a `std::basic_string_view<CHAR>` of length 0, pointing at the literal
+// empty string (`""` or `u""`, depending on CHAR).
+template <typename CHAR>
+constexpr std::basic_string_view<CHAR> LiteralEmptyStringView() {
+  static_assert(std::is_same_v<CHAR, char> || std::is_same_v<CHAR, char16_t>,
+                "Only char and char16_t are supported in Firefox");
+  // Bring `operator""sv()` from <string_view> into scope.
+  using namespace std::literals::string_view_literals;
+  if constexpr (std::is_same_v<CHAR, char16_t>) {
+    return u""sv;
+  } else {
+    // The static_assert above only leaves `char` for this branch.
+    return ""sv;
+  }
+}
+
+// General string view, optimized for short on-stack life before serialization,
+// and between deserialization and JSON-streaming.
+template <typename CHAR>
+class MOZ_STACK_CLASS ProfilerStringView {
+ public:
+  // Default constructor points at "" (literal empty string).
+  constexpr ProfilerStringView() = default;
+
+  // Don't allow copy.
+  ProfilerStringView(const ProfilerStringView&) = delete;
+  ProfilerStringView& operator=(const ProfilerStringView&) = delete;
+
+  // Allow move. If the moved-from string owned its buffer, ownership is
+  // transferred and the moved-from string is reset to ""; otherwise (literal
+  // or reference) the moved-from string is left viewing the same string.
+  constexpr ProfilerStringView(ProfilerStringView&& aOther)
+      : mStringView(std::move(aOther.mStringView)),
+        mOwnership(aOther.mOwnership) {
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // We now own the buffer, make the other point at the literal "".
+      aOther.mStringView = LiteralEmptyStringView<CHAR>();
+      aOther.mOwnership = Ownership::Literal;
+    }
+  }
+  constexpr ProfilerStringView& operator=(ProfilerStringView&& aOther) {
+    if (this == &aOther) {
+      // Self-move: nothing to do. Going through the steps below would free
+      // (or lose track of) a buffer that we are supposed to keep owning.
+      return *this;
+    }
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // We are about to overwrite the view to a buffer that we own, release
+      // it first so that it is not leaked.
+      delete[] mStringView.data();
+    }
+    mStringView = std::move(aOther.mStringView);
+    mOwnership = aOther.mOwnership;
+    if (mOwnership == Ownership::OwnedThroughStringView) {
+      // We now own the buffer, make the other point at the literal "".
+      aOther.mStringView = LiteralEmptyStringView<CHAR>();
+      aOther.mOwnership = Ownership::Literal;
+    }
+    return *this;
+  }
+
+  ~ProfilerStringView() {
+    if (MOZ_UNLIKELY(mOwnership == Ownership::OwnedThroughStringView)) {
+      // We own the buffer pointed at by mStringView, destroy it.
+      // This is only used between deserialization and streaming.
+      // The deserializer allocates this buffer with `new CHAR[]`, so it must
+      // be released with the array form of delete.
+      delete[] mStringView.data();
+    }
+  }
+
+  // Implicit construction from nullptr, points at "" (literal empty string).
+  constexpr MOZ_IMPLICIT ProfilerStringView(decltype(nullptr)) {}
+
+  // Implicit constructor from a literal string.
+  // `Np1` is the array size, i.e., the string length plus the null terminator.
+  template <size_t Np1>
+  constexpr MOZ_IMPLICIT ProfilerStringView(const CHAR (&aLiteralString)[Np1])
+      : ProfilerStringView(aLiteralString, Np1 - 1, Ownership::Literal) {}
+
+  // Constructor from a non-literal string.
+  constexpr ProfilerStringView(const CHAR* aString, size_t aLength)
+      : ProfilerStringView(aString, aLength, Ownership::Reference) {}
+
+  // Implicit constructor from a string_view.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      const std::basic_string_view<CHAR>& aStringView)
+      : ProfilerStringView(aStringView.data(), aStringView.length(),
+                           Ownership::Reference) {}
+
+  // Implicit constructor from an expiring string_view. We assume that the
+  // pointed-at string will outlive this ProfilerStringView.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      std::basic_string_view<CHAR>&& aStringView)
+      : ProfilerStringView(aStringView.data(), aStringView.length(),
+                           Ownership::Reference) {}
+
+  // Implicit constructor from std::string.
+  constexpr MOZ_IMPLICIT ProfilerStringView(
+      const std::basic_string<CHAR>& aString)
+      : ProfilerStringView(aString.data(), aString.length(),
+                           Ownership::Reference) {}
+
+  // Construction from a raw pointer to a null-terminated string.
+  // This is a named class-static function to make it more obvious where work is
+  // being done (to determine the string length), and encourage users to instead
+  // provide a length, if already known.
+  // A null pointer results in a view of the literal empty string.
+  // TODO: Find callers and convert them to constructor instead if possible.
+  static constexpr ProfilerStringView WrapNullTerminatedString(
+      const CHAR* aString) {
+    return ProfilerStringView(
+        aString, aString ? std::char_traits<CHAR>::length(aString) : 0,
+        Ownership::Reference);
+  }
+
+  // Implicit constructor for an object with member functions `Data()`
+  // `Length()`, and `IsLiteral()`, common in xpcom strings.
+  template <
+      typename String,
+      typename DataReturnType = decltype(std::declval<const String>().Data()),
+      typename LengthReturnType =
+          decltype(std::declval<const String>().Length()),
+      typename IsLiteralReturnType =
+          decltype(std::declval<const String>().IsLiteral()),
+      typename =
+          std::enable_if_t<std::is_convertible_v<DataReturnType, const CHAR*> &&
+                           std::is_integral_v<LengthReturnType> &&
+                           std::is_same_v<IsLiteralReturnType, bool>>>
+  constexpr MOZ_IMPLICIT ProfilerStringView(const String& aString)
+      : ProfilerStringView(
+            static_cast<const CHAR*>(aString.Data()), aString.Length(),
+            aString.IsLiteral() ? Ownership::Literal : Ownership::Reference) {}
+
+  // Access to the underlying string view.
+  [[nodiscard]] constexpr const std::basic_string_view<CHAR>& StringView()
+      const {
+    return mStringView;
+  }
+
+  // Pointer to the first character of the viewed string.
+  [[nodiscard]] constexpr const CHAR* Data() const {
+    return mStringView.data();
+  }
+
+  // Number of characters in the viewed string.
+  [[nodiscard]] constexpr size_t Length() const { return mStringView.length(); }
+
+  [[nodiscard]] constexpr bool IsLiteral() const {
+    return mOwnership == Ownership::Literal;
+  }
+  [[nodiscard]] constexpr bool IsReference() const {
+    return mOwnership == Ownership::Reference;
+  }
+  // No `IsOwned...()` because it's a secret, only used internally!
+
+  [[nodiscard]] operator Span<const CHAR>() const {
+    return Span<const CHAR>(Data(), Length());
+  }
+
+ private:
+  enum class Ownership { Literal, Reference, OwnedThroughStringView };
+
+  // Allow deserializer to store anything here.
+  friend ProfileBufferEntryReader::Deserializer<ProfilerStringView>;
+
+  // Full constructor. A null `aString` is replaced with a view of the literal
+  // empty string, in which case the ownership is forced to `Literal`.
+  constexpr ProfilerStringView(const CHAR* aString, size_t aLength,
+                               Ownership aOwnership)
+      : mStringView(aString ? std::basic_string_view<CHAR>(aString, aLength)
+                            : LiteralEmptyStringView<CHAR>()),
+        mOwnership(aString ? aOwnership : Ownership::Literal) {}
+
+  // String view to an outside string (literal or reference).
+  // We may actually own the pointed-at buffer, but it is only used internally
+  // between deserialization and JSON streaming.
+  std::basic_string_view<CHAR> mStringView = LiteralEmptyStringView<CHAR>();
+
+  Ownership mOwnership = Ownership::Literal;
+};
+
+using ProfilerString8View = ProfilerStringView<char>;
+using ProfilerString16View = ProfilerStringView<char16_t>;
+
+// This compulsory marker parameter contains the required category information.
+class MarkerCategory {
+ public:
+  // Build from a category pair, which carries both the super-category and the
+  // sub-category.
+  constexpr explicit MarkerCategory(
+      baseprofiler::ProfilingCategoryPair aCategoryPair)
+      : mCategoryPair(aCategoryPair) {}
+
+  // The category pair given at construction.
+  constexpr baseprofiler::ProfilingCategoryPair CategoryPair() const {
+    return mCategoryPair;
+  }
+
+  // The super-category, looked up from the info associated with the stored
+  // category pair.
+  baseprofiler::ProfilingCategory GetCategory() const {
+    const auto& pairInfo = GetProfilingCategoryPairInfo(mCategoryPair);
+    return pairInfo.mCategory;
+  }
+
+ private:
+  baseprofiler::ProfilingCategoryPair mCategoryPair =
+      baseprofiler::ProfilingCategoryPair::OTHER;
+};
+
+// Namespace that contains one `MarkerCategory` constant per profiling
+// sub-category, named after that sub-category.
+namespace baseprofiler::category {
+
+// Each category pair name constructs a MarkerCategory.
+// E.g.: mozilla::baseprofiler::category::OTHER_Profiling
+// Profiler macros will take the category name alone without namespace.
+// E.g.: `PROFILER_MARKER_UNTYPED("name", OTHER_Profiling)`
+//
+// The three macros below are handed to MOZ_PROFILING_CATEGORY_LIST; only the
+// sub-category macro expands to something (a constant definition), the
+// begin/end macros expand to nothing.
+#  define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#  define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) \
+    static constexpr MarkerCategory name{ProfilingCategoryPair::name};
+#  define CATEGORY_ENUM_END_CATEGORY
+MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+                            CATEGORY_ENUM_SUBCATEGORY,
+                            CATEGORY_ENUM_END_CATEGORY)
+// The helper macros are only needed for the list expansion above.
+#  undef CATEGORY_ENUM_BEGIN_CATEGORY
+#  undef CATEGORY_ENUM_SUBCATEGORY
+#  undef CATEGORY_ENUM_END_CATEGORY
+
+// Import `MarkerCategory` into this namespace. This will allow using this type
+// dynamically in macros that prepend `::mozilla::baseprofiler::category::` to
+// the given category, e.g.:
+// `PROFILER_MARKER_UNTYPED("name", MarkerCategory(...))`
+using MarkerCategory = ::mozilla::MarkerCategory;
+
+}  // namespace baseprofiler::category
+
+// The classes below are all embedded in a `MarkerOptions` object.
+class MarkerOptions;
+
+// This marker option captures a given thread id.
+// If left unspecified (by default construction) during the add-marker call, the
+// current thread id will be used then.
+class MarkerThreadId {
+ public:
+  // By default the thread id is left unspecified (stored as 0).
+  constexpr MarkerThreadId() = default;
+
+  // Record the given thread id.
+  constexpr explicit MarkerThreadId(int aThreadId) : mThreadId(aThreadId) {}
+
+  // Record the id of the thread that calls this function.
+  static MarkerThreadId CurrentThread() {
+    const int currentThreadId = baseprofiler::profiler_current_thread_id();
+    return MarkerThreadId(currentThreadId);
+  }
+
+  // Record the id of the main thread. This can be useful to record a marker
+  // from a possibly-unregistered thread, and display it in the main thread
+  // track.
+  static MarkerThreadId MainThread() {
+    const int mainThreadId = baseprofiler::profiler_main_thread_id();
+    return MarkerThreadId(mainThreadId);
+  }
+
+  // The recorded thread id, 0 if unspecified.
+  [[nodiscard]] constexpr int ThreadId() const { return mThreadId; }
+
+  // True if no thread id has been recorded, i.e., the stored id is 0.
+  [[nodiscard]] constexpr bool IsUnspecified() const { return 0 == mThreadId; }
+
+ private:
+  int mThreadId = 0;
+};
+
+// This marker option contains marker timing information.
+// This class encapsulates the logic for correctly storing a marker based on its
+// phase (instant, interval, interval start, or interval end).
+// Use the static methods to create the MarkerTiming. This is a transient object
+// that is being used to enforce the constraints of the combinations of the
+// data.
+class MarkerTiming {
+ public:
+  // Which timestamp(s) are meaningful in a MarkerTiming.
+  enum class Phase : uint8_t {
+    Instant = 0,
+    Interval = 1,
+    IntervalStart = 2,
+    IntervalEnd = 3,
+  };
+
+  // Static factory functions, one (or more) per phase.
+
+  // Instant marker at the given time.
+  static MarkerTiming InstantAt(const TimeStamp& aTime) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an instant marker.");
+    return MarkerTiming(aTime, TimeStamp{}, Phase::Instant);
+  }
+
+  // Instant marker at the current time.
+  static MarkerTiming InstantNow() {
+    return InstantAt(TimeStamp::NowUnfuzzed());
+  }
+
+  // Full interval between the two given times.
+  static MarkerTiming Interval(const TimeStamp& aStartTime,
+                               const TimeStamp& aEndTime) {
+    MOZ_ASSERT(!aStartTime.IsNull(),
+               "Start time is null for an interval marker.");
+    MOZ_ASSERT(!aEndTime.IsNull(), "End time is null for an interval marker.");
+    return MarkerTiming(aStartTime, aEndTime, Phase::Interval);
+  }
+
+  // Full interval from the given time until the current time.
+  static MarkerTiming IntervalUntilNowFrom(const TimeStamp& aStartTime) {
+    return Interval(aStartTime, TimeStamp::NowUnfuzzed());
+  }
+
+  // Start of an interval, by default at the current time.
+  static MarkerTiming IntervalStart(
+      const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval start marker.");
+    return MarkerTiming(aTime, TimeStamp{}, Phase::IntervalStart);
+  }
+
+  // End of an interval, by default at the current time.
+  static MarkerTiming IntervalEnd(
+      const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+    return MarkerTiming(TimeStamp{}, aTime, Phase::IntervalEnd);
+  }
+
+  // Record the end time (by default the current time) in this timing. The
+  // phase becomes `Interval` if a start time is present, `IntervalEnd` if not.
+  void SetIntervalEnd(const TimeStamp& aTime = TimeStamp::NowUnfuzzed()) {
+    MOZ_ASSERT(!aTime.IsNull(), "Time is null for an interval end marker.");
+    mEndTime = aTime;
+    if (mStartTime.IsNull()) {
+      mPhase = Phase::IntervalEnd;
+    } else {
+      mPhase = Phase::Interval;
+    }
+  }
+
+  [[nodiscard]] const TimeStamp& StartTime() const { return mStartTime; }
+  [[nodiscard]] const TimeStamp& EndTime() const { return mEndTime; }
+
+  [[nodiscard]] Phase MarkerPhase() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return mPhase;
+  }
+
+  // The getters below convert the stored data to plain numbers, to put the
+  // values into the buffer for storage.
+
+  // Start time in milliseconds since process creation; 0.0 if the start time
+  // is null (e.g., for IntervalEnd).
+  [[nodiscard]] double GetStartTime() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return timeStampToDouble(mStartTime);
+  }
+
+  // End time in milliseconds since process creation; 0.0 if the end time is
+  // null (e.g., for Instant or IntervalStart).
+  [[nodiscard]] double GetEndTime() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return timeStampToDouble(mEndTime);
+  }
+
+  // The phase as its underlying 8-bit number.
+  [[nodiscard]] uint8_t GetPhase() const {
+    MOZ_ASSERT(!IsUnspecified());
+    return static_cast<uint8_t>(mPhase);
+  }
+
+ private:
+  friend ProfileBufferEntryWriter::Serializer<MarkerTiming>;
+  friend ProfileBufferEntryReader::Deserializer<MarkerTiming>;
+  friend MarkerOptions;
+
+  // Default timing leaves it internally "unspecified", serialization getters
+  // and add-marker functions will default to `InstantNow()`.
+  constexpr MarkerTiming() = default;
+
+  // Full constructor, used by static factory functions.
+  constexpr MarkerTiming(const TimeStamp& aStartTime, const TimeStamp& aEndTime,
+                         Phase aPhase)
+      : mStartTime(aStartTime), mEndTime(aEndTime), mPhase(aPhase) {}
+
+  // A timing is "unspecified" when neither timestamp has been set.
+  // This should only be used by internal profiler code.
+  [[nodiscard]] bool IsUnspecified() const {
+    return mStartTime.IsNull() && mEndTime.IsNull();
+  }
+
+  // Milliseconds between process creation and `time`. A null `time` gives 0,
+  // in which case the Phase lets us know not to use this value.
+  static double timeStampToDouble(const TimeStamp& time) {
+    return time.IsNull()
+               ? 0.0
+               : (time - TimeStamp::ProcessCreation()).ToMilliseconds();
+  }
+
+  TimeStamp mStartTime;
+  TimeStamp mEndTime;
+  Phase mPhase = Phase::Instant;
+};
+
+// This marker option allows three cases:
+// - By default, no stacks are captured.
+// - The caller can request a stack capture, and the add-marker code will take
+//   care of it in the most efficient way.
+// - The caller can still provide an existing backtrace, for cases where a
+//   marker reports something that happened elsewhere.
+class MarkerStack {
+ public:
+  // Default constructor, no capture.
+  constexpr MarkerStack() = default;
+
+  // Disallow copy.
+  MarkerStack(const MarkerStack&) = delete;
+  MarkerStack& operator=(const MarkerStack&) = delete;
+
+  // Allow move.
+  MarkerStack(MarkerStack&& aOther)
+      : mIsCaptureRequested(aOther.mIsCaptureRequested),
+        mOptionalChunkedBufferStorage(
+            std::move(aOther.mOptionalChunkedBufferStorage)),
+        mChunkedBuffer(aOther.mChunkedBuffer) {
+    AssertInvariants();
+    aOther.Clear();
+  }
+  MarkerStack& operator=(MarkerStack&& aOther) {
+    mIsCaptureRequested = aOther.mIsCaptureRequested;
+    mOptionalChunkedBufferStorage =
+        std::move(aOther.mOptionalChunkedBufferStorage);
+    mChunkedBuffer = aOther.mChunkedBuffer;
+    AssertInvariants();
+    aOther.Clear();
+    return *this;
+  }
+
+  // Take ownership of a backtrace. If null or empty, equivalent to NoStack().
+  explicit MarkerStack(UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer)
+      : mIsCaptureRequested(false),
+        mOptionalChunkedBufferStorage(
+            (!aExternalChunkedBuffer || aExternalChunkedBuffer->IsEmpty())
+                ? nullptr
+                : std::move(aExternalChunkedBuffer)),
+        mChunkedBuffer(mOptionalChunkedBufferStorage.get()) {
+    AssertInvariants();
+  }
+
+  // Use an existing backtrace stored elsewhere, which the user must guarantee
+  // is alive during the add-marker call. If empty, equivalent to NoStack().
+  explicit MarkerStack(ProfileChunkedBuffer& aExternalChunkedBuffer)
+      : mIsCaptureRequested(false),
+        mChunkedBuffer(aExternalChunkedBuffer.IsEmpty()
+                           ? nullptr
+                           : &aExternalChunkedBuffer) {
+    AssertInvariants();
+  }
+
+  // Don't capture a stack in this marker.
+  static MarkerStack NoStack() { return MarkerStack(false); }
+
+  // Capture a stack when adding this marker.
+  static MarkerStack Capture() {
+    // Actual capture will be handled inside profiler_add_marker.
+    return MarkerStack(true);
+  }
+
+  // Optionally capture a stack, useful for avoiding long-winded ternaries.
+  static MarkerStack MaybeCapture(bool aDoCapture) {
+    return MarkerStack(aDoCapture);
+  }
+
+  // Use an existing backtrace stored elsewhere, which the user must guarantee
+  // is alive during the add-marker call. If empty, equivalent to NoStack().
+  static MarkerStack UseBacktrace(
+      ProfileChunkedBuffer& aExternalChunkedBuffer) {
+    return MarkerStack(aExternalChunkedBuffer);
+  }
+
+  // Take ownership of a backtrace previously captured with
+  // `profiler_capture_backtrace()`. If null, equivalent to NoStack().
+  static MarkerStack TakeBacktrace(
+      UniquePtr<ProfileChunkedBuffer>&& aExternalChunkedBuffer) {
+    return MarkerStack(std::move(aExternalChunkedBuffer));
+  }
+
+  [[nodiscard]] bool IsCaptureNeeded() const {
+    // If the chunked buffer already contains something, consider the capture
+    // request already fulfilled.
+    return mIsCaptureRequested;
+  }
+
+  ProfileChunkedBuffer* GetChunkedBuffer() const { return mChunkedBuffer; }
+
+  // Use backtrace after a request. If null, equivalent to NoStack().
+  void UseRequestedBacktrace(ProfileChunkedBuffer* aExternalChunkedBuffer) {
+    MOZ_RELEASE_ASSERT(IsCaptureNeeded());
+    mIsCaptureRequested = false;
+    if (aExternalChunkedBuffer && !aExternalChunkedBuffer->IsEmpty()) {
+      // We only need to use the provided buffer if it is not empty.
+      mChunkedBuffer = aExternalChunkedBuffer;
+    }
+    AssertInvariants();
+  }
+
+  void Clear() {  // Back to the NoStack() state: no request, no backtrace.
+    mChunkedBuffer = nullptr;
+    mOptionalChunkedBufferStorage.reset();
+    mIsCaptureRequested = false;
+    AssertInvariants();
+  }
+
+ private:
+  explicit MarkerStack(bool aIsCaptureRequested)
+      : mIsCaptureRequested(aIsCaptureRequested) {
+    AssertInvariants();  // Both buffer members keep their null defaults.
+  }
+
+  // Debug-only check, to call after every constructor and non-const function.
+  void AssertInvariants() const {
+#  ifdef DEBUG
+    if (mIsCaptureRequested) {
+      MOZ_ASSERT(!mOptionalChunkedBufferStorage,
+                 "We should not hold a buffer when capture is requested");
+      MOZ_ASSERT(!mChunkedBuffer,
+                 "We should not point at a buffer when capture is requested");
+      return;
+    }
+    if (mOptionalChunkedBufferStorage) {
+      MOZ_ASSERT(mChunkedBuffer == mOptionalChunkedBufferStorage.get(),
+                 "Non-null mOptionalChunkedBufferStorage must be pointed-at "
+                 "by mChunkedBuffer");
+    }
+    if (mChunkedBuffer) {
+      MOZ_ASSERT(!mChunkedBuffer->IsEmpty(),
+                 "Non-null mChunkedBuffer must not be empty");
+    }
+#  endif  // DEBUG
+  }
+
+  // True if a capture is requested when marker is added to the profile buffer.
+  bool mIsCaptureRequested = false;
+
+  // Optional storage for the backtrace, in case it was captured before the
+  // add-marker call.
+  UniquePtr<ProfileChunkedBuffer> mOptionalChunkedBufferStorage;
+
+  // If not null, this points to the backtrace. It may point to a backtrace
+  // temporarily stored on the stack, or to mOptionalChunkedBufferStorage.
+  ProfileChunkedBuffer* mChunkedBuffer = nullptr;
+};
+
+// This marker option captures a given inner window id.
+class MarkerInnerWindowId {
+ public:
+  // Default constructor, it leaves the id unspecified.
+  constexpr MarkerInnerWindowId() = default;
+
+  // Constructors from an id, or from an optional id (Nothing = unspecified).
+  constexpr explicit MarkerInnerWindowId(uint64_t i) : mInnerWindowId(i) {}
+  constexpr explicit MarkerInnerWindowId(const Maybe<uint64_t>& i)
+      : mInnerWindowId(i.valueOr(scNoId)) {}
+
+  // Explicit option with unspecified id.
+  constexpr static MarkerInnerWindowId NoId() { return MarkerInnerWindowId{}; }
+
+  // True if the stored id is the "unspecified" sentinel (which is id 0).
+  [[nodiscard]] constexpr bool IsUnspecified() const {
+    return mInnerWindowId == scNoId;
+  }
+  [[nodiscard]] constexpr uint64_t Id() const { return mInnerWindowId; }
+
+ private:
+  static constexpr uint64_t scNoId = 0;
+  uint64_t mInnerWindowId = scNoId;
+};
+
+// Combines one of each marker option: thread id, timing, stack, window id.
+class MarkerOptions {
+ public:
+  // Constructor from individual options (including none).
+  // Implicit to allow `{}` and one option type as-is.
+  // Options that are not provided here are defaulted. In particular, timing
+  // defaults to `MarkerTiming::InstantNow()` when the marker is recorded.
+  template <typename... Options>
+  MOZ_IMPLICIT MarkerOptions(Options&&... aOptions) {
+    (Set(std::forward<Options>(aOptions)), ...);  // One Set() per argument.
+  }
+
+  // Disallow copy (the MarkerStack member may own its backtrace buffer).
+  MarkerOptions(const MarkerOptions&) = delete;
+  MarkerOptions& operator=(const MarkerOptions&) = delete;
+
+  // Allow move.
+  MarkerOptions(MarkerOptions&&) = default;
+  MarkerOptions& operator=(MarkerOptions&&) = default;
+
+  // The embedded `MarkerTiming` hasn't been specified yet.
+  [[nodiscard]] bool IsTimingUnspecified() const {
+    return mTiming.IsUnspecified();
+  }
+
+  // Each option may be added in a chain by e.g.:
+  // `options.Set(MarkerThreadId(123)).Set(MarkerTiming::IntervalEnd())`.
+  // When passed to an add-marker function, it must be an rvalue, either created
+  // on the spot, or `std::move`d from storage, e.g.:
+  // `PROFILER_MARKER_UNTYPED("...", std::move(options).Set(...))`;
+  //
+  // Options can be read by their name (without "Marker"), e.g.: `o.ThreadId()`.
+  // Add "Ref" for a non-const reference, e.g.: `o.ThreadIdRef() = ...;`
+#  define FUNCTIONS_ON_MEMBER(NAME)                      \
+    MarkerOptions& Set(Marker##NAME&& a##NAME)& {        \
+      m##NAME = std::move(a##NAME);                      \
+      return *this;                                      \
+    }                                                    \
+                                                         \
+    MarkerOptions&& Set(Marker##NAME&& a##NAME)&& {      \
+      m##NAME = std::move(a##NAME);                      \
+      return std::move(*this);                           \
+    }                                                    \
+                                                         \
+    const Marker##NAME& NAME() const { return m##NAME; } \
+                                                         \
+    Marker##NAME& NAME##Ref() { return m##NAME; }
+
+  FUNCTIONS_ON_MEMBER(ThreadId);
+  FUNCTIONS_ON_MEMBER(Timing);
+  FUNCTIONS_ON_MEMBER(Stack);
+  FUNCTIONS_ON_MEMBER(InnerWindowId);
+#  undef FUNCTIONS_ON_MEMBER
+
+ private:
+  friend ProfileBufferEntryReader::Deserializer<MarkerOptions>;
+
+  MarkerThreadId mThreadId;
+  MarkerTiming mTiming;
+  MarkerStack mStack;
+  MarkerInnerWindowId mInnerWindowId;
+};
+
+}  // namespace mozilla
+
+namespace mozilla::baseprofiler::markers {
+
+// Default marker payload type, with no extra information: not even a marker
+// type or payload data. This is intended for label-only markers.
+struct NoPayload final {};
+
+}  // namespace mozilla::baseprofiler::markers
+
+namespace mozilla {
+
+class JSONWriter;
+
+// This class collects all the information necessary to stream the JSON schema
+// that informs the front-end how to display a type of markers.
+// It will be created and populated in `MarkerTypeDisplay()` functions in each
+// marker type definition, see Add/Set functions.
+class MarkerSchema {
+ public:
+  enum class Location : unsigned {
+    markerChart,
+    markerTable,
+    // This adds markers to the main marker timeline in the header.
+    timelineOverview,
+    // In the timeline, this is a section that breaks out markers that are
+    // related to memory. When memory counters are enabled, this is its own
+    // track, otherwise it is displayed with the main thread.
+    timelineMemory,
+    // This adds markers to the IPC timeline area in the header.
+    timelineIPC,
+    // This adds markers to the FileIO timeline area in the header.
+    timelineFileIO,
+    // TODO - This is not supported yet.
+    stackChart
+  };
+
+  // Used as constructor parameter, to explicitly specify that the location (and
+  // other display options) are handled as a special case in the front-end.
+  // In this case, *no* schema will be output for this type.
+  struct SpecialFrontendLocation {};
+
+  enum class Format {
+    // ----------------------------------------------------
+    // String types.
+
+    // Show the URL, and handle PII sanitization.
+    url,
+    // Show the file path, and handle PII sanitization.
+    filePath,
+    // Important, do not put URL or file path information here, as it will not
+    // be sanitized. Please be careful with including other types of PII here as
+    // well.
+    // e.g. "Label: Some String"
+    string,
+
+    // ----------------------------------------------------
+    // Numeric types
+
+    // For time data that represents a duration of time.
+    // e.g. "Label: 5s, 5ms, 5μs"
+    duration,
+    // Data that happened at a specific time, relative to the start of the
+    // profile. e.g. "Label: 15.5s, 20.5ms, 30.5μs"
+    time,
+    // The following are alternatives to display a time only in a specific unit
+    // of time.
+    seconds,       // "Label: 5s"
+    milliseconds,  // "Label: 5ms"
+    microseconds,  // "Label: 5μs"
+    nanoseconds,   // "Label: 5ns"
+    // e.g. "Label: 5.55mb, 5 bytes, 312.5kb"
+    bytes,
+    // This should be a value between 0 and 1.
+    // "Label: 50%"
+    percentage,
+    // The integer should be used for generic representations of numbers.
+    // Do not use it for time information.
+    // "Label: 52, 5,323, 1,234,567"
+    integer,
+    // The decimal should be used for generic representations of numbers.
+    // Do not use it for time information.
+    // "Label: 52.23, 0.0054, 123,456.78"
+    decimal
+  };
+
+  enum class Searchable { notSearchable, searchable };
+
+  // Marker schema, with a non-empty list of locations where markers should be
+  // shown.
+  // Tech note: Even though `aLocations` are templated arguments, they are
+  // assigned to an `enum class` object, so they can only be of that enum type.
+  template <typename... Locations>
+  explicit MarkerSchema(Location aLocation, Locations... aLocations)
+      : mLocations{aLocation, aLocations...} {}
+
+  // Marker schema for types that have special frontend handling.
+  // Nothing else should be set in this case.
+  // Implicit to allow quick return from MarkerTypeDisplay functions.
+  MOZ_IMPLICIT MarkerSchema(SpecialFrontendLocation) {}
+
+  // Caller must specify location(s) or SpecialFrontendLocation above.
+  MarkerSchema() = delete;
+
+  // Optional labels in the marker chart, the chart tooltip, and the marker
+  // table. If not provided, the marker "name" will be used. The given string
+  // can contain element keys in braces to include data elements streamed by
+  // `StreamJSONMarkerData()`. E.g.: "This is {text}"
+
+#  define LABEL_SETTER(name)                       \
+    MarkerSchema& Set##name(std::string a##name) { \
+      m##name = std::move(a##name);                \
+      return *this;                                \
+    }
+
+  LABEL_SETTER(ChartLabel)
+  LABEL_SETTER(TooltipLabel)
+  LABEL_SETTER(TableLabel)
+
+#  undef LABEL_SETTER
+
+  MarkerSchema& SetAllLabels(std::string aText) {
+    // Here we set the same text in each label.
+    // TODO: Move to a single "label" field once the front-end allows it.
+    SetChartLabel(aText);
+    SetTooltipLabel(aText);
+    SetTableLabel(std::move(aText));
+    return *this;
+  }
+
+  // Each data element that is streamed by `StreamJSONMarkerData()` can be
+  // displayed as indicated by using one of the `Add...` function below.
+  // Each `Add...` will add a line in the full marker description. Parameters:
+  // - `aKey`: Element property name as streamed by `StreamJSONMarkerData()`.
+  // - `aLabel`: Optional prefix. Defaults to the key name.
+  // - `aFormat`: How to format the data element value, see `Format` above.
+  // - `aSearchable`: Optional, indicates if the value is used in searches,
+  //   defaults to false.
+
+  MarkerSchema& AddKeyFormat(std::string aKey, Format aFormat) {
+    mData.emplace_back(mozilla::VariantType<DynamicData>{},
+                       DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat,
+                                   mozilla::Nothing{}});
+    return *this;
+  }
+
+  MarkerSchema& AddKeyLabelFormat(std::string aKey, std::string aLabel,
+                                  Format aFormat) {
+    mData.emplace_back(
+        mozilla::VariantType<DynamicData>{},
+        DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+                    mozilla::Nothing{}});
+    return *this;
+  }
+
+  MarkerSchema& AddKeyFormatSearchable(std::string aKey, Format aFormat,
+                                       Searchable aSearchable) {
+    mData.emplace_back(mozilla::VariantType<DynamicData>{},
+                       DynamicData{std::move(aKey), mozilla::Nothing{}, aFormat,
+                                   mozilla::Some(aSearchable)});
+    return *this;
+  }
+
+  MarkerSchema& AddKeyLabelFormatSearchable(std::string aKey,
+                                            std::string aLabel, Format aFormat,
+                                            Searchable aSearchable) {
+    mData.emplace_back(
+        mozilla::VariantType<DynamicData>{},
+        DynamicData{std::move(aKey), mozilla::Some(std::move(aLabel)), aFormat,
+                    mozilla::Some(aSearchable)});
+    return *this;
+  }
+
+  // The display may also include static rows.
+
+  MarkerSchema& AddStaticLabelValue(std::string aLabel, std::string aValue) {
+    mData.emplace_back(mozilla::VariantType<StaticData>{},
+                       StaticData{std::move(aLabel), std::move(aValue)});
+    return *this;
+  }
+
+  // Internal streaming function.
+  MFBT_API void Stream(JSONWriter& aWriter, const Span<const char>& aName) &&;
+
+ private:
+  MFBT_API static Span<const char> LocationToStringSpan(Location aLocation);
+  MFBT_API static Span<const char> FormatToStringSpan(Format aFormat);
+
+  // List of marker display locations. Empty for SpecialFrontendLocation.
+  std::vector<Location> mLocations;
+  // Labels for different places.
+  std::string mChartLabel;
+  std::string mTooltipLabel;
+  std::string mTableLabel;
+  // Main display, made of zero or more rows of key+label+format (DynamicData)
+  // or label+value (StaticData).
+  struct DynamicData {
+    std::string mKey;
+    mozilla::Maybe<std::string> mLabel;
+    Format mFormat;
+    mozilla::Maybe<Searchable> mSearchable;
+  };
+  struct StaticData {
+    std::string mLabel;
+    std::string mValue;
+  };
+  using DataRow = mozilla::Variant<DynamicData, StaticData>;
+  using DataRowVector = std::vector<DataRow>;
+
+  DataRowVector mData;
+};
+
+}  // namespace mozilla
+
+#endif  // MOZ_GECKO_PROFILER
+
+#endif  // BaseProfilerMarkersPrerequisites_h
diff --git a/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
new file mode 100644
index 0000000000..0a104193c3
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilerSharedLibraries.h
@@ -0,0 +1,146 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BASE_PROFILER_SHARED_LIBRARIES_H_
+#define BASE_PROFILER_SHARED_LIBRARIES_H_
+
+#include "BaseProfiler.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <vector>
+
+class SharedLibrary {  // Value type: address range, offset and id strings.
+ public:
+  SharedLibrary(uintptr_t aStart, uintptr_t aEnd, uintptr_t aOffset,
+                const std::string& aBreakpadId, const std::string& aModuleName,
+                const std::string& aModulePath, const std::string& aDebugName,
+                const std::string& aDebugPath, const std::string& aVersion,
+                const char* aArch)
+      : mStart(aStart),
+        mEnd(aEnd),
+        mOffset(aOffset),
+        mBreakpadId(aBreakpadId),
+        mModuleName(aModuleName),
+        mModulePath(aModulePath),
+        mDebugName(aDebugName),
+        mDebugPath(aDebugPath),
+        mVersion(aVersion),
+        mArch(aArch) {}  // aArch is copied into a std::string here.
+
+  SharedLibrary(const SharedLibrary& aEntry)  // Member-wise copy of all fields.
+      : mStart(aEntry.mStart),
+        mEnd(aEntry.mEnd),
+        mOffset(aEntry.mOffset),
+        mBreakpadId(aEntry.mBreakpadId),
+        mModuleName(aEntry.mModuleName),
+        mModulePath(aEntry.mModulePath),
+        mDebugName(aEntry.mDebugName),
+        mDebugPath(aEntry.mDebugPath),
+        mVersion(aEntry.mVersion),
+        mArch(aEntry.mArch) {}
+
+  SharedLibrary& operator=(const SharedLibrary& aEntry) {
+    // Self-assignment: nothing to copy, return early.
+    if (this == &aEntry) return *this;
+
+    mStart = aEntry.mStart;  // Below: member-wise copy of all ten fields.
+    mEnd = aEntry.mEnd;
+    mOffset = aEntry.mOffset;
+    mBreakpadId = aEntry.mBreakpadId;
+    mModuleName = aEntry.mModuleName;
+    mModulePath = aEntry.mModulePath;
+    mDebugName = aEntry.mDebugName;
+    mDebugPath = aEntry.mDebugPath;
+    mVersion = aEntry.mVersion;
+    mArch = aEntry.mArch;
+    return *this;  // NOTE(review): both copy operations match `= default`.
+  }
+
+  bool operator==(const SharedLibrary& other) const {  // All fields must match.
+    return (mStart == other.mStart) && (mEnd == other.mEnd) &&
+           (mOffset == other.mOffset) && (mModuleName == other.mModuleName) &&
+           (mModulePath == other.mModulePath) &&
+           (mDebugName == other.mDebugName) &&
+           (mDebugPath == other.mDebugPath) &&
+           (mBreakpadId == other.mBreakpadId) && (mVersion == other.mVersion) &&
+           (mArch == other.mArch);
+  }
+
+  uintptr_t GetStart() const { return mStart; }
+  uintptr_t GetEnd() const { return mEnd; }
+  uintptr_t GetOffset() const { return mOffset; }
+  const std::string& GetBreakpadId() const { return mBreakpadId; }
+  const std::string& GetModuleName() const { return mModuleName; }
+  const std::string& GetModulePath() const { return mModulePath; }
+  const std::string& GetDebugName() const { return mDebugName; }
+  const std::string& GetDebugPath() const { return mDebugPath; }
+  const std::string& GetVersion() const { return mVersion; }
+  const std::string& GetArch() const { return mArch; }
+
+ private:
+  SharedLibrary() : mStart{0}, mEnd{0}, mOffset{0} {}  // Private: zeroed range.
+
+  uintptr_t mStart;
+  uintptr_t mEnd;
+  uintptr_t mOffset;
+  std::string mBreakpadId;
+  std::string mModuleName;
+  std::string mModulePath;
+  std::string mDebugName;
+  std::string mDebugPath;
+  std::string mVersion;
+  std::string mArch;
+};
+
+static bool CompareAddresses(const SharedLibrary& first,
+                             const SharedLibrary& second) {
+  return second.GetStart() > first.GetStart();  // Strict order by start.
+}
+
+class SharedLibraryInfo {  // Growable list of SharedLibrary entries.
+ public:
+  static SharedLibraryInfo GetInfoForSelf();
+  static void Initialize();
+
+  SharedLibraryInfo() = default;
+
+  void AddSharedLibrary(SharedLibrary entry) { mEntries.emplace_back(entry); }
+
+  const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }
+
+  SharedLibrary& GetMutableEntry(size_t i) { return mEntries[i]; }
+
+  // Erases the entries at indices [first, last); the entry at index "last"
+  // itself is kept. Indices are not range-checked.
+  void RemoveEntries(size_t first, size_t last) {
+    mEntries.erase(mEntries.begin() + first, mEntries.begin() + last);
+  }
+
+  bool Contains(const SharedLibrary& searchItem) const {
+    return std::find(mEntries.begin(), mEntries.end(), searchItem) !=
+           mEntries.end();
+  }
+
+  size_t GetSize() const { return mEntries.size(); }
+
+  void SortByAddress() {
+    std::sort(mEntries.begin(), mEntries.end(), &CompareAddresses);
+  }
+
+  void Clear() { mEntries.clear(); }
+
+ private:
+  std::vector<SharedLibrary> mEntries;
+};
+
+#endif  // BASE_PROFILER_SHARED_LIBRARIES_H_
diff --git a/mozglue/baseprofiler/public/BaseProfilingCategory.h b/mozglue/baseprofiler/public/BaseProfilingCategory.h
new file mode 100644
index 0000000000..6892ec40f4
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingCategory.h
@@ -0,0 +1,72 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingCategory_h
+#define BaseProfilingCategory_h
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include "mozilla/Types.h"
+
+#include <cstdint>
+
+#include "ProfilingCategoryList.h"
+
+namespace mozilla {
+namespace baseprofiler {
+
+// clang-format off
+
+// Flat enum of every (category, subcategory) pair, one enumerator per
+// subcategory in MOZ_PROFILING_CATEGORY_LIST. A single number therefore
+// identifies both category and subcategory, e.g. in profiler stack labels.
+// COUNT follows the last pair; LAST equals the last pair's value.
+#define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString) name,
+#define CATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategoryPair : uint32_t {
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+                              CATEGORY_ENUM_SUBCATEGORY,
+                              CATEGORY_ENUM_END_CATEGORY)
+  COUNT,
+  LAST = COUNT - 1,
+};
+#undef CATEGORY_ENUM_BEGIN_CATEGORY
+#undef CATEGORY_ENUM_SUBCATEGORY
+#undef CATEGORY_ENUM_END_CATEGORY
+
+// Enum of the top-level categories only, one per category in the list.
+#define SUPERCATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color) name,
+#define SUPERCATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString)
+#define SUPERCATEGORY_ENUM_END_CATEGORY
+enum class ProfilingCategory : uint32_t {
+  MOZ_PROFILING_CATEGORY_LIST(SUPERCATEGORY_ENUM_BEGIN_CATEGORY,
+                              SUPERCATEGORY_ENUM_SUBCATEGORY,
+                              SUPERCATEGORY_ENUM_END_CATEGORY)
+  COUNT,  // One past the last category.
+  LAST = COUNT - 1,  // Value of the last category.
+};
+#undef SUPERCATEGORY_ENUM_BEGIN_CATEGORY
+#undef SUPERCATEGORY_ENUM_SUBCATEGORY
+#undef SUPERCATEGORY_ENUM_END_CATEGORY
+
+// clang-format on
+
+struct ProfilingCategoryPairInfo {  // Details looked up for one category pair.
+  ProfilingCategory mCategory;  // Presumably the pair's parent category.
+  uint32_t mSubcategoryIndex;  // Presumably its index within that category.
+  const char* mLabel;  // Label text for this pair.
+};
+
+MFBT_API const ProfilingCategoryPairInfo& GetProfilingCategoryPairInfo(
+    ProfilingCategoryPair aCategoryPair);
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* BaseProfilingCategory_h */
diff --git a/mozglue/baseprofiler/public/BaseProfilingStack.h b/mozglue/baseprofiler/public/BaseProfilingStack.h
new file mode 100644
index 0000000000..214fc1ebbf
--- /dev/null
+++ b/mozglue/baseprofiler/public/BaseProfilingStack.h
@@ -0,0 +1,520 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseProfilingStack_h
+#define BaseProfilingStack_h
+
+#include "BaseProfilingCategory.h"
+
+#include "mozilla/Atomics.h"
+
+#include "BaseProfiler.h"
+
+#ifndef MOZ_GECKO_PROFILER
+#  error Do not #include this header when MOZ_GECKO_PROFILER is not #defined.
+#endif
+
+#include <algorithm>
+#include <stdint.h>
+
+// This file defines the classes ProfilingStack and ProfilingStackFrame.
+// The ProfilingStack manages an array of ProfilingStackFrames.
+// It keeps track of the "label stack" and the JS interpreter stack.
+// The two stack types are interleaved.
+//
+// Usage:
+//
+//  ProfilingStack* profilingStack = ...;
+//
+//  // For label frames:
+//  profilingStack->pushLabelFrame(...);
+//  // Execute some code. When finished, pop the frame:
+//  profilingStack->pop();
+//
+//  // For JS stack frames:
+//  profilingStack->pushJSFrame(...);
+//  // Execute some code. When finished, pop the frame:
+//  profilingStack->pop();
+//
+//
+// Concurrency considerations
+//
+// A thread's profiling stack (and the frames inside it) is only modified by
+// that thread. However, the profiling stack can be *read* by a different
+// thread, the sampler thread: Whenever the profiler wants to sample a given
+// thread A, the following happens:
+//  (1) Thread A is suspended.
+//  (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
+//      including all ProfilingStackFrames that are currently in that stack
+//      (profilingStack->frames[0..profilingStack->stackSize()]).
+//  (3) Thread A is resumed.
+//
+// Thread suspension is achieved using platform-specific APIs; refer to each
+// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
+// platform-*.cpp for details.
+//
+// When the thread is suspended, the values in profilingStack->stackPointer and
+// in the stack frame range
+// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
+// consistent state, so that thread S does not read partially-constructed stack
+// frames. More specifically, we have two requirements:
+//  (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
+//      data needs to be put in place *before* the stackPointer is incremented,
+//      and the compiler + CPU need to know that this order matters.
+//  (2) When popping a frame from the stack and then preparing the
+//      ProfilingStackFrame data for the next frame that is about to be pushed,
+//      the decrement of the stackPointer in pop() needs to happen *before* the
+//      ProfilingStackFrame for the new frame is being populated, and the
+//      compiler + CPU need to know that this order matters.
+//
+// We can express the relevance of these orderings in multiple ways.
+// Option A is to make stackPointer an atomic with SequentiallyConsistent
+// memory ordering. This would ensure that no writes in thread A would be
+// reordered across any writes to stackPointer, which satisfies requirements
+// (1) and (2) at the same time. Option A is the simplest.
+// Option B is to use ReleaseAcquire memory ordering both for writes to
+// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
+// ensure that all writes that happened *before this write in program order* are
+// not reordered to happen after this write. ReleaseAcquire ordering places no
+// requirements on the ordering of writes that happen *after* this write in
+// program order.
+// Using release-stores for writes to stackPointer expresses requirement (1),
+// and using release-stores for writes to the ProfilingStackFrame fields
+// expresses requirement (2).
+//
+// Option B is more complicated than option A, but has much better performance
+// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
+// from option A to option B reduced the overhead of pushing+popping a
+// ProfilingStackFrame by 10 nanoseconds.
+// On x86/64, release-stores require no explicit hardware barriers or lock
+// instructions.
+// On ARM/64, option B may be slower than option A, because the compiler will
+// generate hardware barriers for every single release-store instead of just
+// for the writes to stackPointer. However, the actual performance impact of
+// this has not yet been measured on ARM, so we're currently using option B
+// everywhere. This is something that we may want to change in the future once
+// we've done measurements.
+
+namespace mozilla {
+namespace baseprofiler {
+
+// A call stack can be specified to the JS engine such that all JS entry/exits
+// to functions push/pop a stack frame to/from the specified stack.
+//
+// For more detailed information, see vm/GeckoProfiler.h.
+//
+class ProfilingStackFrame {
+  // A ProfilingStackFrame represents either a label frame or a JS frame.
+
+  // WARNING WARNING WARNING
+  //
+  // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
+  // that writes to these fields are release-writes, which ensures that
+  // earlier writes in this thread don't get reordered after the writes to
+  // these fields. In particular, the decrement of the stack pointer in
+  // ProfilingStack::pop() is a write that *must* happen before the values in
+  // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
+  // see an inconsistent state where the stack pointer still points to a
+  // ProfilingStackFrame which has already been popped off the stack and whose
+  // fields have now been partially repopulated with new values.
+  // See the "Concurrency considerations" paragraph at the top of this file
+  // for more details.
+
+  // Descriptive label for this stack frame. Must be a static string! Can be
+  // an empty string, but not a null pointer.
+  Atomic<const char*, ReleaseAcquire> label_;
+
+  // An additional descriptive string of this frame which is combined with
+  // |label_| in profiler output. Need not be (and usually isn't) static. Can
+  // be null.
+  Atomic<const char*, ReleaseAcquire> dynamicString_;
+
+  // Stack pointer for non-JS stack frames, the script pointer otherwise.
+  Atomic<void*, ReleaseAcquire> spOrScript;
+
+  // ID of the JS Realm for JS stack frames.
+  // Must not be used on non-JS frames; it'll contain either the default 0,
+  // or a leftover value from a previous JS stack frame that was using this
+  // ProfilingStackFrame object.
+  mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
+
+  // The bytecode offset for JS stack frames.
+  // Must not be used on non-JS frames; it'll contain either the default 0,
+  // or a leftover value from a previous JS stack frame that was using this
+  // ProfilingStackFrame object.
+  Atomic<int32_t, ReleaseAcquire> pcOffsetIfJS_;
+
+  // Bits 0...15 hold the Flags. Bits 16...31 hold the category pair.
+  Atomic<uint32_t, ReleaseAcquire> flagsAndCategoryPair_;
+
+ public:
+  ProfilingStackFrame() = default;
+  ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
+    label_ = other.label();  // May be the category pair's label, see label().
+    dynamicString_ = other.dynamicString();
+    void* spScript = other.spOrScript;  // Atomic load into a plain local,
+    spOrScript = spScript;              // then a separate atomic store.
+    int32_t offsetIfJS = other.pcOffsetIfJS_;
+    pcOffsetIfJS_ = offsetIfJS;
+    uint64_t realmID = other.realmID_;  // Same type as realmID_ (unsigned).
+    realmID_ = realmID;
+    uint32_t flagsAndCategory = other.flagsAndCategoryPair_;
+    flagsAndCategoryPair_ = flagsAndCategory;
+    return *this;
+  }
+
+  // Reserve up to 16 bits for flags, and 16 for category pair.
+  enum class Flags : uint32_t {
+    // The first three flags describe the kind of the frame and are
+    // mutually exclusive. (We still give them individual bits for
+    // simplicity.)
+
+    // A regular label frame. These usually come from AutoProfilerLabel.
+    IS_LABEL_FRAME = 1 << 0,
+
+    // A special frame indicating the start of a run of JS profiling stack
+    // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
+    // field. These frames are needed to get correct ordering between JS
+    // and LABEL frames because JS frames don't carry sp information.
+    // SP is short for "stack pointer".
+    IS_SP_MARKER_FRAME = 1 << 1,
+
+    // A JS frame.
+    IS_JS_FRAME = 1 << 2,
+
+    // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
+    // frames can have this flag set and unset during their lifetime.
+    // JS_OSR frames are ignored.
+    JS_OSR = 1 << 3,
+
+    // The next three are mutually exclusive.
+    // By default, for profiling stack frames that have both a label and a
+    // dynamic string, the two strings are combined into one string of the
+    // form "<label> <dynamicString>" during JSON serialization. The
+    // following flags can be used to change this preset.
+    STRING_TEMPLATE_METHOD = 1 << 4,  // "<label>.<dynamicString>"
+    STRING_TEMPLATE_GETTER = 1 << 5,  // "get <label>.<dynamicString>"
+    STRING_TEMPLATE_SETTER = 1 << 6,  // "set <label>.<dynamicString>"
+
+    // If set, causes this stack frame to be marked as "relevantForJS" in
+    // the profile JSON, which will make it show up in the "JS only" call
+    // tree view.
+    RELEVANT_FOR_JS = 1 << 7,
+
+    // If set, causes the label on this ProfilingStackFrame to be ignored
+    // and to be replaced by the subcategory's label.
+    LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
+
+    // Frame dynamic string does not contain user data.
+    NONSENSITIVE = 1 << 9,
+
+    // A JS Baseline Interpreter frame.
+    IS_BLINTERP_FRAME = 1 << 10,
+
+    FLAGS_BITCOUNT = 16,
+    FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1
+  };
+
+  static_assert(
+      uint32_t(ProfilingCategoryPair::LAST) <=
+          (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
+      "Too many category pairs to fit into u32 with together with the "
+      "reserved bits for the flags");
+
+  bool isLabelFrame() const {
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
+  }
+
+  bool isSpMarkerFrame() const {
+    return uint32_t(flagsAndCategoryPair_) &
+           uint32_t(Flags::IS_SP_MARKER_FRAME);
+  }
+
+  bool isJsFrame() const {  // True iff the IS_JS_FRAME flag is set.
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
+  }
+
+  bool isOSRFrame() const {  // True iff the JS_OSR flag is set.
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
+  }
+
+  void setIsOSRFrame(bool isOSR) {
+    // Sets (isOSR == true) or clears (isOSR == false) the JS_OSR flag.
+    // The packed word is read once and stored once; every other flag bit,
+    // and the category pair kept in the upper bits, is carried over
+    // unchanged.
+    const uint32_t osrBit = uint32_t(Flags::JS_OSR);
+    const uint32_t packed = uint32_t(flagsAndCategoryPair_);
+    flagsAndCategoryPair_ = isOSR ? (packed | osrBit) : (packed & ~osrBit);
+  }
+
+  const char* label() const {
+    // Snapshot the packed word once, so that the flag test and the
+    // category-pair extraction below work on one and the same value.
+    const uint32_t packed = flagsAndCategoryPair_;
+    if (!(packed & uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR))) {
+      return label_;  // The label that was stored when the frame was set up.
+    }
+    const uint32_t pairBits = packed >> uint32_t(Flags::FLAGS_BITCOUNT);
+    return GetProfilingCategoryPairInfo(ProfilingCategoryPair(pairBits)).mLabel;
+  }
+
+  const char* dynamicString() const { return dynamicString_; }  // May be null.
+
+  void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
+                      ProfilingCategoryPair aCategoryPair, uint32_t aFlags) {
+    // Packed word: IS_LABEL_FRAME | category pair (above the flags) | aFlags.
+    const uint32_t pairBits = uint32_t(aCategoryPair)
+                              << uint32_t(Flags::FLAGS_BITCOUNT);
+    label_ = aLabel;
+    dynamicString_ = aDynamicString;
+    spOrScript = sp;  // pcOffsetIfJS_ is left unset: unused on label frames.
+    flagsAndCategoryPair_ = uint32_t(Flags::IS_LABEL_FRAME) | pairBits | aFlags;
+    MOZ_ASSERT(isLabelFrame());
+  }
+
+  void initSpMarkerFrame(void* sp) {
+    // An sp marker frame has an empty label and no dynamic string.
+    const uint32_t otherPairBits = uint32_t(ProfilingCategoryPair::OTHER)
+                                   << uint32_t(Flags::FLAGS_BITCOUNT);
+    label_ = "";
+    dynamicString_ = nullptr;
+    spOrScript = sp;  // pcOffsetIfJS_ stays unset: unused on sp marker frames.
+    flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) | otherPairBits;
+    MOZ_ASSERT(isSpMarkerFrame());
+  }
+
+  void initJsFrame(const char* aLabel, const char* aDynamicString,
+                   void* /* JSScript* */ aScript, int32_t aOffset,
+                   uint64_t aRealmID) {
+    const uint32_t jsPairBits = uint32_t(ProfilingCategoryPair::JS)
+                                << uint32_t(Flags::FLAGS_BITCOUNT);
+    label_ = aLabel;
+    dynamicString_ = aDynamicString;
+    spOrScript = aScript;  // Holds the script pointer on JS frames.
+    pcOffsetIfJS_ = aOffset;
+    realmID_ = aRealmID;
+    flagsAndCategoryPair_ = uint32_t(Flags::IS_JS_FRAME) | jsPairBits;
+    MOZ_ASSERT(isJsFrame());
+  }
+
+  uint32_t flags() const {  // Low FLAGS_BITCOUNT bits of the packed word.
+    return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
+  }
+
+  ProfilingCategoryPair categoryPair() const {  // Bits above the flag bits.
+    return ProfilingCategoryPair(flagsAndCategoryPair_ >>
+                                 uint32_t(Flags::FLAGS_BITCOUNT));
+  }
+
+  uint64_t realmID() const { return realmID_; }  // Set by initJsFrame().
+
+  void* stackAddress() const {  // Asserts that this is not a JS frame.
+    MOZ_ASSERT(!isJsFrame());
+    return spOrScript;
+  }
+
+  // JS frames only (asserted). Note that the pointer returned might be invalid.
+  void* rawScript() const {
+    MOZ_ASSERT(isJsFrame());
+    return spOrScript;
+  }
+  void setRawScript(void* aScript) {  // Asserts that this is a JS frame.
+    MOZ_ASSERT(isJsFrame());
+    spOrScript = aScript;
+  }
+
+  int32_t pcOffset() const {  // Asserts that this is a JS frame.
+    MOZ_ASSERT(isJsFrame());
+    return pcOffsetIfJS_;
+  }
+
+  void setPCOffset(int32_t aOffset) {  // Asserts that this is a JS frame.
+    MOZ_ASSERT(isJsFrame());
+    pcOffsetIfJS_ = aOffset;
+  }
+
+  // The offset of a pc into a script's code can actually be 0, so to
+  // signify a nullptr pc, use a -1 index. This is checked against in
+  // pc() and setPC() to set/get the right pc.
+  static const int32_t NullPCOffset = -1;
+};
+
+// Each thread has its own ProfilingStack. That thread modifies the
+// ProfilingStack, pushing and popping elements as necessary.
+//
+// The ProfilingStack is also read periodically by the profiler's sampler
+// thread. This happens only when the thread that owns the ProfilingStack is
+// suspended. So there are no genuine parallel accesses.
+//
+// However, it is possible for pushing/popping to be interrupted by a periodic
+// sample. Because of this, we need pushing/popping to be effectively atomic.
+//
+// - When pushing a new frame, we increment the stack pointer -- making the new
+//   frame visible to the sampler thread -- only after the new frame has been
+//   fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
+//   the increment is a release-store, which ensures that this store is not
+//   reordered before the writes of the frame.
+//
+// - When popping an old frame, the only operation is the decrementing of the
+//   stack pointer, which is obviously atomic.
+//
+class ProfilingStack final {
+ public:
+  ProfilingStack() = default;
+
+  MFBT_API ~ProfilingStack();
+
+  void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
+                      ProfilingCategoryPair categoryPair, uint32_t flags = 0) {
+    // This thread is the only one that ever changes the value of
+    // stackPointer.
+    // Store the value of the atomic in a non-atomic local variable so that
+    // the compiler won't generate two separate loads from the atomic for
+    // the size check and the frames[] array indexing operation.
+    uint32_t stackPointerVal = stackPointer;
+
+    if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
+                                           categoryPair, flags);
+
+    // This must happen at the end! The compiler will not reorder this
+    // update because stackPointer is Atomic<..., ReleaseAcquire>, so none of
+    // the writes above will be reordered below the stackPointer store.
+    // Do the read and the write as two separate operations, in order to
+    // make it clear that we don't need an atomic increment, which would be
+    // more expensive on x86 than the separate operations done here.
+    // However, don't use stackPointerVal here; instead, allow the compiler
+    // to turn this store into a non-atomic increment instruction which
+    // takes up less code size.
+    stackPointer = stackPointer + 1;
+  }
+
+  void pushSpMarkerFrame(void* sp) {
+    uint32_t oldStackPointer = stackPointer;  // Load the atomic only once.
+
+    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[oldStackPointer].initSpMarkerFrame(sp);
+
+    // This must happen at the end, see the comment in pushLabelFrame.
+    stackPointer = oldStackPointer + 1;
+  }
+
+  void pushJsOffsetFrame(const char* label, const char* dynamicString,
+                         void* script, int32_t offset, uint64_t aRealmID) {
+    // This thread is the only one that ever changes the value of
+    // stackPointer. Only load the atomic once for the check and the indexing.
+    uint32_t oldStackPointer = stackPointer;
+
+    if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
+      ensureCapacitySlow();
+    }
+    frames[oldStackPointer].initJsFrame(label, dynamicString, script, offset,
+                                        aRealmID);
+
+    // This must happen at the end, see the comment in pushLabelFrame.
+    stackPointer = stackPointer + 1;
+  }
+
+  void pop() {
+    MOZ_ASSERT(stackPointer > 0);
+    // Do the read and the write as two separate statements, in order to
+    // make it clear that we don't need an atomic decrement, which would be
+    // more expensive on x86 than the separate operations done here.
+    // This thread is the only one that ever changes the value of
+    // stackPointer.
+    uint32_t oldStackPointer = stackPointer;
+    stackPointer = oldStackPointer - 1;
+  }
+
+  uint32_t stackSize() const { return stackPointer; }  // Current stackPointer.
+  uint32_t stackCapacity() const { return capacity; }
+
+ private:
+  // Out of line path for expanding the buffer, since otherwise this would get
+  // inlined in every DOM WebIDL call.
+  MFBT_API MOZ_COLD void ensureCapacitySlow();
+
+  // No copying.
+  ProfilingStack(const ProfilingStack&) = delete;
+  void operator=(const ProfilingStack&) = delete;
+
+  // No moving either.
+  ProfilingStack(ProfilingStack&&) = delete;
+  void operator=(ProfilingStack&&) = delete;
+
+  uint32_t capacity = 0;  // Pushes call ensureCapacitySlow() at this size.
+
+ public:
+  // The pointer to the stack frames, this is read from the profiler thread and
+  // written from the current thread.
+  //
+  // This is effectively a unique pointer.
+  Atomic<ProfilingStackFrame*, SequentiallyConsistent> frames{nullptr};
+
+  // This may exceed the capacity, so instead use the stackSize() method to
+  // determine the number of valid frames in `frames`. When this is less
+  // than stackCapacity(), it refers to the first free stackframe past the top
+  // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
+  // frame).
+  //
+  // WARNING WARNING WARNING
+  //
+  // This is an atomic variable that uses ReleaseAcquire memory ordering.
+  // See the "Concurrency considerations" paragraph at the top of this file
+  // for more details.
+  Atomic<uint32_t, ReleaseAcquire> stackPointer{0};
+};
+
+class AutoGeckoProfilerEntry;
+class GeckoProfilerEntryMarker;
+class GeckoProfilerBaselineOSRMarker;
+
+class GeckoProfilerThread {
+  friend class AutoGeckoProfilerEntry;
+  friend class GeckoProfilerEntryMarker;
+  friend class GeckoProfilerBaselineOSRMarker;
+
+  ProfilingStack* profilingStack_;  // Null while infra is not installed.
+  // Same as profilingStack_ if the profiler is currently active, otherwise
+  // null.
+  ProfilingStack* profilingStackIfEnabled_;
+
+ public:
+  MFBT_API GeckoProfilerThread();
+
+  uint32_t stackPointer() {
+    MOZ_ASSERT(infraInstalled());
+    return profilingStack_->stackPointer;
+  }
+  ProfilingStackFrame* stack() {
+    MOZ_ASSERT(infraInstalled());  // Same precondition as stackPointer().
+    return profilingStack_->frames;
+  }
+  ProfilingStack* getProfilingStack() { return profilingStack_; }
+  ProfilingStack* getProfilingStackIfEnabled() {
+    return profilingStackIfEnabled_;
+  }
+
+  // True if the profiler infrastructure is set up. Should be true in builds
+  // that include profiler support except during early startup or late
+  // shutdown. Unrelated to the presence of the Gecko Profiler addon.
+  bool infraInstalled() { return profilingStack_ != nullptr; }
+
+  MFBT_API void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
+  void enable(bool enable) {
+    profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr;
+  }
+};
+
+}  // namespace baseprofiler
+}  // namespace mozilla
+
+#endif /* BaseProfilingStack_h */
diff --git a/mozglue/baseprofiler/public/BlocksRingBuffer.h b/mozglue/baseprofiler/public/BlocksRingBuffer.h
new file mode 100644
index 0000000000..6948ab8cf4
--- /dev/null
+++ b/mozglue/baseprofiler/public/BlocksRingBuffer.h
@@ -0,0 +1,1000 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BlocksRingBuffer_h
+#define BlocksRingBuffer_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ModuloBuffer.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+#include <string>
+#include <tuple>
+#include <utility>
+
+namespace mozilla {
+
+// Thread-safe Ring buffer that can store blocks of different sizes during
+// defined sessions.
+// Each *block* contains an *entry* and the entry size:
+// [ entry_size | entry ] [ entry_size | entry ] ...
+// *In-session* is a period of time during which `BlocksRingBuffer` allows
+// reading and writing. *Out-of-session*, the `BlocksRingBuffer` object is
+// still valid, but contains no data, and gracefully denies accesses.
+//
+// To write an entry, the buffer reserves a block of sufficient size (to contain
+// user data of predetermined size), writes the entry size, and lets the caller
+// fill the entry contents using ModuloBuffer::Iterator APIs and a few entry-
+// specific APIs. E.g.:
+// ```
+// BlocksRingBuffer brb(BlocksRingBuffer::ThreadSafety::WithMutex,
+//                      PowerOfTwo<BlocksRingBuffer::Length>(1024));
+// brb.ReserveAndPut([]() { return sizeof(123); },
+//                   [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+//                     aEW->WriteObject(123); });
+// ```
+// Other `Put...` functions may be used as shortcuts for simple entries.
+// The objects given to the caller's callbacks should only be used inside the
+// callbacks and not stored elsewhere, because they keep their own references to
+// the BlocksRingBuffer and therefore should not live longer.
+// Different type of objects may be serialized into an entry, see `Serializer`
+// for more information.
+//
+// When reading data, the buffer iterates over blocks (it knows how to read the
+// entry size, and therefore move to the next block), and lets the caller read
+// the entry inside of each block. E.g.:
+// ```
+// brb.Read([](BlocksRingBuffer::Reader* aR) {
+//   if (!aR) return;  // Out-of-session: there is nothing to read.
+//   for (ProfileBufferEntryReader aER : *aR) {
+//     int n = aER.ReadObject<int>();  // Or any other deserializing function.
+//   }
+// });
+// ```
+// Different type of objects may be deserialized from an entry, see
+// `Deserializer` for more information.
+//
+// The caller may retrieve the `ProfileBufferBlockIndex` corresponding to an
+// entry (`ProfileBufferBlockIndex` is an opaque type preventing the user from
+// modifying it). That index may later be used to get back to that particular
+// entry if it still exists.
+class BlocksRingBuffer {
+ public:
+  // Using ModuloBuffer as underlying circular byte buffer.
+  using Buffer = ModuloBuffer<uint32_t, ProfileBufferIndex>;
+  using Byte = Buffer::Byte;
+
+  // Length type for total buffer (as PowerOfTwo<Length>) and each entry.
+  using Length = uint32_t;
+
+  enum class ThreadSafety { WithoutMutex, WithMutex };
+
+  // No underlying buffer: starts out-of-session (nothing to read or write).
+  explicit BlocksRingBuffer(ThreadSafety aThreadSafety)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {}
+
+  // Create a buffer of the given length; the object starts in-session.
+  explicit BlocksRingBuffer(ThreadSafety aThreadSafety,
+                            PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(Some(UnderlyingBuffer(aLength))) {}
+
+  // Take ownership of an existing buffer; the object starts in-session.
+  BlocksRingBuffer(ThreadSafety aThreadSafety,
+                   UniquePtr<Buffer::Byte[]> aExistingBuffer,
+                   PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(
+            Some(UnderlyingBuffer(std::move(aExistingBuffer), aLength))) {}
+
+  // Use an externally-owned buffer; the object starts in-session.
+  BlocksRingBuffer(ThreadSafety aThreadSafety, Buffer::Byte* aExternalBuffer,
+                   PowerOfTwo<Length> aLength)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex),
+        mMaybeUnderlyingBuffer(
+            Some(UnderlyingBuffer(aExternalBuffer, aLength))) {}
+
+  // Destructor doesn't need to do anything special. (Clearing entries would
+  // only update indices and stats, which won't be accessible after the object
+  // is destroyed anyway.)
+  ~BlocksRingBuffer() = default;
+
+  // Remove underlying buffer, if any.
+  void Reset() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    ResetUnderlyingBuffer();
+  }
+
+  // Replace the current buffer (if any) with a new one of the given length.
+  void Set(PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(aLength);
+  }
+
+  // Replace the current buffer (if any), taking ownership of an existing one.
+  void Set(UniquePtr<Buffer::Byte[]> aExistingBuffer,
+           PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(std::move(aExistingBuffer), aLength);
+  }
+
+  // Replace the current buffer (if any) with an externally-owned one.
+  void Set(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    ResetUnderlyingBuffer();
+    mMaybeUnderlyingBuffer.emplace(aExternalBuffer, aLength);
+  }
+
+  // This cannot change during the lifetime of this buffer, so there's no need
+  // to lock.
+  bool IsThreadSafe() const { return mMutex.IsActivated(); }
+
+  [[nodiscard]] bool IsInSession() const {  // True iff a buffer is present.
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return !!mMaybeUnderlyingBuffer;
+  }
+
+  // Lock the buffer mutex and run the provided callback.
+  // This can be useful when the caller needs to explicitly lock down this
+  // buffer, but not do anything else with it.
+  template <typename Callback>
+  auto LockAndRun(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return std::forward<Callback>(aCallback)();
+  }
+
+  // Buffer length in bytes, or Nothing when there is no underlying buffer
+  // (out-of-session). Read under the buffer's (optional) mutex.
+  Maybe<PowerOfTwo<Length>> BufferLength() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return mMaybeUnderlyingBuffer.map([](const UnderlyingBuffer& aBuffer) {
+      return aBuffer.mBuffer.BufferLength();
+    });
+  }
+
+  // Size of external resources; 0 when there is no underlying buffer.
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    if (mMaybeUnderlyingBuffer) {
+      return mMaybeUnderlyingBuffer->mBuffer.SizeOfExcludingThis(aMallocSizeOf);
+    }
+    return 0;
+  }
+
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  // Snapshot of the buffer state.
+  struct State {
+    // Index to the first block.
+    ProfileBufferBlockIndex mRangeStart;
+
+    // Index past the last block. Equals mRangeStart if empty.
+    ProfileBufferBlockIndex mRangeEnd;
+
+    // Number of blocks that have been pushed into this buffer.
+    uint64_t mPushedBlockCount = 0;
+
+    // Number of blocks that have been removed from this buffer.
+    // Note: Live entries = pushed - cleared.
+    uint64_t mClearedBlockCount = 0;
+  };
+
+  // Get a snapshot of the current state.
+  // When out-of-session, mFirstReadIndex==mNextWriteIndex, and both block
+  // counts are reported as 0.
+  // Note that these may change right after this thread-safe call, so they
+  // should only be used for statistical purposes.
+  State GetState() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    State snapshot{mFirstReadIndex, mNextWriteIndex};
+    if (mMaybeUnderlyingBuffer) {
+      snapshot.mPushedBlockCount = mMaybeUnderlyingBuffer->mPushedBlockCount;
+      snapshot.mClearedBlockCount = mMaybeUnderlyingBuffer->mClearedBlockCount;
+    }
+    return snapshot;
+  }
+
+  class Reader;
+
+  // Class that can iterate through blocks and provide
+  // `ProfileBufferEntryReader`s.
+  // Created through `Reader`, lives within a lock guard lifetime.
+  class BlockIterator {
+   public:
+#ifdef DEBUG
+    ~BlockIterator() {
+      // A BlockIterator must be destroyed while the ring's mutex is held.
+      mRing->mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Comparison is by block index; both iterators must be on the same ring.
+    bool operator==(const BlockIterator aRhs) const {
+      MOZ_ASSERT(mRing == aRhs.mRing);
+      return mBlockIndex == aRhs.mBlockIndex;
+    }
+    bool operator!=(const BlockIterator aRhs) const {
+      MOZ_ASSERT(mRing == aRhs.mRing);
+      return mBlockIndex != aRhs.mBlockIndex;
+    }
+
+    // Step to the block that follows the current one.
+    BlockIterator& operator++() {
+      mBlockIndex = NextBlockIndex();
+      return *this;
+    }
+
+    // Produce a `ProfileBufferEntryReader` for the entry inside this block.
+    ProfileBufferEntryReader operator*() const {
+      return mRing->ReaderInBlockAt(mBlockIndex);
+    }
+
+    // True if this iterator is just past the last entry.
+    bool IsAtEnd() const {
+      const ProfileBufferBlockIndex rangeEnd = BufferRangeEnd();
+      MOZ_ASSERT(mBlockIndex <= rangeEnd);
+      return mBlockIndex == rangeEnd;
+    }
+
+    // Can be used as reference to come back to this entry with `ReadAt()`.
+    ProfileBufferBlockIndex CurrentBlockIndex() const { return mBlockIndex; }
+
+    // Start of the next block, i.e. the index just past this block's end.
+    ProfileBufferBlockIndex NextBlockIndex() const {
+      MOZ_ASSERT(!IsAtEnd());
+      const Length entryBytes =
+          mRing->ReaderInBlockAt(mBlockIndex).RemainingBytes();
+      const ProfileBufferIndex nextStart =
+          mBlockIndex.ConvertToProfileBufferIndex() +
+          ULEB128Size(entryBytes) + entryBytes;
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(nextStart);
+    }
+
+    // Index of the first block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeStart() const {
+      return mRing->mFirstReadIndex;
+    }
+
+    // Index past the last block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeEnd() const {
+      return mRing->mNextWriteIndex;
+    }
+
+   private:
+    // Only a Reader can instantiate a BlockIterator.
+    friend class Reader;
+
+    BlockIterator(const BlocksRingBuffer& aRing,
+                  ProfileBufferBlockIndex aBlockIndex)
+        : mRing(WrapNotNull(&aRing)), mBlockIndex(aBlockIndex) {
+      // A BlockIterator must be created while the ring's mutex is held.
+      mRing->mMutex.AssertCurrentThreadOwns();
+    }
+
+    // Non-null pointer rather than a reference, so the iterator is copyable.
+    // Only valid inside one of the thread-safe BlocksRingBuffer functions.
+    NotNull<const BlocksRingBuffer*> mRing;
+    ProfileBufferBlockIndex mBlockIndex;
+  };
+
+  // Class that can create `BlockIterator`s (e.g., for range-for), or just
+  // iterate through entries; lives within a lock guard lifetime.
+  class MOZ_RAII Reader {
+   public:
+    Reader(const Reader&) = delete;
+    Reader& operator=(const Reader&) = delete;
+    Reader(Reader&&) = delete;
+    Reader& operator=(Reader&&) = delete;
+
+#ifdef DEBUG
+    ~Reader() {
+      // A Reader must be destroyed while the ring's mutex is held.
+      mRing.mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Index of the first block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeStart() const {
+      return mRing.mFirstReadIndex;
+    }
+
+    // Index past the last block in the whole buffer.
+    ProfileBufferBlockIndex BufferRangeEnd() const {
+      return mRing.mNextWriteIndex;
+    }
+
+    // Iterators to the first and past-the-last blocks.
+    // Compatible with range-for (see `ForEach` below as example).
+    BlockIterator begin() const {
+      return BlockIterator(mRing, BufferRangeStart());
+    }
+    // Note that a `BlockIterator` at the `end()` should not be dereferenced, as
+    // there is no actual block there!
+    BlockIterator end() const { return BlockIterator(mRing, BufferRangeEnd()); }
+
+    // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to
+    // the stored range. Note that a `BlockIterator` at the `end()` should not
+    // be dereferenced, as there is no actual block there!
+    BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const {
+      if (!(aBlockIndex < BufferRangeStart())) {
+        // At or after the range start: we at least expect the index to be
+        // valid (pointing exactly at a live block, or just past the end.)
+        mRing.AssertBlockIndexIsValidOrEnd(aBlockIndex);
+        return BlockIterator(mRing, aBlockIndex);
+      }
+      // Anything before the range (including null ProfileBufferBlockIndex) is
+      // clamped at the beginning.
+      return begin();
+    }
+
+    // Run `aCallback(ProfileBufferEntryReader&)` on each entry, first to last.
+    // Callback should not keep the reader, it may be invalid after this call.
+    template <typename Callback>
+    void ForEach(Callback&& aCallback) const {
+      for (BlockIterator it = begin(), last = end(); it != last; ++it) {
+        ProfileBufferEntryReader entryReader = *it;
+        aCallback(entryReader);
+      }
+    }
+
+   private:
+    friend class BlocksRingBuffer;
+
+    explicit Reader(const BlocksRingBuffer& aRing) : mRing(aRing) {
+      // A Reader must be created while the ring's mutex is held.
+      mRing.mMutex.AssertCurrentThreadOwns();
+    }
+
+    // This Reader should only live inside one of the thread-safe
+    // BlocksRingBuffer functions, for this reference to stay valid.
+    const BlocksRingBuffer& mRing;
+  };
+
+  // Call `aCallback(BlocksRingBuffer::Reader*)` (nullptr when out-of-session),
+  // and return whatever `aCallback` returns. Callback should not store
+  // `Reader`, because it may become invalid after this call.
+  template <typename Callback>
+  auto Read(Callback&& aCallback) const {
+    {  // Lock scope: only the in-session call below runs under the lock.
+      baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+      if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) {
+        Reader reader(*this);  // On-stack, only valid during the callback.
+        return std::forward<Callback>(aCallback)(&reader);
+      }
+    }  // Out-of-session: the lock is released before the nullptr call.
+    return std::forward<Callback>(aCallback)(nullptr);
+  }
+
+  // Call `aCallback(ProfileBufferEntryReader&)` on each item, if in-session.
+  // Callback should not store `ProfileBufferEntryReader` past this call.
+  template <typename Callback>
+  void ReadEach(Callback&& aCallback) const {
+    Read([&](Reader* aReader) {
+      if (MOZ_UNLIKELY(!aReader)) {
+        return;  // Out-of-session, nothing to iterate.
+      }
+      aReader->ForEach(aCallback);
+    });
+  }
+
+  // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` for the entry at the
+  // given ProfileBufferBlockIndex, and return whatever `aCallback` returns.
+  // The `Maybe` is `Nothing` when out-of-session, when that entry doesn't
+  // exist anymore, or when the index is just past the last entry. Callback
+  // should not store `ProfileBufferEntryReader`, invalid after this call.
+  template <typename Callback>
+  auto ReadAt(ProfileBufferBlockIndex aBlockIndex, Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    MOZ_ASSERT(aBlockIndex <= mNextWriteIndex);
+    const bool isInLiveRange =
+        aBlockIndex >= mFirstReadIndex && aBlockIndex < mNextWriteIndex;
+    Maybe<ProfileBufferEntryReader> maybeEntryReader;
+    if (MOZ_LIKELY(mMaybeUnderlyingBuffer) && isInLiveRange) {
+      AssertBlockIndexIsValid(aBlockIndex);
+      maybeEntryReader.emplace(ReaderInBlockAt(aBlockIndex));
+    }
+    return std::forward<Callback>(aCallback)(std::move(maybeEntryReader));
+  }
+
+  // Main function to write entries.
+  // Reserve `aCallbackBytes()` bytes, call `aCallback()` with a reference to an
+  // on-stack `Maybe<ProfileBufferEntryWriter>` (`Nothing` when
+  // out-of-session), and return whatever `aCallback` returns. Callback should
+  // not store the `ProfileBufferEntryWriter`, because it may become invalid
+  // after this thread-safe call. Note: `aCallbackBytes` is a callback instead
+  // of a simple value, to delay this potentially-expensive computation until
+  // after we've checked that we're in-session; use `Put(Length, Callback)`
+  // below if you know the size already.
+  template <typename CallbackBytes, typename Callback>
+  auto ReserveAndPut(CallbackBytes aCallbackBytes, Callback&& aCallback) {
+    Maybe<ProfileBufferEntryWriter> maybeEntryWriter;
+
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    if (MOZ_LIKELY(mMaybeUnderlyingBuffer)) {
+      const Length entryBytes = std::forward<CallbackBytes>(aCallbackBytes)();
+      MOZ_RELEASE_ASSERT(entryBytes > 0);
+      const Length bufferBytes =
+          mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();
+      MOZ_RELEASE_ASSERT(entryBytes <= bufferBytes - ULEB128Size(entryBytes),
+                         "Entry would wrap and overwrite itself");
+      // Compute block size from the requested entry size.
+      const Length blockBytes = ULEB128Size(entryBytes) + entryBytes;
+      // We will put this new block at the end of the current buffer.
+      const ProfileBufferIndex blockIndex =
+          mNextWriteIndex.ConvertToProfileBufferIndex();
+      // Compute the end of this new block.
+      const ProfileBufferIndex blockEnd = blockIndex + blockBytes;
+      while (blockEnd >
+             mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
+        // About to trample on an old block.
+        ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
+        mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+        // Move the buffer reading start past this cleared block.
+        mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            mFirstReadIndex.ConvertToProfileBufferIndex() +
+            ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
+      }
+      // Store the new end of buffer.
+      mNextWriteIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(blockEnd);
+      mMaybeUnderlyingBuffer->mPushedBlockCount += 1;
+      // Create the writer over the new block, and start with the entry size.
+      mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(maybeEntryWriter,
+                                                        blockIndex, blockEnd);
+      MOZ_ASSERT(maybeEntryWriter.isSome(),
+                 "Non-empty entry should always create an EntryWriter");
+      maybeEntryWriter->WriteULEB128(entryBytes);
+      MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == entryBytes);
+    }
+
+#ifdef DEBUG
+    auto checkAllWritten = MakeScopeExit([&]() {
+      MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
+    });
+#endif  // DEBUG
+    return std::forward<Callback>(aCallback)(maybeEntryWriter);
+  }
+
+  // Add a new entry of known size, call `aCallback` with a reference to a
+  // `Maybe<ProfileBufferEntryWriter>` (`Nothing` when out-of-session), and
+  // return whatever `aCallback` returns. Callback should not store the
+  // `ProfileBufferEntryWriter`, as it may become invalid after this thread-safe
+  // call.
+  template <typename Callback>
+  auto Put(Length aBytes, Callback&& aCallback) {
+    return ReserveAndPut([aBytes]() { return aBytes; },
+                         std::forward<Callback>(aCallback));
+  }
+
+  // Copy `aBytes` bytes from `aSrc` into a new entry, return its block index.
+  ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
+    return ReserveAndPut(
+        [aBytes]() { return aBytes; },
+        [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
+          if (MOZ_LIKELY(aEntryWriter.isSome())) {
+            aEntryWriter->WriteBytes(aSrc, aBytes);
+            return aEntryWriter->CurrentBlockIndex();
+          }
+          return ProfileBufferBlockIndex{};  // Out-of-session: "empty" index.
+        });
+  }
+
+  // Serialize *all* given objects (using a Serializer for each) into one new
+  // entry, and return its block index (an empty index when out-of-session).
+  template <typename... Ts>
+  ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
+    static_assert(sizeof...(Ts) > 0,
+                  "PutObjects must be given at least one object.");
+    return ReserveAndPut(
+        [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); },
+        [&](Maybe<ProfileBufferEntryWriter>& aEntryWriter) {
+          if (MOZ_LIKELY(aEntryWriter.isSome())) {
+            aEntryWriter->WriteObjects(aTs...);
+            return aEntryWriter->CurrentBlockIndex();
+          }
+          // Out-of-session, return "empty" index.
+          return ProfileBufferBlockIndex{};
+        });
+  }
+
+  // Add a new entry copied from the given object, return block index.
+  template <typename T>
+  ProfileBufferBlockIndex PutObject(const T& aOb) {
+    return PutObjects(aOb);
+  }
+
+  // Append the contents of another BlocksRingBuffer to this one.
+  // All the bytes of `aSrc` between its first read index and its next write
+  // index are copied at the end of this buffer; the oldest blocks of this
+  // buffer are cleared as needed to make room.
+  // Returns the index at which the copied data starts in this buffer, or the
+  // "empty" index if nothing was copied (either buffer is out-of-session, or
+  // `aSrc` is empty).
+  // Release-asserts that the contents of `aSrc` fit in this buffer.
+  // NOTE(review): both mutexes are held at the same time (this one first);
+  // presumably callers never pass `*this` as `aSrc` -- confirm.
+  ProfileBufferBlockIndex AppendContents(const BlocksRingBuffer& aSrc) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    if (MOZ_UNLIKELY(!mMaybeUnderlyingBuffer)) {
+      // We are out-of-session, could not append contents.
+      return ProfileBufferBlockIndex{};
+    }
+
+    baseprofiler::detail::BaseProfilerMaybeAutoLock srcLock(aSrc.mMutex);
+
+    if (MOZ_UNLIKELY(!aSrc.mMaybeUnderlyingBuffer)) {
+      // The other BRB is out-of-session, nothing to copy, we're done.
+      return ProfileBufferBlockIndex{};
+    }
+
+    // Range of bytes to copy from the source.
+    const ProfileBufferIndex srcStartIndex =
+        aSrc.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const ProfileBufferIndex srcEndIndex =
+        aSrc.mNextWriteIndex.ConvertToProfileBufferIndex();
+    const Length bytesToCopy = static_cast<Length>(srcEndIndex - srcStartIndex);
+
+    if (MOZ_UNLIKELY(bytesToCopy == 0)) {
+      // The other BRB is empty, nothing to copy, we're done.
+      return ProfileBufferBlockIndex{};
+    }
+
+    const Length bufferBytes =
+        mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value();
+
+    // The copied bytes must fit in our buffer.
+    MOZ_RELEASE_ASSERT(bytesToCopy <= bufferBytes,
+                       "Entry would wrap and overwrite itself");
+
+    // We will put all copied blocks at the end of the current buffer.
+    const ProfileBufferIndex dstStartIndex =
+        mNextWriteIndex.ConvertToProfileBufferIndex();
+    // Compute where the copy will end...
+    const ProfileBufferIndex dstEndIndex = dstStartIndex + bytesToCopy;
+
+    // ... and clear our oldest blocks, one at a time, until the range from the
+    // first readable block to the end of the copy fits in the buffer.
+    while (dstEndIndex >
+           mFirstReadIndex.ConvertToProfileBufferIndex() + bufferBytes) {
+      // About to trample on an old block.
+      ProfileBufferEntryReader reader = ReaderInBlockAt(mFirstReadIndex);
+      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+      // Move the buffer reading start past this cleared block.
+      // (A block is its ULEB128-encoded entry size followed by the entry.)
+      mFirstReadIndex = ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mFirstReadIndex.ConvertToProfileBufferIndex() +
+          ULEB128Size(reader.RemainingBytes()) + reader.RemainingBytes());
+    }
+
+    // Store the new end of buffer.
+    mNextWriteIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstEndIndex);
+    // Update our pushed count with the number of live blocks we are copying.
+    // (Live blocks in the source = its pushed count minus its cleared count.)
+    mMaybeUnderlyingBuffer->mPushedBlockCount +=
+        aSrc.mMaybeUnderlyingBuffer->mPushedBlockCount -
+        aSrc.mMaybeUnderlyingBuffer->mClearedBlockCount;
+
+    // Copy the raw bytes (block sizes and entries alike) from the source range
+    // to the destination range.
+    auto reader = aSrc.mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+        srcStartIndex, srcEndIndex, nullptr, nullptr);
+    auto writer = mMaybeUnderlyingBuffer->mBuffer.EntryWriterFromTo(
+        dstStartIndex, dstEndIndex);
+    writer.WriteFromReader(reader, bytesToCopy);
+
+    // The first copied block starts where our write index used to be.
+    return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(dstStartIndex);
+  }
+
+  // Discard every entry: the read index is moved to the end, so that none of
+  // the entries stored so far can be read anymore. Takes the mutex.
+  void Clear() {
+    using AutoLock = baseprofiler::detail::BaseProfilerMaybeAutoLock;
+    AutoLock lock(mMutex);
+    ClearAllEntries();
+  }
+
+  // Clear all entries strictly before aBlockIndex, and move the read index to
+  // aBlockIndex so that these entries cannot be read anymore.
+  // Does nothing when out-of-session, or when everything before aBlockIndex
+  // has already been cleared.
+  void ClearBefore(ProfileBufferBlockIndex aBlockIndex) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (mMaybeUnderlyingBuffer.isNothing()) {
+      // Out-of-session, there is nothing to clear.
+      return;
+    }
+    // A not-yet-written index is a caller error; one-past-the-end is fine.
+    MOZ_ASSERT(aBlockIndex <= mNextWriteIndex);
+    if (aBlockIndex <= mFirstReadIndex) {
+      // Everything before that index is gone already.
+      return;
+    }
+    if (aBlockIndex == mNextWriteIndex) {
+      // The index is right past the last block, so all entries must go.
+      ClearAllEntries();
+      return;
+    }
+    // Only some of the entries must be cleared.
+    AssertBlockIndexIsValid(aBlockIndex);
+    // Walk the blocks up to the given index, counting each one as cleared.
+    Reader reader(*this);
+    BlockIterator it = reader.begin();
+    while (it.CurrentBlockIndex() < aBlockIndex) {
+      MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex());
+      mMaybeUnderlyingBuffer->mClearedBlockCount += 1;
+      ++it;
+    }
+    MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex);
+    // From now on, reading starts at the given index.
+    mFirstReadIndex = aBlockIndex;
+  }
+
+#ifdef DEBUG
+  // DEBUG-only helper: print the first-read and next-write indices, each one
+  // followed (in parentheses) by its value masked with the buffer length minus
+  // one, then dump the underlying buffer. Only prints a short message if there
+  // is no underlying buffer.
+  void Dump() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (mMaybeUnderlyingBuffer.isNothing()) {
+      printf("empty BlocksRingBuffer\n");
+      return;
+    }
+    using ULL = unsigned long long;
+    const auto& buffer = mMaybeUnderlyingBuffer->mBuffer;
+    const auto offsetMask = buffer.BufferLength().Value() - 1;
+    const auto startIndex = mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto endIndex = mNextWriteIndex.ConvertToProfileBufferIndex();
+    printf("start=%llu (%llu) end=%llu (%llu) - ", ULL(startIndex),
+           ULL(startIndex & offsetMask), ULL(endIndex),
+           ULL(endIndex & offsetMask));
+    buffer.Dump();
+  }
+#endif  // DEBUG
+
+ private:
+  // In DEBUG mode, assert that `aBlockIndex` is a valid index for a live block.
+  // (Not just in range, but points exactly at the start of a block.)
+  // Slow, so avoid it for internal checks; this is more to check what callers
+  // provide us.
+  // Does nothing in non-DEBUG builds. The mutex must be held by the caller.
+  void AssertBlockIndexIsValid(ProfileBufferBlockIndex aBlockIndex) const {
+#ifdef DEBUG
+    mMutex.AssertCurrentThreadOwns();
+    // The index must be inside the live range [first read, next write).
+    MOZ_ASSERT(aBlockIndex >= mFirstReadIndex);
+    MOZ_ASSERT(aBlockIndex < mNextWriteIndex);
+    // Quick check (default), or slow check (change '1' to '0') below:
+#  if 1
+    // Quick check that this looks like a valid block start.
+    // Read the entry size at the start of the block.
+    const Length entryBytes = ReaderInBlockAt(aBlockIndex).RemainingBytes();
+    MOZ_ASSERT(entryBytes > 0, "Empty entries are not allowed");
+    // The entry and its ULEB128-encoded size must fit in the buffer together.
+    MOZ_ASSERT(
+        entryBytes < mMaybeUnderlyingBuffer->mBuffer.BufferLength().Value() -
+                         ULEB128Size(entryBytes),
+        "Entry would wrap and overwrite itself");
+    // The end of the block should be inside the live buffer range.
+    MOZ_ASSERT(aBlockIndex.ConvertToProfileBufferIndex() +
+                   ULEB128Size(entryBytes) + entryBytes <=
+               mNextWriteIndex.ConvertToProfileBufferIndex());
+#  else
+    // Slow check that the index is really the start of the block.
+    // This kills performance, as it reads from the first index until
+    // aBlockIndex. Only use to debug issues locally.
+    Reader reader(*this);
+    BlockIterator it = reader.begin();
+    for (; it.CurrentBlockIndex() < aBlockIndex; ++it) {
+      MOZ_ASSERT(it.CurrentBlockIndex() < reader.end().CurrentBlockIndex());
+    }
+    MOZ_ASSERT(it.CurrentBlockIndex() == aBlockIndex);
+#  endif
+#endif  // DEBUG
+  }
+
+  // DEBUG-only check of an index provided by a caller: `aBlockIndex` must
+  // either be the past-the-end index (i.e., the next write index), or satisfy
+  // `AssertBlockIndexIsValid`. Slow, so avoid it for internal checks.
+  // Does nothing in non-DEBUG builds.
+  void AssertBlockIndexIsValidOrEnd(ProfileBufferBlockIndex aBlockIndex) const {
+#ifdef DEBUG
+    mMutex.AssertCurrentThreadOwns();
+    const bool isPastTheEnd = (aBlockIndex == mNextWriteIndex);
+    if (!isPastTheEnd) {
+      AssertBlockIndexIsValid(aBlockIndex);
+    }
+#endif  // DEBUG
+  }
+
+  // Create a reader restricted to the entry of the block starting at
+  // aBlockIndex (i.e., positioned after the block's ULEB128-encoded size).
+  // The mutex must be held, there must be an underlying buffer, and
+  // aBlockIndex must be in the live range [first read index, next write index).
+  // Release-asserts that the stored entry size does not go past the end of the
+  // written data.
+  ProfileBufferEntryReader ReaderInBlockAt(
+      ProfileBufferBlockIndex aBlockIndex) const {
+    mMutex.AssertCurrentThreadOwns();
+    MOZ_ASSERT(mMaybeUnderlyingBuffer.isSome());
+    MOZ_ASSERT(aBlockIndex >= mFirstReadIndex);
+    MOZ_ASSERT(aBlockIndex < mNextWriteIndex);
+
+    const auto& buffer = mMaybeUnderlyingBuffer->mBuffer;
+    const ProfileBufferIndex blockStart =
+        aBlockIndex.ConvertToProfileBufferIndex();
+    const ProfileBufferIndex writtenEnd =
+        mNextWriteIndex.ConvertToProfileBufferIndex();
+
+    // Temporary reader over everything from the block start to the end of the
+    // written data, only used to decode the entry size.
+    ProfileBufferEntryReader sizeReader =
+        buffer.EntryReaderFromTo(blockStart, writtenEnd, nullptr, nullptr);
+    const Length entryBytes = sizeReader.ReadULEB128<Length>();
+    // The entry must not extend past the written data.
+    MOZ_RELEASE_ASSERT(entryBytes <= sizeReader.RemainingBytes());
+
+    // The entry follows its encoded size; the next block follows the entry.
+    const ProfileBufferIndex entryStart = blockStart + ULEB128Size(entryBytes);
+    const ProfileBufferIndex nextBlockIndex = entryStart + entryBytes;
+
+    // Only advertise a next block if there is an actual block there, otherwise
+    // keep the "empty" index.
+    ProfileBufferBlockIndex nextBlock{};
+    if (nextBlockIndex < writtenEnd) {
+      nextBlock =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(nextBlockIndex);
+    }
+
+    // Final reader, reduced to the entry area.
+    return buffer.EntryReaderFromTo(entryStart, nextBlockIndex, aBlockIndex,
+                                    nextBlock);
+  }
+
+  // Create a reader over all the bytes between the first read index and the
+  // next write index, or a default-constructed reader if there is no
+  // underlying buffer. The mutex must be held.
+  ProfileBufferEntryReader FullBufferReader() const {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer.isNothing()) {
+      return ProfileBufferEntryReader{};
+    }
+    const ProfileBufferIndex rangeStart =
+        mFirstReadIndex.ConvertToProfileBufferIndex();
+    const ProfileBufferIndex rangeEnd =
+        mNextWriteIndex.ConvertToProfileBufferIndex();
+    return mMaybeUnderlyingBuffer->mBuffer.EntryReaderFromTo(
+        rangeStart, rangeEnd, nullptr, nullptr);
+  }
+
+  // Discard every entry, by moving the read index up to the write index.
+  // Does nothing if there is no underlying buffer. The mutex must be held.
+  void ClearAllEntries() {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer.isNothing()) {
+      return;
+    }
+    // Every block pushed so far now counts as cleared.
+    auto& underlying = *mMaybeUnderlyingBuffer;
+    underlying.mClearedBlockCount = underlying.mPushedBlockCount;
+    // With the read index equal to the write index, nothing is left to read.
+    // (The indices are not reset to 0, in case the user is keeping
+    // `ProfileBufferBlockIndex`'es to old entries.)
+    mFirstReadIndex = mNextWriteIndex;
+  }
+
+  // Clear all entries and discard the underlying buffer, if there is one.
+  // Afterwards this BlocksRingBuffer gracefully rejects all API calls, and a
+  // new underlying buffer may be set. The mutex must be held.
+  void ResetUnderlyingBuffer() {
+    mMutex.AssertCurrentThreadOwns();
+    if (mMaybeUnderlyingBuffer.isSome()) {
+      ClearAllEntries();
+      mMaybeUnderlyingBuffer.reset();
+    }
+  }
+
+  // Used to de/serialize a BlocksRingBuffer (e.g., containing a backtrace).
+  // These (de)serializers access the private members of this class directly.
+  friend ProfileBufferEntryWriter::Serializer<BlocksRingBuffer>;
+  friend ProfileBufferEntryReader::Deserializer<BlocksRingBuffer>;
+  friend ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>>;
+  friend ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>>;
+
+  // Mutex guarding the following members.
+  // `mutable` so that it can be locked from `const` methods.
+  mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;
+
+  // The circular byte buffer together with its block statistics.
+  struct UnderlyingBuffer {
+    // Create a buffer of the given length.
+    explicit UnderlyingBuffer(PowerOfTwo<Length> aLength) : mBuffer(aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Take ownership of an existing buffer of the given length.
+    UnderlyingBuffer(UniquePtr<Buffer::Byte[]> aExistingBuffer,
+                     PowerOfTwo<Length> aLength)
+        : mBuffer(std::move(aExistingBuffer), aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Use an externally-owned buffer of the given length.
+    UnderlyingBuffer(Buffer::Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
+        : mBuffer(aExternalBuffer, aLength) {
+      MOZ_ASSERT(aLength.Value() > ULEB128MaxSize<Length>(),
+                 "Buffer should be able to contain more than a block size");
+    }
+
+    // Copies and all assignments are explicitly disallowed...
+    UnderlyingBuffer(const UnderlyingBuffer&) = delete;
+    UnderlyingBuffer& operator=(const UnderlyingBuffer&) = delete;
+    UnderlyingBuffer& operator=(UnderlyingBuffer&&) = delete;
+
+    // ... so that only move-construction is allowed.
+    UnderlyingBuffer(UnderlyingBuffer&&) = default;
+
+    // Underlying circular byte buffer.
+    Buffer mBuffer;
+
+    // Statistics: number of blocks pushed into, and cleared from, the buffer.
+    uint64_t mPushedBlockCount = 0;
+    uint64_t mClearedBlockCount = 0;
+  };
+
+  // Underlying buffer, with stats.
+  // Only valid during in-session period; `Nothing` means out-of-session.
+  Maybe<UnderlyingBuffer> mMaybeUnderlyingBuffer;
+
+  // Index to the first block to be read (or cleared). Initialized to 1 because
+  // 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept between
+  // sessions, so that stored indices from one session will be gracefully denied
+  // in future sessions.
+  ProfileBufferBlockIndex mFirstReadIndex =
+      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          ProfileBufferIndex(1));
+  // Index where the next new block should be allocated. Initialized to 1
+  // because 0 is reserved for the "empty" ProfileBufferBlockIndex value. Kept
+  // between sessions, so that stored indices from one session will be
+  // gracefully denied in future sessions.
+  // The buffer has no readable entries when this equals `mFirstReadIndex`.
+  ProfileBufferBlockIndex mNextWriteIndex =
+      ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          ProfileBufferIndex(1));
+};
+
+// ----------------------------------------------------------------------------
+// BlocksRingBuffer serialization
+
+// A BlocksRingBuffer can hide another one!
+// This will be used to store marker backtraces; They can be read back into a
+// UniquePtr<BlocksRingBuffer>.
+// Format: len (ULEB128) | start | end | buffer (len bytes) | pushed | cleared
+// len==0 marks an out-of-session buffer, or empty buffer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<BlocksRingBuffer> {
+  // Number of bytes that `Write()` will need to serialize `aBuffer`:
+  // - Only the bytes of a ULEB128-encoded 0 if `aBuffer` is out-of-session or
+  //   empty;
+  // - Otherwise the ULEB128-encoded length, the start and end indices, the
+  //   buffer bytes themselves, and the pushed and cleared block counts.
+  // The length is handled exactly like in `Write()` (checked, then converted
+  // to `Length`), so that both functions always agree on the encoded size.
+  static Length Bytes(const BlocksRingBuffer& aBuffer) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
+    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
+      // Out-of-session, we only need 1 byte to store a length of 0.
+      return ULEB128Size<Length>(0);
+    }
+    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
+    MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+    const auto len = static_cast<Length>(end - start);
+    if (len == 0) {
+      // In-session but empty, also store a length of 0.
+      return ULEB128Size<Length>(0);
+    }
+    // In-session and not empty. The sum is computed in a wider type, make the
+    // conversion back to `Length` explicit.
+    return static_cast<Length>(
+        ULEB128Size(len) + sizeof(start) + sizeof(end) + len +
+        sizeof(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount) +
+        sizeof(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount));
+  }
+
+  // Serialize `aBuffer` into `aEW`, in this format:
+  //   len (ULEB128) | start | end | buffer (len bytes) | pushed | cleared
+  // Only a length of 0 is written if `aBuffer` is out-of-session or empty.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const BlocksRingBuffer& aBuffer) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(aBuffer.mMutex);
+    if (aBuffer.mMaybeUnderlyingBuffer.isNothing()) {
+      // Out-of-session, only store a length of 0.
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    const auto start = aBuffer.mFirstReadIndex.ConvertToProfileBufferIndex();
+    const auto end = aBuffer.mNextWriteIndex.ConvertToProfileBufferIndex();
+    MOZ_ASSERT(end - start <= std::numeric_limits<Length>::max());
+    const auto len = static_cast<Length>(end - start);
+    if (len == 0) {
+      // In-session but empty, only store a length of 0.
+      aEW.WriteULEB128<Length>(0);
+      return;
+    }
+    // In-session.
+    // Store buffer length, start and end indices.
+    aEW.WriteULEB128<Length>(len);
+    aEW.WriteObject(start);
+    aEW.WriteObject(end);
+    // Write all the bytes.
+    auto reader = aBuffer.FullBufferReader();
+    aEW.WriteFromReader(reader, reader.RemainingBytes());
+    // And write stats.
+    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mPushedBlockCount);
+    aEW.WriteObject(aBuffer.mMaybeUnderlyingBuffer->mClearedBlockCount);
+  }
+};
+
+// Deserialization of a BlocksRingBuffer into an existing *empty* one, which
+// may be either out-of-session, or in-session with enough room.
+template <>
+struct ProfileBufferEntryReader::Deserializer<BlocksRingBuffer> {
+  // Read a serialized buffer from `aER` into `aBuffer`.
+  // Nothing else is read, and `aBuffer` is left untouched, if the serialized
+  // length is 0. Otherwise `aBuffer` gets a new memory buffer if it doesn't
+  // have one (release-asserting that an existing one is big enough), and its
+  // indices, bytes and stats are overwritten with the serialized ones.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       BlocksRingBuffer& aBuffer) {
+    // The destination is going to be overwritten, so it should be empty.
+    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
+    // The serialized data starts with the buffer length.
+    const auto len = aER.ReadULEB128<Length>();
+    if (len == 0) {
+      // 0-length means an "uninteresting" buffer, there is nothing to do.
+      return;
+    }
+    // There are some bytes to read, make sure we have somewhere to put them.
+    if (aBuffer.BufferLength().isNothing()) {
+      // The destination is out-of-session, attach a new memory buffer to it.
+      aBuffer.Set(PowerOfTwo<Length>(len));
+      MOZ_ASSERT(aBuffer.BufferLength()->Value() >= len);
+    } else {
+      // The destination is in-session (i.e., it already has a memory buffer
+      // attached), the caller must have allocated enough space.
+      MOZ_RELEASE_ASSERT(aBuffer.BufferLength()->Value() >= len);
+    }
+    // Next are the start and end indices, in that order.
+    const auto start = aER.ReadObject<ProfileBufferIndex>();
+    const auto end = aER.ReadObject<ProfileBufferIndex>();
+    MOZ_ASSERT(end - start == len);
+    aBuffer.mFirstReadIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(start);
+    aBuffer.mNextWriteIndex =
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(end);
+    // Then the bytes, to be copied at the same indices in the destination.
+    auto& underlying = *aBuffer.mMaybeUnderlyingBuffer;
+    auto writer = underlying.mBuffer.EntryWriterFromTo(start, end);
+    writer.WriteFromReader(aER, end - start);
+    MOZ_ASSERT(writer.RemainingBytes() == 0);
+    // And finally the stats: pushed count first, then cleared count.
+    underlying.mPushedBlockCount =
+        aER.ReadObject<decltype(underlying.mPushedBlockCount)>();
+    underlying.mClearedBlockCount =
+        aER.ReadObject<decltype(underlying.mClearedBlockCount)>();
+  }
+
+  // We cannot output a BlocksRingBuffer object (not copyable), use `ReadInto()`
+  // or `aER.ReadObject<UniquePtr<BlocksRingBuffer>>()` instead.
+  static BlocksRingBuffer Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// A BlocksRingBuffer is usually referenced through a UniquePtr, for
+// convenience we support (de)serializing that UniquePtr directly.
+// This is compatible with the non-UniquePtr serialization above, with a null
+// pointer being treated like an out-of-session or empty buffer; and any of
+// these would be deserialized into a null pointer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<UniquePtr<BlocksRingBuffer>> {
+  // Number of bytes needed by `Write()` for the same argument.
+  static Length Bytes(const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Size of the pointed-at BlocksRingBuffer (which could be
+      // out-of-session or empty.)
+      return SumBytes(*aBufferUPtr);
+    }
+    // A null pointer is handled like an empty buffer, i.e., a length of 0.
+    return ULEB128Size<Length>(0);
+  }
+
+  // Write the pointed-at BlocksRingBuffer, or a length of 0 for a null pointer.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<BlocksRingBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Write the pointed-at BlocksRingBuffer (which could be out-of-session
+      // or empty.)
+      aEW.WriteObject(*aBufferUPtr);
+    } else {
+      // A null pointer is handled like an empty buffer, i.e., a length of 0.
+      aEW.WriteULEB128<Length>(0);
+    }
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<UniquePtr<BlocksRingBuffer>> {
+  // Replace `aBuffer` with the result of `Read(aER)`.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       UniquePtr<BlocksRingBuffer>& aBuffer) {
+    aBuffer = Read(aER);
+  }
+
+  // Read a serialized buffer from `aER` into a newly-allocated
+  // BlocksRingBuffer (without mutex), or return a null pointer if the
+  // serialized length is 0.
+  static UniquePtr<BlocksRingBuffer> Read(ProfileBufferEntryReader& aER) {
+    // Remember where the length starts, the non-UniquePtr Deserializer used
+    // below needs to read it again.
+    ProfileBufferEntryReader readerAtLen = aER;
+    if (aER.ReadULEB128<Length>() == 0) {
+      // 0-length means an "uninteresting" buffer, just return nullptr.
+      return nullptr;
+    }
+    // There is a non-empty buffer to read, allocate an empty BlocksRingBuffer
+    // without mutex to receive it.
+    auto bufferUPtr = MakeUnique<BlocksRingBuffer>(
+        BlocksRingBuffer::ThreadSafety::WithoutMutex);
+    // Go back to the length, and let the non-UniquePtr Deserializer read
+    // everything from there.
+    aER = readerAtLen;
+    aER.ReadIntoObject(*bufferUPtr);
+    return bufferUPtr;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // BlocksRingBuffer_h
diff --git a/mozglue/baseprofiler/public/ModuloBuffer.h b/mozglue/baseprofiler/public/ModuloBuffer.h
new file mode 100644
index 0000000000..80e765279e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ModuloBuffer.h
@@ -0,0 +1,618 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ModuloBuffer_h
+#define ModuloBuffer_h
+
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/PowerOfTwo.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/UniquePtr.h"
+
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// The ModuloBuffer class is a circular buffer that holds raw byte values, with
+// data-read/write helpers.
+//
+// OffsetT: Type of the internal offset into the buffer of bytes, it should be
+// large enough to access all bytes of the buffer. It will also be used as
+// Length (in bytes) of the buffer and of any subset. Default: uint32_t.
+// IndexT: Type of the external index, it should be large enough that overflows
+// should not happen during the lifetime of the ModuloBuffer.
+// Default: uint64_t.
+//
+// The basic usage is to create an iterator-like object with `ReaderAt(Index)`
+// or `WriterAt(Index)`, and use it to read/write data blobs. Iterators
+// automatically manage the wrap-around (through "Modulo", which is effectively
+// an AND-masking with the PowerOfTwo buffer size.)
+//
+// There is zero safety: No thread safety, no checks that iterators may be
+// overwriting data that's still to be read, etc. It's up to the caller to add
+// adequate checks.
+// The intended use is as an underlying buffer for a safer container.
+template <typename OffsetT = uint32_t, typename IndexT = uint64_t>
+class ModuloBuffer {
+ public:
+  // Type of the bytes stored in the buffer.
+  using Byte = uint8_t;
+  static_assert(sizeof(Byte) == 1, "ModuloBuffer::Byte must be 1 byte");
+  // Type of the internal offsets into the buffer (from `OffsetT`).
+  using Offset = OffsetT;
+  static_assert(!std::numeric_limits<Offset>::is_signed,
+                "ModuloBuffer::Offset must be an unsigned integral type");
+  // Lengths (in bytes) use the same type as offsets.
+  using Length = Offset;
+  // Type of the external indices (from `IndexT`), at least as big as `Offset`.
+  using Index = IndexT;
+  static_assert(!std::numeric_limits<Index>::is_signed,
+                "ModuloBuffer::Index must be an unsigned integral type");
+  static_assert(sizeof(Index) >= sizeof(Offset),
+                "ModuloBuffer::Index size must >= Offset");
+
+  // Create a buffer of the given length.
+  // The bytes are allocated here with `new[]` (their initial contents is not
+  // set), and `mBufferDeleter` is given the matching `delete[]`.
+  explicit ModuloBuffer(PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()),
+        mBuffer(WrapNotNull(new Byte[aLength.Value()])),
+        mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}
+
+  // Take ownership of an existing buffer. Existing contents is ignored.
+  // Done by extracting the raw pointer from UniquePtr<Byte[]>, and adding
+  // an equivalent `delete[]` in `mBufferDeleter`.
+  // `aLength` is taken as the size of `aExistingBuffer`, it is not verified
+  // here.
+  // NOTE(review): `aExistingBuffer` is presumably expected to be non-null, as
+  // its pointer goes through `WrapNotNull` -- confirm.
+  ModuloBuffer(UniquePtr<Byte[]> aExistingBuffer, PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()),
+        mBuffer(WrapNotNull(aExistingBuffer.release())),
+        mBufferDeleter([](Byte* aBuffer) { delete[] aBuffer; }) {}
+
+  // Use an externally-owned buffer. Existing contents is ignored.
+  // `mBufferDeleter` is not set by this constructor, so the destructor will
+  // not free `aExternalBuffer`.
+  // `aLength` is taken as the size of `aExternalBuffer`, it is not verified
+  // here.
+  ModuloBuffer(Byte* aExternalBuffer, PowerOfTwo<Length> aLength)
+      : mMask(aLength.Mask()), mBuffer(WrapNotNull(aExternalBuffer)) {}
+
+  // Disallow copying, as we may uniquely own the resource.
+  ModuloBuffer(const ModuloBuffer& aOther) = delete;
+  ModuloBuffer& operator=(const ModuloBuffer& aOther) = delete;
+
+  // Allow move-construction. Stealing ownership if the original had it.
+  // This effectively prevents copy construction, and all assignments; needed so
+  // that a ModuloBuffer may be initialized from a separate construction.
+  // The moved-from ModuloBuffer still points at the resource but doesn't own
+  // it, so it won't try to free it; but accesses are not guaranteed, so it
+  // should not be used anymore.
+  // Ownership is carried by `mBufferDeleter`: whichever ModuloBuffer holds a
+  // non-empty deleter frees the bytes in its destructor.
+  ModuloBuffer(ModuloBuffer&& aOther)
+      : mMask(std::move(aOther.mMask)),
+        mBuffer(std::move(aOther.mBuffer)),
+        mBufferDeleter(std::move(aOther.mBufferDeleter)) {
+    // The above move leaves `aOther.mBufferDeleter` in a valid state but with
+    // an unspecified value, so it could theoretically still contain the
+    // original function, which would be bad because we don't want aOther to
+    // delete the resource that `this` now owns.
+    if (aOther.mBufferDeleter) {
+      // `aOther` still had a non-empty deleter, reset it.
+      aOther.mBufferDeleter = nullptr;
+    }
+  }
+
+  // Disallow assignment, as we have some `const` members.
+  ModuloBuffer& operator=(ModuloBuffer&& aOther) = delete;
+
+  // Destructor: release the bytes through `mBufferDeleter` if there is one,
+  // i.e., if we uniquely own the resource.
+  ~ModuloBuffer() {
+    if (!mBufferDeleter) {
+      // No deleter, nothing to release.
+      return;
+    }
+    mBufferDeleter(mBuffer);
+  }
+
+  // Length of the buffer in bytes, recomputed from the stored mask (the
+  // length is the mask value plus one).
+  PowerOfTwo<Length> BufferLength() const {
+    const auto lengthInBytes = mMask.MaskValue() + 1;
+    return PowerOfTwo<Length>(lengthInBytes);
+  }
+
+  // Size of the external resources, i.e., of the bytes if we own them.
+  // Note: `mBufferDeleter`'s potential external data (for its captures) is not
+  // included, as it's hidden in the `std::function` implementation.
+  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    if (mBufferDeleter) {
+      // We have a buffer deleter, so the data is ours to report.
+      return aMallocSizeOf(mBuffer);
+    }
+    // Without a buffer deleter, assume we don't own the data, so it's probably
+    // on the stack, or should be reported by its owner.
+    return 0;
+  }
+
+  // Size of this object itself, plus the size of its external resources.
+  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    const size_t selfBytes = aMallocSizeOf(this);
+    const size_t externalBytes = SizeOfExcludingThis(aMallocSizeOf);
+    return selfBytes + externalBytes;
+  }
+
+  // Return an entry reader for the bytes in the range [aStart, aEnd), made of
+  // one span, or of two spans if the range wraps around the end of the buffer.
+  // `aBlockIndex` and `aNextBlockIndex` are handed to the reader as-is.
+  // An empty range gives a default-constructed reader.
+  // The range must not be longer than the buffer (only checked in DEBUG).
+  ProfileBufferEntryReader EntryReaderFromTo(
+      Index aStart, Index aEnd, ProfileBufferBlockIndex aBlockIndex,
+      ProfileBufferBlockIndex aNextBlockIndex) const {
+    using ConstByteSpan = Span<const ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return ProfileBufferEntryReader{};
+    }
+    const auto bufferBytes = mMask.MaskValue() + 1;
+    // Don't allow over-wrapping.
+    MOZ_ASSERT(aEnd - aStart <= bufferBytes);
+    // Offset of the first byte, in 0 .. (buffer size - 1)
+    const Offset firstOffset = static_cast<Offset>(aStart) & mMask;
+    // Offset past the last byte, in 1 .. (buffer size)
+    const Offset pastLastOffset = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (firstOffset >= pastLastOffset) {
+      // The range goes over the end of the buffer: one span runs until the
+      // end, and another one restarts at the beginning of the buffer.
+      return ProfileBufferEntryReader{
+          ConstByteSpan(&mBuffer[firstOffset], bufferBytes - firstOffset),
+          ConstByteSpan(&mBuffer[0], pastLastOffset), aBlockIndex,
+          aNextBlockIndex};
+    }
+    // The range is in one piece, one span is enough.
+    return ProfileBufferEntryReader{
+        ConstByteSpan(&mBuffer[firstOffset], pastLastOffset - firstOffset),
+        aBlockIndex, aNextBlockIndex};
+  }
+
+  // Return an entry writer for the bytes in the range [aStart, aEnd), made of
+  // one span, or of two spans if the range wraps around the end of the buffer.
+  // The writer is also given block indices created from `aStart` and `aEnd`.
+  // An empty range gives a default-constructed writer.
+  // The range must not be longer than the buffer (only checked in DEBUG).
+  ProfileBufferEntryWriter EntryWriterFromTo(Index aStart, Index aEnd) const {
+    using ByteSpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return ProfileBufferEntryWriter{};
+    }
+    const auto bufferBytes = mMask.MaskValue() + 1;
+    MOZ_ASSERT(aEnd - aStart <= bufferBytes);
+    // Offset of the first byte, in 0 .. (buffer size - 1)
+    const Offset firstOffset = static_cast<Offset>(aStart) & mMask;
+    // Offset past the last byte, in 1 .. (buffer size)
+    const Offset pastLastOffset = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (firstOffset >= pastLastOffset) {
+      // The range goes over the end of the buffer: one span runs until the
+      // end, and another one restarts at the beginning of the buffer.
+      return ProfileBufferEntryWriter{
+          ByteSpan(&mBuffer[firstOffset], bufferBytes - firstOffset),
+          ByteSpan(&mBuffer[0], pastLastOffset),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+    }
+    // The range is in one piece, one span is enough.
+    return ProfileBufferEntryWriter{
+        ByteSpan(&mBuffer[firstOffset], pastLastOffset - firstOffset),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd)};
+  }
+
+  // Emplace into `aMaybeEntryWriter` an entry writer for the bytes in the
+  // range [aStart, aEnd), made of one span, or of two spans if the range wraps
+  // around the end of the buffer. The writer is also given block indices
+  // created from `aStart` and `aEnd`.
+  // `aMaybeEntryWriter` should be Nothing on entry, and it stays that way if
+  // the range is empty.
+  // The range must not be longer than the buffer (only checked in DEBUG).
+  void EntryWriterFromTo(Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter,
+                         Index aStart, Index aEnd) const {
+    MOZ_ASSERT(aMaybeEntryWriter.isNothing(),
+               "Reference entry writer should be Nothing.");
+    using ByteSpan = Span<ProfileBufferEntryReader::Byte>;
+    if (aStart == aEnd) {
+      return;
+    }
+    const auto bufferBytes = mMask.MaskValue() + 1;
+    MOZ_ASSERT(aEnd - aStart <= bufferBytes);
+    // Offset of the first byte, in 0 .. (buffer size - 1)
+    const Offset firstOffset = static_cast<Offset>(aStart) & mMask;
+    // Offset past the last byte, in 1 .. (buffer size)
+    const Offset pastLastOffset = (static_cast<Offset>(aEnd - 1) & mMask) + 1;
+    if (firstOffset >= pastLastOffset) {
+      // The range goes over the end of the buffer: one span runs until the
+      // end, and another one restarts at the beginning of the buffer.
+      aMaybeEntryWriter.emplace(
+          ByteSpan(&mBuffer[firstOffset], bufferBytes - firstOffset),
+          ByteSpan(&mBuffer[0], pastLastOffset),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+      return;
+    }
+    // The range is in one piece, one span is enough.
+    aMaybeEntryWriter.emplace(
+        ByteSpan(&mBuffer[firstOffset], pastLastOffset - firstOffset),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aStart),
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(aEnd));
+  }
+
+  // All ModuloBuffer operations should be done through this iterator, which has
+  // an effectively infinite range. The underlying wrapping-around is hidden.
+  // Use `ReaderAt(Index)` or `WriterAt(Index)` to create it.
+  //
+  // `const Iterator<...>` means the iterator itself cannot change, i.e., it
+  // cannot move, and only its const methods are available. Note that these
+  // const methods may still be used to modify the buffer contents (e.g.:
+  // `operator*()`, `Poke()`).
+  //
+  // `Iterator</*IsBufferConst=*/true>` means the buffer contents cannot be
+  // modified, i.e., write operations are forbidden, but the iterator may still
+  // move if non-const itself.
+  template <bool IsBufferConst>
+  class Iterator {
+    // Alias to const- or mutable-`ModuloBuffer` depending on `IsBufferConst`.
+    using ConstOrMutableBuffer =
+        std::conditional_t<IsBufferConst, const ModuloBuffer, ModuloBuffer>;
+
+    // Implementation note about the strange enable-if's below:
+    //   `template <bool NotIBC = !IsBufferConst> enable_if_t<NotIBC>`
+    // which intuitively could be simplified to:
+    //   `enable_if_t<!IsBufferConst>`
+    // The former extra-templated syntax is in fact necessary to delay
+    // instantiation of these functions until they are actually needed.
+    //
+    // If we were just doing `enable_if_t<!IsBufferConst>`, this would only
+    // depend on the *class* (`ModuloBuffer<...>::Iterator`), which gets
+    // instantiated when a `ModuloBuffer` is created with some template
+    // arguments; at that point, all non-templated methods get instantiated, so
+    // there's no "SFINAE" happening, and `enable_if_t<...>` is actually doing
+    // `typename enable_if<...>::type` on the spot, but there is no `type` if
+    // `IsBufferConst` is true, so it just fails right away. E.g.:
+    // error: no type named 'type' in 'std::enable_if<false, void>';
+    //        'enable_if' cannot be used to disable this declaration
+    // note: in instantiation of template type alias 'enable_if_t'
+    // > std::enable_if_t<!IsBufferConst> WriteObject(const T& aObject) {
+    //       in instantiation of template class
+    //       'mozilla::ModuloBuffer<...>::Iterator<true>'
+    // > auto it = mb.ReaderAt(1);
+    //
+    // By adding another template level `template <bool NotIsBufferConst =
+    // !IsBufferConst>`, the instantiation is delayed until the function is
+    // actually invoked somewhere, e.g. `it.Poke(...);`.
+    // So at that invocation point, the compiler looks for a "Poke" name in it,
+    // and considers potential template instantiations that could work. The
+    // `enable_if_t` is *now* attempted, with `NotIsBufferConst` taking its
+    // value from `!IsBufferConst`:
+    // - If `IsBufferConst` is false, `NotIsBufferConst` is true,
+    // `enable_if<NotIsBufferConst>` does define a `type` (`void` by default),
+    // so `enable_if_t` happily becomes `void`, the function exists and may be
+    // called.
+    // - Otherwise if `IsBufferConst` is true, `NotIsBufferConst` is false,
+    // `enable_if<NotIsBufferConst>` does *not* define a `type`, therefore
+    // `enable_if_t` produces an error because there is no `type`. Now "SFINAE"
+    // happens: This "Substitution Failure Is Not An Error" (by itself)... But
+    // then, there are no other functions named "Poke" as requested in the
+    // `it.Poke(...);` call, so we are now getting an error (can't find
+    // function), as expected because `it` had `IsBufferConst`==true. (But at
+    // least the compiler waited until this invocation attempt before outputting
+    // an error.)
+    //
+    // C++ is fun!
+
+   public:
+    // These definitions are expected by std functions, to recognize this as an
+    // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+    using difference_type = Index;
+    using value_type = Byte;
+    using pointer = std::conditional_t<IsBufferConst, const Byte*, Byte*>;
+    using reference = std::conditional_t<IsBufferConst, const Byte&, Byte&>;
+    using iterator_category = std::random_access_iterator_tag;
+
+    // Can always copy/assign from the same kind of iterator.
+    Iterator(const Iterator& aRhs) = default;
+    Iterator& operator=(const Iterator& aRhs) = default;
+
+    // Can implicitly copy an Iterator-to-mutable (reader+writer) to
+    // Iterator-to-const (reader-only), but not the reverse.
+    template <bool IsRhsBufferConst,
+              typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+    MOZ_IMPLICIT Iterator(const Iterator<IsRhsBufferConst>& aRhs)
+        : mModuloBuffer(aRhs.mModuloBuffer), mIndex(aRhs.mIndex) {}
+
+    // Can implicitly assign from an Iterator-to-mutable (reader+writer) to
+    // Iterator-to-const (reader-only), but not the reverse.
+    template <bool IsRhsBufferConst,
+              typename = std::enable_if_t<(!IsRhsBufferConst) && IsBufferConst>>
+    Iterator& operator=(const Iterator<IsRhsBufferConst>& aRhs) {
+      mModuloBuffer = aRhs.mModuloBuffer;
+      mIndex = aRhs.mIndex;
+      return *this;
+    }
+
+    // Current location of the iterator in the `Index` range.
+    // Note that due to wrapping, multiple indices may effectively point at the
+    // same byte in the buffer.
+    Index CurrentIndex() const { return mIndex; }
+
+    // Location comparison in the `Index` range. I.e., two `Iterator`s may look
+    // unequal, but refer to the same buffer location.
+    // Must be on the same buffer.
+    bool operator==(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex == aRhs.mIndex;
+    }
+    bool operator!=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex != aRhs.mIndex;
+    }
+    bool operator<(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex < aRhs.mIndex;
+    }
+    bool operator<=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex <= aRhs.mIndex;
+    }
+    bool operator>(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex > aRhs.mIndex;
+    }
+    bool operator>=(const Iterator& aRhs) const {
+      MOZ_ASSERT(mModuloBuffer == aRhs.mModuloBuffer);
+      return mIndex >= aRhs.mIndex;
+    }
+
+    // Movement in the `Index` range (wrapping only happens on buffer access).
+    Iterator& operator++() {
+      ++mIndex;
+      return *this;
+    }
+    Iterator operator++(int) {
+      Iterator here(*mModuloBuffer, mIndex);
+      ++mIndex;
+      return here;
+    }
+    Iterator& operator--() {
+      --mIndex;
+      return *this;
+    }
+    Iterator operator--(int) {
+      Iterator here(*mModuloBuffer, mIndex);
+      --mIndex;
+      return here;
+    }
+    Iterator& operator+=(Length aLength) {
+      mIndex += aLength;
+      return *this;
+    }
+    Iterator operator+(Length aLength) const {
+      return Iterator(*mModuloBuffer, mIndex + aLength);
+    }
+    friend Iterator operator+(Length aLength, const Iterator& aIt) {
+      return aIt + aLength;
+    }
+    Iterator& operator-=(Length aLength) {
+      mIndex -= aLength;
+      return *this;
+    }
+    Iterator operator-(Length aLength) const {
+      return Iterator(*mModuloBuffer, mIndex - aLength);
+    }
+
+    // Distance from `aRef` to here in the `Index` range.
+    // May be negative (as 2's complement) if `aRef > *this`.
+    Index operator-(const Iterator& aRef) const {
+      MOZ_ASSERT(mModuloBuffer == aRef.mModuloBuffer);
+      return mIndex - aRef.mIndex;
+    }
+
+    // Dereference a single byte (read-only if `IsBufferConst` is true).
+    reference operator*() const {
+      return mModuloBuffer->mBuffer[OffsetInBuffer()];
+    }
+
+    // Random-access dereference of the byte `aLength` past this iterator.
+    reference operator[](Length aLength) const { return *(*this + aLength); }
+
+    // Write data (if `IsBufferConst` is false) but don't move iterator.
+    template <bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> Poke(const void* aSrc,
+                                            Length aLength) const {
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value());
+      // Offset inside the buffer (corresponding to our Index).
+      Offset offset = OffsetInBuffer();
+      // Compute remaining bytes between this offset and the end of the buffer.
+      Length remaining = mModuloBuffer->BufferLength().Value() - offset;
+      if (MOZ_LIKELY(remaining >= aLength)) {
+        // Enough space to write everything before the end.
+        memcpy(&mModuloBuffer->mBuffer[offset], aSrc, aLength);
+      } else {
+        // Not enough space. Write as much as possible before the end.
+        memcpy(&mModuloBuffer->mBuffer[offset], aSrc, remaining);
+        // And then continue from the beginning of the buffer.
+        memcpy(&mModuloBuffer->mBuffer[0],
+               static_cast<const Byte*>(aSrc) + remaining,
+               (aLength - remaining));
+      }
+    }
+
+    // Write object data (if `IsBufferConst` is false) but don't move iterator.
+    // Note that this copies bytes from the object, with the intent to read them
+    // back later. Restricted to trivially-copyable types, which support this
+    // without Undefined Behavior!
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> PokeObject(const T& aObject) const {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "PokeObject<T> - T must be trivially copyable");
+      return Poke(&aObject, sizeof(T));
+    }
+
+    // Write data (if `IsBufferConst` is false) and move iterator ahead.
+    template <bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> Write(const void* aSrc, Length aLength) {
+      Poke(aSrc, aLength);
+      mIndex += aLength;
+    }
+
+    // Write object data (if `IsBufferConst` is false) and move iterator ahead.
+    // Note that this copies bytes from the object, with the intent to read them
+    // back later. Restricted to trivially-copyable types, which support this
+    // without Undefined Behavior!
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> WriteObject(const T& aObject) {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "WriteObject<T> - T must be trivially copyable");
+      return Write(&aObject, sizeof(T));
+    }
+
+    // Number of bytes needed to represent `aValue` in unsigned LEB128.
+    template <typename T>
+    static unsigned ULEB128Size(T aValue) {
+      return ::mozilla::ULEB128Size(aValue);
+    }
+
+    // Write number as unsigned LEB128 (if `IsBufferConst` is false) and move
+    // iterator ahead.
+    template <typename T, bool NotIsBufferConst = !IsBufferConst>
+    std::enable_if_t<NotIsBufferConst> WriteULEB128(T aValue) {
+      ::mozilla::WriteULEB128(aValue, *this);
+    }
+
+    // Read data but don't move iterator.
+    void Peek(void* aDst, Length aLength) const {
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value());
+      // Offset inside the buffer (corresponding to our Index).
+      Offset offset = OffsetInBuffer();
+      // Compute remaining bytes between this offset and the end of the buffer.
+      Length remaining = mModuloBuffer->BufferLength().Value() - offset;
+      if (MOZ_LIKELY(remaining >= aLength)) {
+        // Can read everything we need before the end of the buffer.
+        memcpy(aDst, &mModuloBuffer->mBuffer[offset], aLength);
+      } else {
+        // Read as much as possible before the end of the buffer.
+        memcpy(aDst, &mModuloBuffer->mBuffer[offset], remaining);
+        // And then continue from the beginning of the buffer.
+        memcpy(static_cast<Byte*>(aDst) + remaining, &mModuloBuffer->mBuffer[0],
+               (aLength - remaining));
+      }
+    }
+
+    // Read data into an object but don't move iterator.
+    // Note that this overwrites `aObject` with bytes from the buffer.
+    // Restricted to trivially-copyable types, which support this without
+    // Undefined Behavior!
+    template <typename T>
+    void PeekIntoObject(T& aObject) const {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "PeekIntoObject<T> - T must be trivially copyable");
+      Peek(&aObject, sizeof(T));
+    }
+
+    // Read data as an object but don't move iterator.
+    // Note that this creates a default `T` first, and then overwrites it with
+    // bytes from the buffer. Restricted to trivially-copyable types, which
+    // support this without Undefined Behavior!
+    template <typename T>
+    T PeekObject() const {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "PeekObject<T> - T must be trivially copyable");
+      T object;
+      PeekIntoObject(object);
+      return object;
+    }
+
+    // Read data and move iterator ahead.
+    void Read(void* aDst, Length aLength) {
+      Peek(aDst, aLength);
+      mIndex += aLength;
+    }
+
+    // Read data into a mutable iterator and move both iterators ahead.
+    void ReadInto(Iterator</* IsBufferConst */ false>& aDst, Length aLength) {
+      // Don't allow data larger than the buffer.
+      MOZ_ASSERT(aLength <= mModuloBuffer->BufferLength().Value());
+      MOZ_ASSERT(aLength <= aDst.mModuloBuffer->BufferLength().Value());
+      // Offset inside the buffer (corresponding to our Index).
+      Offset offset = OffsetInBuffer();
+      // Compute remaining bytes between this offset and the end of the buffer.
+      Length remaining = mModuloBuffer->BufferLength().Value() - offset;
+      if (MOZ_LIKELY(remaining >= aLength)) {
+        // Can read everything we need before the end of the buffer.
+        aDst.Write(&mModuloBuffer->mBuffer[offset], aLength);
+      } else {
+        // Read as much as possible before the end of the buffer.
+        aDst.Write(&mModuloBuffer->mBuffer[offset], remaining);
+        // And then continue from the beginning of the buffer.
+        aDst.Write(&mModuloBuffer->mBuffer[0], (aLength - remaining));
+      }
+      mIndex += aLength;
+    }
+
+    // Read data into an object and move iterator ahead.
+    // Note that this overwrites `aObject` with bytes from the buffer.
+    // Restricted to trivially-copyable types, which support this without
+    // Undefined Behavior!
+    template <typename T>
+    void ReadIntoObject(T& aObject) {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "ReadIntoObject<T> - T must be trivially copyable");
+      Read(&aObject, sizeof(T));
+    }
+
+    // Read data as an object and move iterator ahead.
+    // Note that this creates a default `T` first, and then overwrites it with
+    // bytes from the buffer. Restricted to trivially-copyable types, which
+    // support this without Undefined Behavior!
+    template <typename T>
+    T ReadObject() {
+      static_assert(std::is_trivially_copyable<T>::value,
+                    "ReadObject<T> - T must be trivially copyable");
+      T object;
+      ReadIntoObject(object);
+      return object;
+    }
+
+    // Read an unsigned LEB128 number and move iterator ahead.
+    template <typename T>
+    T ReadULEB128() {
+      return ::mozilla::ReadULEB128<T>(*this);
+    }
+
+   private:
+    // Only a ModuloBuffer can instantiate its iterator.
+    friend class ModuloBuffer;
+
+    Iterator(ConstOrMutableBuffer& aBuffer, Index aIndex)
+        : mModuloBuffer(WrapNotNull(&aBuffer)), mIndex(aIndex) {}
+
+    // Convert the Iterator's mIndex into an offset inside the byte buffer.
+    Offset OffsetInBuffer() const {
+      return static_cast<Offset>(mIndex) & mModuloBuffer->mMask;
+    }
+
+    // ModuloBuffer that this Iterator operates on.
+    // Using a non-null pointer instead of a reference, to allow re-assignment
+    // of an Iterator variable.
+    NotNull<ConstOrMutableBuffer*> mModuloBuffer;
+
+    // Position of this iterator in the wider `Index` range. (Will be wrapped
+    // around as needed when actually accessing bytes from the buffer.)
+    Index mIndex;
+  };
+
+  // Shortcut to iterator to const (read-only) data.
+  using Reader = Iterator<true>;
+  // Shortcut to iterator to non-const (read/write) data.
+  using Writer = Iterator<false>;
+
+  // Make a read-only iterator positioned at `aIndex`.
+  Reader ReaderAt(Index aIndex) const { return Reader{*this, aIndex}; }
+
+  // Make a read/write iterator positioned at `aIndex`.
+  Writer WriterAt(Index aIndex) { return Writer{*this, aIndex}; }
+
+#ifdef DEBUG
+  // Debug-only: print at most the first 128 bytes of storage as hex pairs.
+  void Dump() const {
+    const Length bufferLength = BufferLength().Value();
+    const Length dumpCount = (bufferLength > 128) ? 128 : bufferLength;
+    Length offset = 0;
+    while (offset < dumpCount) {
+      printf("%02x ", mBuffer[offset++]);
+    }
+    printf("\n");
+  }
+#endif  // DEBUG
+
+ private:
+  // Mask used to convert an index to an offset in `mBuffer`
+  const PowerOfTwoMask<Offset> mMask;
+
+  // Buffer data. `const NotNull<...>` shows that `mBuffer` is `const`, and
+  // `Byte* const` shows that the pointer cannot be changed to point at
+  // something else, but the pointed-at `Byte`s are writable.
+  const NotNull<Byte* const> mBuffer;
+
+  // Function used to release the buffer resource (if needed).
+  std::function<void(Byte*)> mBufferDeleter;
+};
+
+}  // namespace mozilla
+
+#endif  // ModuloBuffer_h
diff --git a/mozglue/baseprofiler/public/PowerOfTwo.h b/mozglue/baseprofiler/public/PowerOfTwo.h
new file mode 100644
index 0000000000..7d396c15e6
--- /dev/null
+++ b/mozglue/baseprofiler/public/PowerOfTwo.h
@@ -0,0 +1,322 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// PowerOfTwo is a value type that always holds a power of 2.
+// It has the same size as its underlying unsigned type, but offers the
+// guarantee of being a power of 2, which permits some optimizations when
+// involved in modulo operations (using masking instead of actual modulo).
+//
+// PowerOfTwoMask contains a mask corresponding to a power of 2.
+// E.g., 2^8 is 256 or 0x100, the corresponding mask is 2^8-1 or 255 or 0xFF.
+// It should be used instead of PowerOfTwo in situations where most operations
+// would be modulo, this saves having to recompute the mask from the stored
+// power of 2.
+//
+// One common use would be for ring-buffer containers with a power-of-2 size,
+// where an index is usually converted to an in-buffer offset by `i % size`.
+// Instead, the container could store a PowerOfTwo or PowerOfTwoMask, and do
+// `i % p2` or `i & p2m`, which is more efficient than for arbitrary sizes.
+//
+// Shortcuts for common 32- and 64-bit values: PowerOfTwo32, etc.
+//
+// To create constexpr constants, use MakePowerOfTwo<Type, Value>(), etc.
+
+#ifndef PowerOfTwo_h
+#define PowerOfTwo_h
+
+#include "mozilla/MathAlgorithms.h"
+
+#include <limits>
+
+namespace mozilla {
+
+// Round aInput up to the nearest power of 2 (powers of 2 are kept as-is),
+// except if that would overflow: then the highest possible power of 2 is used.
+// 0->1, 1->1, 2->2, 3->4, ... 2^31->2^31, 2^31+1->2^31 (for uint32_t), etc.
+template <typename T>
+T FriendlyRoundUpPow2(T aInput) {
+  // Same code as `RoundUpPow2()`, but for any type that CeilingLog2 supports,
+  // and also allowing inputs above the highest power of 2.
+  constexpr T highestPowerOfTwo = T(1) << (sizeof(T) * CHAR_BIT - 1);
+  if (aInput < highestPowerOfTwo) {
+    return T(1) << CeilingLog2(aInput);
+  }
+  return highestPowerOfTwo;
+}
+
+namespace detail {
+// Only the uint32_t and uint64_t overloads below exist; refuse anything else.
+template <typename T>
+inline uint_fast8_t CountLeadingZeroes(T aValue) = delete;
+// One `CountLeadingZeroes` name forwarding to the 32- and 64-bit functions.
+inline uint_fast8_t CountLeadingZeroes(uint32_t aValue) {
+  MOZ_ASSERT(aValue != 0);
+  return CountLeadingZeroes32(aValue);
+}
+inline uint_fast8_t CountLeadingZeroes(uint64_t aValue) {
+  MOZ_ASSERT(aValue != 0);
+  return CountLeadingZeroes64(aValue);
+}
+}  // namespace detail
+
+// Compute the smallest 2^N-1 mask in which aInput can fit.
+// I.e., `(x & mask) == x`, and no smaller 2^N-1 mask has that property.
+// Or looking at binary, we want a mask with as many leading zeroes as the
+// input, by right-shifting a full mask: (8-bit examples)
+// input:          00000000    00000001   00000010  00010110  01111111 10000000
+// N leading 0s:   ^^^^^^^^ 8  ^^^^^^^ 7  ^^^^^^ 6  ^^^ 3     ^ 1      0
+// full mask:      11111111    11111111   11111111  11111111  11111111 11111111
+// full mask >> N: 00000000    00000001   00000011  00011111  01111111 11111111
+template <typename T>
+T RoundUpPow2Mask(T aInput) {
+  // Zero needs its own path: CountLeadingZeroes(0) is undefined, and shifting
+  // by the full width of the type would be undefined too.
+  if (aInput != 0) {
+    return T(-1) >> detail::CountLeadingZeroes(aInput);
+  }
+  return 0;
+}
+
+template <typename T>
+class PowerOfTwoMask;
+
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask();
+
+template <typename T>
+class PowerOfTwo;
+
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo();
+
+// PowerOfTwoMask will always contain a mask for a power of 2, which is useful
+// for power-of-2 modulo operations (e.g., to keep an index inside a power-of-2
+// container).
+// Use this instead of PowerOfTwo if masking is the primary use of the value.
+//
+// Note that this class can store a "full" mask where all bits are set, so it
+// works for the mask corresponding to the power of 2 that would overflow `T`
+// (e.g., 2^32 for uint32_t gives a mask of 2^32-1, which fits in a uint32_t).
+// For this reason there is no API that computes the power of 2 corresponding to
+// the mask; but this can be done explicitly with `MaskValue() + 1`, which may
+// be useful for computing things like distance-to-the-end by doing
+// `MaskValue() + 1 - offset`, which works fine with unsigned number types.
+template <typename T>
+class PowerOfTwoMask {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "PowerOfTwoMask must use an unsigned type");
+
+ public:
+  // Construct a power of 2 mask where the given value can fit.
+  // Cannot be constexpr because of `RoundUpPow2Mask()`.
+  explicit PowerOfTwoMask(T aInput) : mMask(RoundUpPow2Mask(aInput)) {}
+
+  // Compute the mask corresponding to a PowerOfTwo (2^N -> 2^N-1), which saves
+  // having to compute the nearest 2^N-1.
+  // Not a conversion constructor, as it would be ambiguous whether we'd want
+  // that mask, or the mask that can *contain* the value (2^N -> 2^(N+1)-1).
+  // Delegates to `PowerOfTwo<U>::Mask()`, the trusted way to build it.
+  // Note: Not offering reverse PowerOfTwoMask-to-PowerOfTwo conversion, because
+  // that could result in an unexpected 0 result for the largest possible mask.
+  template <typename U>
+  static constexpr PowerOfTwoMask<U> MaskForPowerOfTwo(
+      const PowerOfTwo<U>& aP2) {
+    return aP2.Mask();
+  }
+
+  // Allow smaller unsigned types as input; callers must convert other types.
+  // NOTE(review): signedness is asserted on `T` below, not on `U` -- confirm.
+  template <typename U>
+  explicit constexpr PowerOfTwoMask(U aInput)
+      : mMask(RoundUpPow2Mask(static_cast<T>(aInput))) {
+    static_assert(!std::numeric_limits<T>::is_signed,
+                  "PowerOfTwoMask does not accept signed types");
+    static_assert(sizeof(U) <= sizeof(T),
+                  "PowerOfTwoMask does not accept bigger types");
+  }
+
+  constexpr T MaskValue() const { return mMask; }
+
+  // `x & aPowerOfTwoMask` just works.
+  template <typename U>
+  friend constexpr U operator&(U aNumber, PowerOfTwoMask aP2M) {
+    return static_cast<U>(aNumber & aP2M.MaskValue());
+  }
+
+  // `aPowerOfTwoMask & x` just works.
+  template <typename U>
+  friend constexpr U operator&(PowerOfTwoMask aP2M, U aNumber) {
+    return static_cast<U>(aP2M.MaskValue() & aNumber);
+  }
+
+  // `x % aPowerOfTwoMask(2^N-1)` is equivalent to `x % 2^N` but is more
+  // optimal by doing `x & (2^N-1)`.
+  // Useful for templated code doing modulo with a template argument type.
+  template <typename U>
+  friend constexpr U operator%(U aNumerator, PowerOfTwoMask aDenominator) {
+    return aNumerator & aDenominator.MaskValue();
+  }
+
+  constexpr bool operator==(const PowerOfTwoMask& aRhs) const {
+    return mMask == aRhs.mMask;
+  }
+  constexpr bool operator!=(const PowerOfTwoMask& aRhs) const {
+    return mMask != aRhs.mMask;
+  }
+
+ private:
+  // Trust `PowerOfTwo` to call the private Trusted constructor below.
+  friend class PowerOfTwo<T>;
+
+  // Trust `MakePowerOfTwoMask()` to call the private Trusted constructor below.
+  template <typename U, U Mask>
+  friend constexpr PowerOfTwoMask<U> MakePowerOfTwoMask();
+
+  struct Trusted {
+    T mMask;
+  };
+  // Construct the mask corresponding to a PowerOfTwo.
+  // This saves having to compute the nearest 2^N-1.
+  // Note: Not a public PowerOfTwo->PowerOfTwoMask conversion constructor, as
+  // that could be ambiguous whether we'd want the mask corresponding to the
+  // power of 2 (2^N -> 2^N-1), or the mask that can *contain* the PowerOfTwo
+  // value (2^N -> 2^(N+1)-1).
+  explicit constexpr PowerOfTwoMask(const Trusted& aP2) : mMask(aP2.mMask) {}
+
+  T mMask = 0;
+};
+
+// Build a PowerOfTwoMask constant; `Mask` is validated at compile time.
+template <typename T, T Mask>
+constexpr PowerOfTwoMask<T> MakePowerOfTwoMask() {
+  using TrustedMask = typename PowerOfTwoMask<T>::Trusted;
+  static_assert(Mask == T(-1) || IsPowerOfTwo(Mask + 1),
+                "MakePowerOfTwoMask<T, Mask>: Mask must be 2^N-1");
+  return PowerOfTwoMask<T>(TrustedMask{Mask});
+}
+
+// PowerOfTwo wraps an unsigned value that is always a power of 2.
+template <typename T>
+class PowerOfTwo {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "PowerOfTwo must use an unsigned type");
+
+ public:
+  // Build the power of 2 in which `aInput` can fit, or the highest possible
+  // power of 2 when `aInput` is above it.
+  // Callers that care should check/assert `Value() >= aInput` themselves.
+  // Not constexpr, because of `FriendlyRoundUpPow2()`.
+  explicit PowerOfTwo(T aInput) : mValue(FriendlyRoundUpPow2(aInput)) {}
+
+  // Also accept inputs of another type `U` that is no bigger than `T`.
+  // Callers are expected to convert bigger or signed types explicitly first.
+  template <typename U>
+  explicit PowerOfTwo(U aInput)
+      : mValue(FriendlyRoundUpPow2(static_cast<T>(aInput))) {
+    static_assert(sizeof(U) <= sizeof(T),
+                  "PowerOfTwo does not accept bigger types");
+    static_assert(!std::numeric_limits<T>::is_signed,
+                  "PowerOfTwo does not accept signed types");
+  }
+
+  constexpr T Value() const { return mValue; }
+
+  // Raw binary mask (2^N-1) matching this power of 2, handy for modulo:
+  // `x & powerOfTwo.MaskValue()` == `x % powerOfTwo.Value()`.
+  // If masking is the main use case, prefer the PowerOfTwoMask class over
+  // PowerOfTwo.
+  constexpr T MaskValue() const { return mValue - 1; }
+
+  // Same mask, wrapped in a PowerOfTwoMask object.
+  constexpr PowerOfTwoMask<T> Mask() const {
+    using MaskTrusted = typename PowerOfTwoMask<T>::Trusted;
+    return PowerOfTwoMask<T>(MaskTrusted{MaskValue()});
+  }
+
+  // `x % aPowerOfTwo` is done with a mask rather than an actual modulo.
+  // Handy in templated code where the denominator type is a template argument.
+  // Prefer the PowerOfTwoMask class if masking is the main use case.
+  template <typename U>
+  friend constexpr U operator%(U aNumerator, PowerOfTwo aDenominator) {
+    return aNumerator & aDenominator.MaskValue();
+  }
+
+  constexpr bool operator==(const PowerOfTwo& aRhs) const {
+    return Value() == aRhs.Value();
+  }
+  constexpr bool operator!=(const PowerOfTwo& aRhs) const {
+    return !(*this == aRhs);
+  }
+  constexpr bool operator<(const PowerOfTwo& aRhs) const {
+    return Value() < aRhs.Value();
+  }
+  constexpr bool operator<=(const PowerOfTwo& aRhs) const {
+    return !(aRhs < *this);
+  }
+  constexpr bool operator>(const PowerOfTwo& aRhs) const {
+    return aRhs < *this;
+  }
+  constexpr bool operator>=(const PowerOfTwo& aRhs) const {
+    return !(*this < aRhs);
+  }
+
+ private:
+  // `MakePowerOfTwo()` is trusted to use the private Trusted constructor below.
+  template <typename U, U Value>
+  friend constexpr PowerOfTwo<U> MakePowerOfTwo();
+
+  struct Trusted {
+    T mValue;
+  };
+  // Wrap a value already known to be a power of 2, skipping the rounding.
+  // Private: meant for the befriended `MakePowerOfTwo()`.
+  // Note: No PowerOfTwoMask-to-PowerOfTwo conversion is offered, because the
+  // largest possible mask would unexpectedly convert to 0.
+  explicit constexpr PowerOfTwo(const Trusted& aP2) : mValue(aP2.mValue) {}
+
+  // Defaults to the smallest power of 2: 2^0 == 1.
+  T mValue = 1;
+};
+
+// Build a PowerOfTwo constant; `Value` is validated at compile time.
+template <typename T, T Value>
+constexpr PowerOfTwo<T> MakePowerOfTwo() {
+  using TrustedValue = typename PowerOfTwo<T>::Trusted;
+  static_assert(IsPowerOfTwo(Value),
+                "MakePowerOfTwo<T, Value>: Value must be 2^N");
+  return PowerOfTwo<T>(TrustedValue{Value});
+}
+
+// Shortcuts for the most common types and functions.
+
+using PowerOfTwoMask32 = PowerOfTwoMask<uint32_t>;
+using PowerOfTwo32 = PowerOfTwo<uint32_t>;
+using PowerOfTwoMask64 = PowerOfTwoMask<uint64_t>;
+using PowerOfTwo64 = PowerOfTwo<uint64_t>;
+
+template <uint32_t MaskBits>
+constexpr PowerOfTwoMask32 MakePowerOfTwoMask32() {
+  return ::mozilla::MakePowerOfTwoMask<uint32_t, MaskBits>();
+}
+
+template <uint32_t PowerValue>
+constexpr PowerOfTwo32 MakePowerOfTwo32() {
+  return ::mozilla::MakePowerOfTwo<uint32_t, PowerValue>();
+}
+
+template <uint64_t MaskBits>
+constexpr PowerOfTwoMask64 MakePowerOfTwoMask64() {
+  return ::mozilla::MakePowerOfTwoMask<uint64_t, MaskBits>();
+}
+
+template <uint64_t PowerValue>
+constexpr PowerOfTwo64 MakePowerOfTwo64() {
+  return ::mozilla::MakePowerOfTwo<uint64_t, PowerValue>();
+}
+
+}  // namespace mozilla
+
+#endif  // PowerOfTwo_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunk.h b/mozglue/baseprofiler/public/ProfileBufferChunk.h
new file mode 100644
index 0000000000..24a516bcaf
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunk.h
@@ -0,0 +1,543 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunk_h
+#define ProfileBufferChunk_h
+
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+
+#if defined(MOZ_MEMORY)
+#  include "mozmemory.h"
+#endif
+
+#include <algorithm>
+#include <limits>
+#include <type_traits>
+
+#ifdef DEBUG
+#  include <cstdio>
+#endif
+
+namespace mozilla {
+
+// Represents a single chunk of memory, with a link to the next chunk (or null).
+//
+// A chunk is made of an internal header (which contains a public part) followed
+// by user-accessible bytes.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private |         memory containing user blocks        |
+// +---------------+---------+----------------------------------------------+
+//                           <---------------BufferBytes()------------------>
+// <------------------------------ChunkBytes()------------------------------>
+//
+// The chunk can reserve "blocks", but doesn't know the internal contents of
+// each block, it only knows where the first one starts, and where the last one
+// ends (which is where the next one will begin, if not already out of range).
+// It is up to the user to add structure to each block so that they can be
+// distinguished when later read.
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private |      [1st block]...[last full block]         |
+// +---------------+---------+----------------------------------------------+
+//  ChunkHeader().mOffsetFirstBlock ^                             ^
+//                           ChunkHeader().mOffsetPastLastBlock --'
+//
+// It is possible to attempt to reserve more than the remaining space, in which
+// case only what is available is returned. The caller is responsible for using
+// another chunk, reserving a block "tail" in it, and using both parts to
+// constitute a full block. (This initial tail may be empty in some chunks.)
+//
+// +---------------+---------+----------------------------------------------+
+// | public Header | private | tail][1st block]...[last full block][head... |
+// +---------------+---------+----------------------------------------------+
+//  ChunkHeader().mOffsetFirstBlock ^                                       ^
+//                                     ChunkHeader().mOffsetPastLastBlock --'
+//
+// Each Chunk has an internal state (checked in DEBUG builds) that directs how
+// to use it during creation, initialization, use, end of life, recycling, and
+// destruction. See `State` below for details.
+// In particular:
+// - `ReserveInitialBlockAsTail()` must be called before the first
+//   `ReserveBlock()` after construction/recycling, even with size 0 (no tail),
+// - `MarkDone()` and `MarkRecycled()` must be called as appropriate.
+class ProfileBufferChunk {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfBytes = Span<Byte>;
+
+  // Hint about the size of the metadata (public and private headers).
+  // `Create()` below takes the minimum *buffer* size, so the minimum total
+  // Chunk size is at least `SizeofChunkMetadata() + aMinBufferBytes`.
+  [[nodiscard]] static constexpr Length SizeofChunkMetadata() {
+    return static_cast<Length>(sizeof(InternalHeader));
+  }
+
+  // Allocate space for a chunk with a given minimum size, and construct it.
+  // The actual size may be higher, to match the actual space taken in the
+  // memory pool.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Create(
+      Length aMinBufferBytes) {
+    // We need at least one byte, to cover the always-present `mBuffer` byte.
+    aMinBufferBytes = std::max(aMinBufferBytes, Length(1));
+    // Trivial struct with the same alignment as `ProfileBufferChunk`, and size
+    // equal to that alignment, because typically the sizeof of an object is
+    // a multiple of its alignment.
+    struct alignas(alignof(InternalHeader)) ChunkStruct {
+      Byte c[alignof(InternalHeader)];
+    };
+    static_assert(std::is_trivial_v<ChunkStruct>,
+                  "ChunkStruct must be trivial to avoid any construction");
+    // Allocate an array of that struct, enough to contain the expected
+    // `ProfileBufferChunk` (with its header+buffer).
+    size_t count = (sizeof(InternalHeader) + aMinBufferBytes +
+                    (alignof(InternalHeader) - 1)) /
+                   alignof(InternalHeader);
+#if defined(MOZ_MEMORY)
+    // Potentially expand the array to use more of the effective allocation.
+    count = (malloc_good_size(count * sizeof(ChunkStruct)) +
+             (sizeof(ChunkStruct) - 1)) /
+            sizeof(ChunkStruct);
+#endif
+    auto chunkStorage = MakeUnique<ChunkStruct[]>(count);
+    MOZ_ASSERT(reinterpret_cast<uintptr_t>(chunkStorage.get()) %
+                   alignof(InternalHeader) ==
+               0);
+    // After the allocation, compute the actual chunk size (including header).
+    const size_t chunkBytes = count * sizeof(ChunkStruct);
+    MOZ_ASSERT(chunkBytes >= sizeof(ProfileBufferChunk),
+               "Not enough space to construct a ProfileBufferChunk");
+    MOZ_ASSERT(chunkBytes <=
+               static_cast<size_t>(std::numeric_limits<Length>::max()));
+    // Compute the size of the user-accessible buffer inside the chunk.
+    const Length bufferBytes =
+        static_cast<Length>(chunkBytes - sizeof(InternalHeader));
+    MOZ_ASSERT(bufferBytes >= aMinBufferBytes,
+               "Not enough space for minimum buffer size");
+    // Construct the header at the beginning of the allocated array, with the
+    // known buffer size.
+    new (chunkStorage.get()) ProfileBufferChunk(bufferBytes);
+    // We now have a proper `ProfileBufferChunk` object, create the appropriate
+    // UniquePtr for it.
+    UniquePtr<ProfileBufferChunk> chunk{
+        reinterpret_cast<ProfileBufferChunk*>(chunkStorage.release())};
+    MOZ_ASSERT(
+        size_t(reinterpret_cast<const char*>(
+                   &chunk.get()->BufferSpan()[bufferBytes - 1]) -
+               reinterpret_cast<const char*>(chunk.get())) == chunkBytes - 1,
+        "Buffer span spills out of chunk allocation");
+    return chunk;
+  }
+
+#ifdef DEBUG
+  ~ProfileBufferChunk() {
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+  }
+#endif
+
+  // Must be called with the first block tail (may be empty), which will be
+  // skipped if the reader starts with this ProfileBufferChunk.
+  [[nodiscard]] SpanOfBytes ReserveInitialBlockAsTail(Length aTailSize) {
+#ifdef DEBUG
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::InUse;
+#endif
+    mInternalHeader.mHeader.mOffsetFirstBlock = aTailSize;
+    mInternalHeader.mHeader.mOffsetPastLastBlock = aTailSize;
+    return SpanOfBytes(&mBuffer, aTailSize);
+  }
+
+  struct ReserveReturn {
+    SpanOfBytes mSpan;
+    ProfileBufferBlockIndex mBlockRangeIndex;
+  };
+
+  // Reserve a block of up to `aBlockSize` bytes, and return a Span to it, and
+  // its starting index. The actual size may be smaller, if the block cannot fit
+  // in the remaining space.
+  [[nodiscard]] ReserveReturn ReserveBlock(Length aBlockSize) {
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse);
+    MOZ_ASSERT(RangeStart() != 0,
+               "Expected valid range start before first Reserve()");
+    const Length blockOffset = mInternalHeader.mHeader.mOffsetPastLastBlock;
+    Length reservedSize = aBlockSize;
+    if (MOZ_UNLIKELY(aBlockSize >= RemainingBytes())) {
+      reservedSize = RemainingBytes();
+#ifdef DEBUG
+      mInternalHeader.mState = InternalHeader::State::Full;
+#endif
+    }
+    mInternalHeader.mHeader.mOffsetPastLastBlock += reservedSize;
+    mInternalHeader.mHeader.mBlockCount += 1;
+    return {SpanOfBytes(&mBuffer + blockOffset, reservedSize),
+            ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                mInternalHeader.mHeader.mRangeStart + blockOffset)};
+  }
+
+  // When a chunk will not be used to store more blocks (because it is full, or
+  // because the profiler will not add more data), it should be marked "done".
+  // Access to its content is still allowed.
+  void MarkDone() {
+#ifdef DEBUG
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Created);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Done);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Recycled);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::InUse ||
+               mInternalHeader.mState == InternalHeader::State::Full);
+    mInternalHeader.mState = InternalHeader::State::Done;
+#endif
+    mInternalHeader.mHeader.mDoneTimeStamp = TimeStamp::NowUnfuzzed();
+  }
+
+  // A "Done" chunk may be recycled, to avoid allocating a new one.
+  void MarkRecycled() {
+#ifdef DEBUG
+    // We also allow Created and already-Recycled chunks to be recycled, this
+    // way it's easier to recycle chunks when their state is not easily
+    // trackable.
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::InUse);
+    MOZ_ASSERT(mInternalHeader.mState != InternalHeader::State::Full);
+    MOZ_ASSERT(mInternalHeader.mState == InternalHeader::State::Created ||
+               mInternalHeader.mState == InternalHeader::State::Done ||
+               mInternalHeader.mState == InternalHeader::State::Recycled);
+    mInternalHeader.mState = InternalHeader::State::Recycled;
+#endif
+    // Reset all header fields, in case this recycled chunk gets read.
+    mInternalHeader.mHeader.Reset();
+  }
+
+  // Public header, meant to uniquely identify a chunk, it may be shared with
+  // other processes to coordinate global memory handling.
+  struct Header {
+    explicit Header(Length aBufferBytes) : mBufferBytes(aBufferBytes) {}
+
+    // Reset all members to their as-new values (apart from the buffer size,
+    // which cannot change), ready for re-use.
+    void Reset() {
+      mOffsetFirstBlock = 0;
+      mOffsetPastLastBlock = 0;
+      mDoneTimeStamp = TimeStamp{};
+      mBlockCount = 0;
+      mRangeStart = 0;
+      mProcessId = 0;
+    }
+
+    // Note: Part of the ordering of members below is to avoid unnecessary
+    // padding.
+
+    // Members managed by the ProfileBufferChunk.
+
+    // Offset of the first block (past the initial tail block, which may be 0).
+    Length mOffsetFirstBlock = 0;
+    // Offset past the last byte of the last reserved block
+    // It may be past mBufferBytes when last block continues in the next
+    // ProfileBufferChunk. It may be before mBufferBytes if ProfileBufferChunk
+    // is marked "Done" before the end is reached.
+    Length mOffsetPastLastBlock = 0;
+    // Timestamp when buffer is "Done" (which happens when the last block is
+    // written). This will be used to find and discard the oldest
+    // ProfileBufferChunk.
+    TimeStamp mDoneTimeStamp;
+    // Number of bytes in the buffer, set once at construction time.
+    const Length mBufferBytes;
+    // Number of reserved blocks (including final one even if partial, but
+    // excluding initial tail).
+    Length mBlockCount = 0;
+
+    // Meta-data set by the user.
+
+    // Index of the first byte of this ProfileBufferChunk, relative to all
+    // Chunks for this process. Index 0 is reserved as nullptr-like index, so
+    // set mRangeStart to a non-0 value before the first `ReserveBlock()`.
+    ProfileBufferIndex mRangeStart = 0;
+    // Process writing to this ProfileBufferChunk.
+    int mProcessId = 0;
+
+    // A bit of spare space (necessary here because of the alignment due to
+    // other members), may be later repurposed for extra data.
+    const int mPADDING = 0;
+  };
+
+  [[nodiscard]] const Header& ChunkHeader() const {
+    return mInternalHeader.mHeader;
+  }
+
+  [[nodiscard]] Length BufferBytes() const {
+    return ChunkHeader().mBufferBytes;
+  }
+
+  // Total size of the chunk (buffer + header).
+  [[nodiscard]] Length ChunkBytes() const {
+    return static_cast<Length>(sizeof(InternalHeader)) + BufferBytes();
+  }
+
+  // Size of external resources, in this case all the following chunks.
+  [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    const ProfileBufferChunk* const next = GetNext();
+    return next ? next->SizeOfIncludingThis(aMallocSizeOf) : 0;
+  }
+
+  // Size of this chunk and all following ones.
+  [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    // Just in case `aMallocSizeOf` falls back on just `sizeof`, make sure we
+    // account for at least the actual Chunk requested allocation size.
+    return std::max<size_t>(aMallocSizeOf(this), ChunkBytes()) +
+           SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+  [[nodiscard]] Length RemainingBytes() const {
+    return BufferBytes() - OffsetPastLastBlock();
+  }
+
+  [[nodiscard]] Length OffsetFirstBlock() const {
+    return ChunkHeader().mOffsetFirstBlock;
+  }
+
+  [[nodiscard]] Length OffsetPastLastBlock() const {
+    return ChunkHeader().mOffsetPastLastBlock;
+  }
+
+  [[nodiscard]] Length BlockCount() const { return ChunkHeader().mBlockCount; }
+
+  [[nodiscard]] int ProcessId() const { return ChunkHeader().mProcessId; }
+
+  void SetProcessId(int aProcessId) {
+    mInternalHeader.mHeader.mProcessId = aProcessId;
+  }
+
+  // Global range index at the start of this Chunk.
+  [[nodiscard]] ProfileBufferIndex RangeStart() const {
+    return ChunkHeader().mRangeStart;
+  }
+
+  void SetRangeStart(ProfileBufferIndex aRangeStart) {
+    mInternalHeader.mHeader.mRangeStart = aRangeStart;
+  }
+
+  // Get a read-only Span to the buffer. It is up to the caller to decipher the
+  // contents, based on known offsets and the internal block structure.
+  [[nodiscard]] Span<const Byte> BufferSpan() const {
+    return Span<const Byte>(&mBuffer, BufferBytes());
+  }
+
+  [[nodiscard]] Byte ByteAt(Length aOffset) const {
+    MOZ_ASSERT(aOffset < OffsetPastLastBlock());
+    return *(&mBuffer + aOffset);
+  }
+
+  [[nodiscard]] ProfileBufferChunk* GetNext() {
+    return mInternalHeader.mNext.get();
+  }
+  [[nodiscard]] const ProfileBufferChunk* GetNext() const {
+    return mInternalHeader.mNext.get();
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> ReleaseNext() {
+    return std::move(mInternalHeader.mNext);
+  }
+
+  void InsertNext(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (!aChunk) {
+      return;
+    }
+    aChunk->SetLast(ReleaseNext());
+    mInternalHeader.mNext = std::move(aChunk);
+  }
+
+  // Find the last chunk in this chain (it may be `this`).
+  [[nodiscard]] ProfileBufferChunk* Last() {
+    ProfileBufferChunk* chunk = this;
+    for (;;) {
+      ProfileBufferChunk* next = chunk->GetNext();
+      if (!next) {
+        return chunk;
+      }
+      chunk = next;
+    }
+  }
+  [[nodiscard]] const ProfileBufferChunk* Last() const {
+    const ProfileBufferChunk* chunk = this;
+    for (;;) {
+      const ProfileBufferChunk* next = chunk->GetNext();
+      if (!next) {
+        return chunk;
+      }
+      chunk = next;
+    }
+  }
+
+  void SetLast(UniquePtr<ProfileBufferChunk>&& aChunk) {
+    if (!aChunk) {
+      return;
+    }
+    Last()->mInternalHeader.mNext = std::move(aChunk);
+  }
+
+  // Join two possibly-null chunk lists.
+  [[nodiscard]] static UniquePtr<ProfileBufferChunk> Join(
+      UniquePtr<ProfileBufferChunk>&& aFirst,
+      UniquePtr<ProfileBufferChunk>&& aLast) {
+    if (aFirst) {
+      aFirst->SetLast(std::move(aLast));
+      return std::move(aFirst);
+    }
+    return std::move(aLast);
+  }
+  // DEBUG-only hex+ASCII dump of header fields and bytes reserved so far.
+#ifdef DEBUG
+  void Dump(std::FILE* aFile = stdout) const {
+    fprintf(aFile,
+            "Chunk[%p] chunkSize=%u bufferSize=%u state=%s rangeStart=%u "
+            "firstBlockOffset=%u offsetPastLastBlock=%u blockCount=%u",
+            this, unsigned(ChunkBytes()), unsigned(BufferBytes()),
+            mInternalHeader.StateString(), unsigned(RangeStart()),
+            unsigned(OffsetFirstBlock()), unsigned(OffsetPastLastBlock()),
+            unsigned(BlockCount()));
+    const auto len = OffsetPastLastBlock();
+    constexpr unsigned columns = 16;
+    unsigned char ascii[columns + 1];
+    ascii[columns] = '\0';
+    for (Length i = 0; i < len; ++i) {
+      if (i % columns == 0) {
+        fprintf(aFile, "\n  %4u=0x%03x:", unsigned(i), unsigned(i));
+        for (unsigned a = 0; a < columns; ++a) {
+          ascii[a] = ' ';
+        }
+      }
+      unsigned char sep = ' ';
+      if (i == OffsetFirstBlock()) {
+        if (i == OffsetPastLastBlock()) {
+          sep = '#';
+        } else {
+          sep = '[';
+        }
+      } else if (i == OffsetPastLastBlock()) {
+        sep = ']';
+      }
+      unsigned char c = *(&mBuffer + i);
+      fprintf(aFile, "%c%02x", sep, c);
+
+      if (i == len - 1) {
+        if (i + 1 == OffsetPastLastBlock()) {
+          // Special case when last block ends right at the end.
+          fprintf(aFile, "]");
+        } else {
+          fprintf(aFile, " ");
+        }
+      } else if (i % columns == columns - 1) {
+        fprintf(aFile, " ");
+      }
+
+      ascii[i % columns] = (c >= ' ' && c <= '~') ? c : '.';
+
+      if (i % columns == columns - 1) {
+        fprintf(aFile, " %s", ascii);
+      }
+    }
+    // Incomplete last row: pad the hex columns, then print its ASCII column.
+    if (len % columns != 0) {
+      for (Length i = len % columns; i < columns; ++i) {
+        fprintf(aFile, "   ");
+      }
+      fprintf(aFile, " %s", ascii);
+    }
+
+    fprintf(aFile, "\n");
+  }
+#endif  // DEBUG
+
+ private:
+  // ProfileBufferChunk constructor. Use static `Create()` to allocate and
+  // construct a ProfileBufferChunk.
+  explicit ProfileBufferChunk(Length aBufferBytes)
+      : mInternalHeader(aBufferBytes) {}
+
+  // This internal header starts with the public `Header`, and adds some data
+  // only necessary for local handling.
+  // This encapsulation is also necessary to perform placement-new in
+  // `Create()`.
+  struct InternalHeader {
+    explicit InternalHeader(Length aBufferBytes) : mHeader(aBufferBytes) {}
+
+    Header mHeader;
+    UniquePtr<ProfileBufferChunk> mNext;
+
+#ifdef DEBUG
+    enum class State {
+      Created,  // Self-set. Just constructed, waiting for initial block tail.
+      InUse,    // Ready to accept blocks.
+      Full,     // Self-set. Blocks reach the end (or further).
+      Done,     // Blocks won't be added anymore.
+      Recycled  // Still full of data, but expecting an initial block tail.
+    };
+
+    State mState = State::Created;
+    // Transition table: (X=unexpected)
+    // Method          \  State   Created  InUse    Full     Done     Recycled
+    // ReserveInitialBlockAsTail   InUse     X       X        X        InUse
+    // ReserveBlock                  X   InUse/Full  X        X          X
+    // MarkDone                      X     Done     Done      X          X
+    // MarkRecycled              Recycled    X       X      Recycled  Recycled
+    // destructor                    ok      X       X        ok         ok
+
+    const char* StateString() const {
+      switch (mState) {
+        case State::Created:
+          return "Created";
+        case State::InUse:
+          return "InUse";
+        case State::Full:
+          return "Full";
+        case State::Done:
+          return "Done";
+        case State::Recycled:
+          return "Recycled";
+        default:
+          return "?";
+      }
+    }
+#else  // DEBUG
+    const char* StateString() const { return "(non-DEBUG)"; }
+#endif
+  };
+
+  InternalHeader mInternalHeader;
+
+  // KEEP THIS LAST!
+  // First byte of the buffer. Note that ProfileBufferChunk::Create allocates a
+  // bigger block, such that `mBuffer` is the first of `mBufferBytes` available
+  // bytes.
+  // The initialization is not strictly needed, because bytes should only be
+  // read after they have been written and `mOffsetPastLastBlock` has been
+  // updated. However:
+  // - Reviewbot complains that it's not initialized.
+  // - It's cheap to initialize one byte.
+  // - In the worst case (reading does happen), zero is not a valid entry size
+  //   and should get caught in entry readers.
+  Byte mBuffer = '\0';
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunk_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
new file mode 100644
index 0000000000..e7f12bf21f
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManager.h
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManager_h
+#define ProfileBufferChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+#include "mozilla/ScopeExit.h"
+
+#include <functional>
+
+namespace mozilla {
+
+// Manages the ProfileBufferChunks for this process.
+// The main user of this class is the buffer that needs chunks to store its
+// data.
+// The main ProfileBufferChunkManager responsibilities are:
+// - It can create new chunks, they are called "unreleased".
+// - Later these chunks are returned here, and become "released".
+// - The manager is free to destroy or recycle the oldest released chunks
+//   (usually to reclaim memory), and will inform the user through a provided
+//   callback.
+// - The user may access still-alive released chunks.
+class ProfileBufferChunkManager {
+ public:
+  virtual ~ProfileBufferChunkManager()
+#ifdef DEBUG
+  {
+    MOZ_ASSERT(!mUser, "Still registered when being destroyed");
+  }
+#else
+      = default;
+#endif
+
+  // Expected maximum size needed to store one stack sample.
+  // Most ChunkManager sub-classes will require a chunk size; this can serve as
+  // a minimum recommendation to hold most backtraces.
+  constexpr static ProfileBufferChunk::Length scExpectedMaximumStackSize =
+      128 * 1024;
+
+  // Estimated maximum buffer size.
+  [[nodiscard]] virtual size_t MaxTotalSize() const = 0;
+
+  // Create or recycle a chunk right now. May return null in case of allocation
+  // failure.
+  // Note that the chunk-destroyed callback may be invoked during this call;
+  // user should be careful with reentrancy issues.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk> GetChunk() = 0;
+
+  // `aChunkReceiver` may be called with a new or recycled chunk, or nullptr.
+  // (See `FulfillChunkRequests()` regarding when the callback may happen.)
+  virtual void RequestChunk(
+      std::function<void(UniquePtr<ProfileBufferChunk>)>&& aChunkReceiver) = 0;
+
+  // This method may be invoked at any time on any thread (and not necessarily
+  // by the main user of this class), to do the work necessary to respond to a
+  // previous `RequestChunk()`.
+  // It is optional: If it is never called, or called too late, the user is
+  // responsible for directly calling `GetChunk()` when a chunk is really
+  // needed (or it should at least fail gracefully).
+  // The idea is to fulfill chunk request on a separate thread, and most
+  // importantly outside of profiler calls, to avoid doing expensive memory
+  // allocations during these calls.
+  virtual void FulfillChunkRequests() = 0;
+
+  // One chunk is released by the user, the ProfileBufferChunkManager should
+  // keep it as long as possible (depending on local or global memory/time
+  // limits). Note that the chunk-destroyed callback may be invoked during this
+  // call; user should be careful with reentrancy issues.
+  virtual void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) = 0;
+
+  // `aChunkDestroyedCallback` will be called whenever the contents of a
+  // previously-released chunk are about to be destroyed or recycled.
+  // Note that it may be called during other functions above, or at other times
+  // from the same or other threads; user should be careful with reentrancy
+  // issues.
+  virtual void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&&
+          aChunkDestroyedCallback) = 0;
+
+  // Give away all released chunks that have not yet been destroyed.
+  [[nodiscard]] virtual UniquePtr<ProfileBufferChunk>
+  GetExtantReleasedChunks() = 0;
+
+  // Call `aCallback` with the extant released chunks (if any), bracketed by
+  // the derived lock/unlock hooks. Returns whatever the callback returns.
+  template <typename Callback>
+  [[nodiscard]] auto PeekExtantReleasedChunks(Callback&& aCallback) {
+    const ProfileBufferChunk* chunks = PeekExtantReleasedChunksAndLock();
+    auto unlock =
+        MakeScopeExit([&]() { UnlockAfterPeekExtantReleasedChunks(); });
+    return std::forward<Callback>(aCallback)(chunks);
+  }
+
+  // Chunks that were still unreleased will never be released.
+  virtual void ForgetUnreleasedChunks() = 0;
+
+  [[nodiscard]] virtual size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+  [[nodiscard]] virtual size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const = 0;
+
+ protected:
+  // Derived classes to implement `PeekExtantReleasedChunks` through these:
+  virtual const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() = 0;
+  virtual void UnlockAfterPeekExtantReleasedChunks() = 0;
+
+#ifdef DEBUG
+ public:
+  // DEBUG checks ensuring that this manager and its users avoid UAFs.
+  // Derived classes should assert that mUser is not null in their functions.
+
+  void RegisteredWith(const void* aUser) {
+    MOZ_ASSERT(!mUser);
+    MOZ_ASSERT(aUser);
+    mUser = aUser;
+  }
+
+  void DeregisteredFrom(const void* aUser) {
+    MOZ_ASSERT(mUser == aUser);
+    mUser = nullptr;
+  }
+
+ protected:
+  const void* mUser = nullptr;
+#endif  // DEBUG
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
new file mode 100644
index 0000000000..c91b38cbdb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerSingle.h
@@ -0,0 +1,172 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerSingle_h
+#define ProfileBufferChunkManagerSingle_h
+
+#include "mozilla/ProfileBufferChunkManager.h"
+
+#ifdef DEBUG
+#  include "mozilla/Atomics.h"
+#endif  // DEBUG
+
+namespace mozilla {
+
+// Manages only one Chunk.
+// The first call to `GetChunk()`/`RequestChunk()` retrieves the one chunk; all
+// subsequent calls will return nullptr. That chunk may still be released, but
+// it will never be destroyed or recycled.
+// Unlike others, this manager may be `Reset()`, to allow another round of
+// small-data gathering.
+// The main use is with short-lived ProfileChunkedBuffers that collect little
+// data that can fit in one chunk, e.g., capturing one stack.
+// It is not thread-safe.
+class ProfileBufferChunkManagerSingle final : public ProfileBufferChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  // Use a preallocated chunk. (Accepting null to gracefully handle OOM.)
+  explicit ProfileBufferChunkManagerSingle(UniquePtr<ProfileBufferChunk> aChunk)
+      : mInitialChunk(std::move(aChunk)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {
+    MOZ_ASSERT(!mInitialChunk || !mInitialChunk->GetNext(),
+               "Expected at most one chunk");
+  }
+
+  // ChunkMinBufferBytes: Minimum number of user-available bytes in the Chunk.
+  // Note that Chunks use a bit more memory for their header.
+  explicit ProfileBufferChunkManagerSingle(Length aChunkMinBufferBytes)
+      : mInitialChunk(ProfileBufferChunk::Create(aChunkMinBufferBytes)),
+        mBufferBytes(mInitialChunk ? mInitialChunk->BufferBytes() : 0) {}
+
+#ifdef DEBUG
+  ~ProfileBufferChunkManagerSingle() { MOZ_ASSERT(mVirtuallyLocked == false); }
+#endif  // DEBUG
+
+  // Reset this manager, using the provided chunk (probably coming from the
+  // ProfileChunkedBuffer that just used it); if null, fallback on current or
+  // released chunk.
+  void Reset(UniquePtr<ProfileBufferChunk> aPossibleChunk) {
+    if (aPossibleChunk) {
+      mInitialChunk = std::move(aPossibleChunk);
+      mReleasedChunk = nullptr;
+    } else if (!mInitialChunk) {
+      MOZ_ASSERT(!!mReleasedChunk, "Can't reset properly!");
+      mInitialChunk = std::move(mReleasedChunk);
+    }
+
+    if (mInitialChunk) {
+      mInitialChunk->MarkRecycled();
+      mBufferBytes = mInitialChunk->BufferBytes();
+    } else {
+      mBufferBytes = 0;
+    }
+  }
+
+  [[nodiscard]] size_t MaxTotalSize() const final { return mBufferBytes; }
+
+  // Only the first call to `GetChunk`/`RequestChunk` (since construction or
+  // `Reset()`) yields the chunk, if there is one; later calls yield nullptr.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mInitialChunk);
+  }
+
+  void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+                        aChunkReceiver) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // Simple retrieval.
+    std::move(aChunkReceiver)(GetChunk());
+  }
+
+  void FulfillChunkRequests() final {
+    // Nothing to do here.
+  }
+
+  void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    if (!aChunk) {
+      return;
+    }
+    MOZ_ASSERT(!mReleasedChunk, "Unexpected 2nd released chunk");
+    MOZ_ASSERT(!aChunk->GetNext(), "Only expected one released chunk");
+    mReleasedChunk = std::move(aChunk);
+  }
+
+  void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+      final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // The chunk-destroyed callback will never actually be called, but we keep
+    // the callback here in case the caller expects it to live as long as this
+    // manager.
+    mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return std::move(mReleasedChunk);
+  }
+
+  void ForgetUnreleasedChunks() final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    size_t size = 0;
+    if (mInitialChunk) {
+      size += mInitialChunk->SizeOfIncludingThis(aMallocSizeOf);
+    }
+    if (mReleasedChunk) {
+      size += mReleasedChunk->SizeOfIncludingThis(aMallocSizeOf);
+    }
+    // Note: Missing size of std::function external resources (if any).
+    return size;
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
+  }
+
+ protected:
+  // This manager is not thread-safe, so there's no actual locking needed.
+  const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(false, true));
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return mReleasedChunk.get();
+  }
+  void UnlockAfterPeekExtantReleasedChunks() final {
+    MOZ_ASSERT(mVirtuallyLocked.compareExchange(true, false));
+  }
+
+ private:
+  // Initial chunk created with this manager, given away at first Get/Request.
+  UniquePtr<ProfileBufferChunk> mInitialChunk;
+
+  // Storage for the released chunk (which should probably not happen, as it
+  // means the chunk is full).
+  UniquePtr<ProfileBufferChunk> mReleasedChunk;
+
+  // Size of the one chunk we're managing. Stored here, because the chunk may
+  // be moved out and inaccessible from here.
+  Length mBufferBytes;
+
+  // The chunk-destroyed callback will never actually be called, but we keep it
+  // here in case the caller expects it to live as long as this manager.
+  std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+#ifdef DEBUG
+  mutable Atomic<bool> mVirtuallyLocked{false};
+#endif  // DEBUG
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManagerSingle_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
new file mode 100644
index 0000000000..5b1af6d66c
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferChunkManagerWithLocalLimit.h
@@ -0,0 +1,428 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferChunkManagerWithLocalLimit_h
+#define ProfileBufferChunkManagerWithLocalLimit_h
+
+#include "BaseProfiler.h"
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferControlledChunkManager.h"
+
+#include <utility>
+
+namespace mozilla {
+
+// Manages the Chunks for this process in a thread-safe manner, with a maximum
+// size per process.
+//
+// "Unreleased" chunks are not owned here, only "released" chunks can be
+// destroyed or recycled when reaching the memory limit, so it is theoretically
+// possible to break that limit, if:
+// - The user of this class doesn't release their chunks, AND/OR
+// - The limit is too small (e.g., smaller than 2 or 3 chunks, which should be
+//   the usual number of unreleased chunks in flight).
+// In this case, it just means that we will use more memory than allowed,
+// potentially risking OOMs. Hopefully this shouldn't happen in real code,
+// assuming that the user is doing the right thing and releasing chunks ASAP,
+// and that the memory limit is reasonably large.
+class ProfileBufferChunkManagerWithLocalLimit final
+    : public ProfileBufferChunkManager,
+      public ProfileBufferControlledChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  // aMaxTotalBytes: Maximum number of bytes allocated in all local Chunks.
+  // aChunkMinBufferBytes: Minimum number of user-available bytes in each Chunk.
+  // Note that Chunks use a bit more memory for their header.
+  explicit ProfileBufferChunkManagerWithLocalLimit(size_t aMaxTotalBytes,
+                                                   Length aChunkMinBufferBytes)
+      : mMaxTotalBytes(aMaxTotalBytes),
+        mChunkMinBufferBytes(aChunkMinBufferBytes) {}
+
+  ~ProfileBufferChunkManagerWithLocalLimit() {
+    if (mUpdateCallback) {
+      // Last call: a "final" Update tells the callback no more will follow.
+      std::move(mUpdateCallback)(Update(nullptr));
+    }
+  }
+
+  [[nodiscard]] size_t MaxTotalSize() const final {
+    // No locking: `mMaxTotalBytes` is `const`, it is only set at construction.
+    return mMaxTotalBytes;
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetChunk() final {
+    AUTO_PROFILER_STATS(Local_GetChunk);
+    // Step 1: get a chunk and its matching update while holding `mMutex` only.
+    ChunkAndUpdate result;
+    {
+      baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+      result = GetChunk(bufferLock);
+    }
+    // Step 2: report the update, now holding only the callback mutex.
+    baseprofiler::detail::BaseProfilerAutoLock cbLock(mUpdateCallbackMutex);
+    if (mUpdateCallback && !result.second.IsNotUpdate()) {
+      mUpdateCallback(std::move(result.second));
+    }
+    return std::move(result.first);
+  }
+
+  void RequestChunk(std::function<void(UniquePtr<ProfileBufferChunk>)>&&
+                        aChunkReceiver) final {
+    AUTO_PROFILER_STATS(Local_RequestChunk);
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    // Only one request may be pending at a time: if a receiver is already
+    // stored, this new request is dropped (and `aChunkReceiver` left as-is).
+    if (!mChunkReceiver) {
+      // Remember the receiver; storing it is what marks a request as pending.
+      // It gets serviced by the next `FulfillChunkRequests()` call.
+      mChunkReceiver = std::move(aChunkReceiver);
+    }
+  }
+
+  void FulfillChunkRequests() final {
+    AUTO_PROFILER_STATS(Local_FulfillChunkRequests);
+    std::function<void(UniquePtr<ProfileBufferChunk>)> receiver;
+    ChunkAndUpdate result;
+    {
+      baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+      if (mChunkReceiver) {
+        // A request is pending: take the receiver out (which also clears the
+        // pending state), so it can be invoked after the lock is dropped.
+        std::swap(receiver, mChunkReceiver);
+        MOZ_ASSERT(!mChunkReceiver, "mChunkReceiver should have been emptied");
+        // Get the requested chunk. On failure the chunk pointer stays null,
+        // and the receiver is still told about it below.
+        AUTO_PROFILER_STATS(Local_FulfillChunkRequests_GetChunk);
+        result = GetChunk(bufferLock);
+      }
+    }
+
+    if (!receiver) {
+      // Nothing was pending, nothing to do.
+      return;
+    }
+    {
+      // Forward the update (if any) while holding the callback mutex only.
+      baseprofiler::detail::BaseProfilerAutoLock cbLock(mUpdateCallbackMutex);
+      if (mUpdateCallback && !result.second.IsNotUpdate()) {
+        mUpdateCallback(std::move(result.second));
+      }
+    }
+
+    // The receiver runs with no lock held, so it is free to call back into
+    // this chunk manager. A concurrent request may therefore be fulfilled
+    // before this one; that is harmless, as new and recycled chunks are
+    // interchangeable.
+    std::move(receiver)(std::move(result.first));
+  }
+
+  void ReleaseChunk(UniquePtr<ProfileBufferChunk> aChunk) final {
+    if (!aChunk) {
+      return;
+    }
+    // Takes ownership of one 'Done' chunk, to store it with the released ones.
+    MOZ_RELEASE_ASSERT(!aChunk->GetNext(), "ReleaseChunk only accepts 1 chunk");
+    MOZ_RELEASE_ASSERT(!aChunk->ChunkHeader().mDoneTimeStamp.IsNull(),
+                       "Released chunk should have a 'Done' timestamp");
+    // With `mMutex` held: update the byte counts and link the chunk in.
+    Update update = [&]() {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      // Keep a raw pointer to the newly-released chunk, so we can use it to
+      // prepare an update (after `aChunk` is moved-from).
+      const ProfileBufferChunk* const newlyReleasedChunk = aChunk.get();
+      // Transfer the chunk size from the unreleased bucket to the released one.
+      mUnreleasedBufferBytes -= aChunk->BufferBytes();
+      mReleasedBufferBytes += aChunk->BufferBytes();
+      if (!mReleasedChunks) {
+        // No other released chunks at the moment, we're starting the list.
+        MOZ_ASSERT(mReleasedBufferBytes == aChunk->BufferBytes());
+        mReleasedChunks = std::move(aChunk);
+      } else {
+        // Insert aChunk in mReleasedChunks to keep done-timestamp order.
+        const TimeStamp& releasedChunkDoneTimeStamp =
+            aChunk->ChunkHeader().mDoneTimeStamp;
+        if (releasedChunkDoneTimeStamp <
+            mReleasedChunks->ChunkHeader().mDoneTimeStamp) {
+          // aChunk is the oldest -> Insert at the beginning.
+          aChunk->SetLast(std::move(mReleasedChunks));
+          mReleasedChunks = std::move(aChunk);
+        } else {
+          // Go through the already-released chunk list, and insert aChunk
+          // before the first younger released chunk, or at the end.
+          ProfileBufferChunk* chunk = mReleasedChunks.get();
+          for (;;) {
+            ProfileBufferChunk* const nextChunk = chunk->GetNext();
+            if (!nextChunk || releasedChunkDoneTimeStamp <
+                                  nextChunk->ChunkHeader().mDoneTimeStamp) {
+              // Either we're at the last released chunk, or the next released
+              // chunk is younger -> Insert right after this released chunk.
+              chunk->InsertNext(std::move(aChunk));
+              break;
+            }
+            chunk = nextChunk;
+          }
+        }
+      }
+      // NOTE(review): the Update also lists chunks linked after the new one.
+      return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                    mReleasedChunks.get(), newlyReleasedChunk);
+    }();
+    // Report the change to the update callback (if any), under its own mutex.
+    baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+    if (mUpdateCallback && !update.IsNotUpdate()) {
+      mUpdateCallback(std::move(update));
+    }
+  }
+
+  void SetChunkDestroyedCallback(
+      std::function<void(const ProfileBufferChunk&)>&& aChunkDestroyedCallback)
+      final {  // Note: the stored callback is later invoked with mMutex held.
+    baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+    MOZ_ASSERT(mUser, "Not registered yet");
+    mChunkDestroyedCallback = std::move(aChunkDestroyedCallback);
+  }
+
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetExtantReleasedChunks() final {
+    UniquePtr<ProfileBufferChunk> extracted;
+    const size_t stillUnreleasedBytes = [&]() {
+      // Hand over the whole released list; nothing counts as released after.
+      baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      extracted = std::move(mReleasedChunks);
+      mReleasedBufferBytes = 0;
+      return mUnreleasedBufferBytes;
+    }();
+    // Tell the update callback (if any) that no released chunks remain here.
+    baseprofiler::detail::BaseProfilerAutoLock cbLock(mUpdateCallbackMutex);
+    if (mUpdateCallback) {
+      mUpdateCallback(Update(stillUnreleasedBytes, 0, nullptr, nullptr));
+    }
+    return extracted;
+  }
+
+  void ForgetUnreleasedChunks() final {
+    Update zeroedUpdate = [this] {  // Runs with `mMutex` held.
+      baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+      MOZ_ASSERT(mUser, "Not registered yet");
+      mUnreleasedBufferBytes = 0;  // Given-away chunks no longer count.
+      return Update(0, mReleasedBufferBytes, mReleasedChunks.get(), nullptr);
+    }();
+    baseprofiler::detail::BaseProfilerAutoLock cbLock(mUpdateCallbackMutex);
+    if (mUpdateCallback) {
+      mUpdateCallback(std::move(zeroedUpdate));
+    }
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+    return SizeOfExcludingThis(aMallocSizeOf, bufferLock);  // Locked overload.
+  }
+
+  [[nodiscard]] size_t SizeOfIncludingThis(
+      MallocSizeOf aMallocSizeOf) const final {
+    baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+    MOZ_ASSERT(mUser, "Not registered yet");  // Checked under the lock.
+    return SizeOfExcludingThis(aMallocSizeOf, bufferLock) + aMallocSizeOf(this);
+  }
+
+  void SetUpdateCallback(UpdateCallback&& aUpdateCallback) final {
+    {
+      baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+      if (mUpdateCallback) {
+        // Signal the end of the previous callback with a "final" Update.
+        std::move(mUpdateCallback)(Update(nullptr));
+        mUpdateCallback = nullptr;
+      }
+    }
+    // An empty `aUpdateCallback` only removes the previous callback.
+    if (aUpdateCallback) {
+      Update initialUpdate = [&]() {
+        baseprofiler::detail::BaseProfilerAutoLock lock(mMutex);
+        return Update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                      mReleasedChunks.get(), nullptr);
+      }();
+      // NOTE(review): changes made between these two locks go unreported.
+      baseprofiler::detail::BaseProfilerAutoLock lock(mUpdateCallbackMutex);
+      MOZ_ASSERT(!mUpdateCallback, "Only one update callback allowed");
+      mUpdateCallback = std::move(aUpdateCallback);
+      mUpdateCallback(std::move(initialUpdate));
+    }
+  }
+
+  void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) final {
+    MOZ_ASSERT(!aDoneTimeStamp.IsNull());
+    baseprofiler::detail::BaseProfilerAutoLock bufferLock(mMutex);
+    // Released chunks are kept in done-timestamp order, oldest first (see
+    // `ReleaseChunk()`), so only the head of the list needs to be examined.
+    // Keep discarding that head until either:
+    // - no released chunks remain, or
+    // - the head is strictly after `aDoneTimeStamp`.
+    // Each discarded chunk goes through `DiscardOldestReleasedChunk()`, which
+    // informs the chunk-destroyed callback and may keep the chunk for
+    // recycling. Note that no `Update` is sent to the update callback here.
+    while (mReleasedChunks &&
+           !(mReleasedChunks->ChunkHeader().mDoneTimeStamp > aDoneTimeStamp)) {
+      DiscardOldestReleasedChunk(bufferLock);
+    }
+  }
+
+ protected:
+  const ProfileBufferChunk* PeekExtantReleasedChunksAndLock() final {
+    mMutex.Lock();  // Released by UnlockAfterPeekExtantReleasedChunks().
+    MOZ_ASSERT(mUser, "Not registered yet");
+    return mReleasedChunks.get();
+  }
+  void UnlockAfterPeekExtantReleasedChunks() final { mMutex.Unlock(); }
+
+ private:
+  void MaybeRecycleChunk(
+      UniquePtr<ProfileBufferChunk>&& chunk,
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    // Too-small chunks are not kept. (Cheap test; sizes should all match now.)
+    if (chunk->BufferBytes() < mChunkMinBufferBytes) {
+      return;
+    }
+    // At most two chunks are kept for recycling: use the first slot if free,
+    // else the second; when both are taken, `chunk` is left untouched.
+    if (!mRecycledChunks) {
+      mRecycledChunks = std::move(chunk);
+    } else if (!mRecycledChunks->GetNext()) {
+      mRecycledChunks->InsertNext(std::move(chunk));
+    }
+  }
+
+  UniquePtr<ProfileBufferChunk> TakeRecycledChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    if (!mRecycledChunks) {
+      return nullptr;  // Nothing available for recycling.
+    }
+    auto head = std::exchange(mRecycledChunks, mRecycledChunks->ReleaseNext());
+    head->MarkRecycled();  // `head` was popped; its successor now leads.
+    return head;
+  }
+
+  void DiscardOldestReleasedChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    MOZ_ASSERT(!!mReleasedChunks);
+    // Unlink the list head (the oldest released chunk) and stop counting it.
+    UniquePtr<ProfileBufferChunk> head =
+        std::exchange(mReleasedChunks, mReleasedChunks->ReleaseNext());
+    mReleasedBufferBytes -= head->BufferBytes();
+    if (mChunkDestroyedCallback) {
+      mChunkDestroyedCallback(*head);  // Chunk is going away, tell the user.
+    }
+    MaybeRecycleChunk(std::move(head), aLock);  // Else destroyed on return.
+  }
+
+  using ChunkAndUpdate = std::pair<UniquePtr<ProfileBufferChunk>, Update>;
+  [[nodiscard]] ChunkAndUpdate GetChunk(
+      const baseprofiler::detail::BaseProfilerAutoLock& aLock) {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // Memory budget. Once this call returns, the memory accounted for here is
+    // the sum of:
+    // - Bytes in released (i.e., full) chunks,
+    // - Bytes in unreleased (still in use) chunks,
+    // - Bytes in the chunk handed out now, estimated as `mChunkMinBufferBytes`
+    //   (chunk header and extra allocated space are assumed negligible).
+    // As long as that sum reaches the local limit, the oldest released chunk
+    // gets discarded; a discarded chunk may be kept for recycling, to avoid a
+    // deallocation followed by an allocation. Unreleased chunks cannot be
+    // discarded from here, so the loop also ends when no released chunks are
+    // left, even if the limit is still exceeded.
+    const auto limitReached = [this]() {
+      return mReleasedBufferBytes + mUnreleasedBufferBytes +
+                 mChunkMinBufferBytes >=
+             mMaxTotalBytes;
+    };
+    while (limitReached() && !!mReleasedChunks) {
+      DiscardOldestReleasedChunk(aLock);
+    }
+
+    // Prefer a recycled chunk; otherwise try to create a new one. (Creation
+    // could still fail, leaving `newChunk` null.)
+    UniquePtr<ProfileBufferChunk> newChunk = TakeRecycledChunk(aLock);
+    if (!newChunk) {
+      newChunk = ProfileBufferChunk::Create(mChunkMinBufferBytes);
+    }
+    if (!newChunk) {
+      // Nothing to hand out: null chunk, paired with a "not-an-Update".
+      return ChunkAndUpdate{};
+    }
+
+    // We do have a chunk (recycled or new): count its size as "unreleased",
+    // and describe the resulting state in the returned update.
+    mUnreleasedBufferBytes += newChunk->BufferBytes();
+    Update update(mUnreleasedBufferBytes, mReleasedBufferBytes,
+                  mReleasedChunks.get(), nullptr);
+    return ChunkAndUpdate{std::move(newChunk), std::move(update)};
+  }
+
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf,
+      const baseprofiler::detail::BaseProfilerAutoLock&) const {
+    MOZ_ASSERT(mUser, "Not registered yet");
+    // Measures the chunk lists owned here (released, and kept for recycling).
+    // Note: Missing size of std::function external resources (if any).
+    const size_t releasedSize =
+        mReleasedChunks ? mReleasedChunks->SizeOfIncludingThis(aMallocSizeOf)
+                        : 0;
+    const size_t recycledSize =
+        mRecycledChunks ? mRecycledChunks->SizeOfIncludingThis(aMallocSizeOf)
+                        : 0;
+    return releasedSize + recycledSize;
+  }
+
+  // Maximum number of bytes that should be used by all unreleased and released
+  // chunks. Note that only released chunks can be destroyed here, so it is the
+  // responsibility of the user to properly release their chunks when possible.
+  const size_t mMaxTotalBytes;
+
+  // Minimum number of bytes that new chunks should be able to store.
+  // Used when calling `ProfileBufferChunk::Create()`.
+  const Length mChunkMinBufferBytes;
+
+  // Mutex guarding the following members.
+  mutable baseprofiler::detail::BaseProfilerMutex mMutex;
+
+  // Number of bytes currently held in chunks that have been given away (through
+  // `GetChunk` or `RequestChunk`) and not released yet.
+  size_t mUnreleasedBufferBytes = 0;
+
+  // Number of bytes currently held in chunks that have been released and stored
+  // in `mReleasedChunks` below.
+  size_t mReleasedBufferBytes = 0;
+
+  // List of all released chunks. The oldest one should be at the start of the
+  // list, and may be destroyed or recycled when the memory limit is reached.
+  UniquePtr<ProfileBufferChunk> mReleasedChunks;
+
+  // This may hold chunks that were released then slated for destruction, they
+  // will be reused next time an allocation would have been needed.
+  UniquePtr<ProfileBufferChunk> mRecycledChunks;
+
+  // Optional callback used to notify the user when a chunk is about to be
+  // destroyed or recycled. (The data content is always destroyed, but the chunk
+  // container may be reused.)
+  std::function<void(const ProfileBufferChunk&)> mChunkDestroyedCallback;
+
+  // Callback set from `RequestChunk()`, until it is serviced in
+  // `FulfillChunkRequests()`. There can only be one request in flight.
+  std::function<void(UniquePtr<ProfileBufferChunk>)> mChunkReceiver;
+
+  // Separate mutex guarding mUpdateCallback, so that it may be invoked outside
+  // of the main buffer `mMutex`.
+  mutable baseprofiler::detail::BaseProfilerMutex mUpdateCallbackMutex;
+
+  UpdateCallback mUpdateCallback;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferChunkManagerWithLocalLimit_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
new file mode 100644
index 0000000000..45b39b163c
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferControlledChunkManager.h
@@ -0,0 +1,203 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferControlledChunkManager_h
+#define ProfileBufferControlledChunkManager_h
+
+#include "mozilla/ProfileBufferChunk.h"
+
+#include <functional>
+#include <vector>
+
+namespace mozilla {
+
+// A "Controlled" chunk manager will provide updates about chunks that it
+// creates, releases, and destroys; and it can destroy released chunks as
+// requested.
+class ProfileBufferControlledChunkManager {
+ public:
+  using Length = ProfileBufferChunk::Length;
+
+  virtual ~ProfileBufferControlledChunkManager() = default;
+
+  // Minimum amount of chunk metadata to be transferred between processes.
+  struct ChunkMetadata {
+    // Timestamp when chunk was marked "done", which is used to:
+    // - determine its age, so the oldest one will be destroyed first,
+    // - uniquely identify this chunk in this process. (The parent process is
+    //   responsible for associating this timestamp to its process id.)
+    TimeStamp mDoneTimeStamp;
+    // Size of this chunk's buffer, in bytes (as given by `BufferBytes()`).
+    Length mBufferBytes;
+    // Memberwise constructor; there is no default constructor.
+    ChunkMetadata(TimeStamp aDoneTimeStamp, Length aBufferBytes)
+        : mDoneTimeStamp(aDoneTimeStamp), mBufferBytes(aBufferBytes) {}
+  };
+
+  // Class collecting all information necessary to describe updates that
+  // happened in a chunk manager.
+  // An update can be folded into a previous update.
+  class Update {
+   public:
+    // Construct a "not-an-Update" object (`IsNotUpdate()` is true), which
+    // should only be used after a real update is folded into it.
+    Update() = default;
+
+    // Construct a "final" Update (`IsFinal()` is true), which marks the end of
+    // all updates from a chunk manager.
+    explicit Update(decltype(nullptr)) : mUnreleasedBytes(FINAL) {}
+
+    // Construct an Update from the given byte counts and released chunks.
+    // Either chunk pointer may be null. `aNewlyReleasedChunks` is followed to
+    // the end of its list, whether or not it is linked to the existing list.
+    Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+           const ProfileBufferChunk* aExistingReleasedChunks,
+           const ProfileBufferChunk* aNewlyReleasedChunks)
+        : mUnreleasedBytes(aUnreleasedBytes),
+          mReleasedBytes(aReleasedBytes),
+          mOldestDoneTimeStamp(
+              aExistingReleasedChunks
+                  ? aExistingReleasedChunks->ChunkHeader().mDoneTimeStamp
+                  : TimeStamp{}) {
+      MOZ_RELEASE_ASSERT(
+          !IsNotUpdate(),
+          "Empty update should only be constructed with default constructor");
+      MOZ_RELEASE_ASSERT(
+          !IsFinal(),
+          "Final update should only be constructed with nullptr constructor");
+      const ProfileBufferChunk* cursor = aNewlyReleasedChunks;
+      while (cursor) {
+        mNewlyReleasedChunks.emplace_back(cursor->ChunkHeader().mDoneTimeStamp,
+                                          cursor->BufferBytes());
+        cursor = cursor->GetNext();
+      }
+    }
+
+    // Construct an Update from raw data, e.g. to re-construct an Update that
+    // was previously serialized. Values are stored as given: unlike in the
+    // chunk-based constructor, nothing is asserted about them here.
+    Update(size_t aUnreleasedBytes, size_t aReleasedBytes,
+           TimeStamp aOldestDoneTimeStamp,
+           std::vector<ChunkMetadata>&& aNewlyReleasedChunks)
+        : mUnreleasedBytes(aUnreleasedBytes),
+          mReleasedBytes(aReleasedBytes),
+          mOldestDoneTimeStamp(aOldestDoneTimeStamp),
+          mNewlyReleasedChunks(std::move(aNewlyReleasedChunks)) {}
+
+    // Clear the Update completely and return it to a "not-an-Update" state.
+    void Clear() {
+      mUnreleasedBytes = NO_UPDATE;  // This is what `IsNotUpdate()` tests.
+      mReleasedBytes = 0;
+      mOldestDoneTimeStamp = TimeStamp{};
+      mNewlyReleasedChunks.clear();
+    }
+
+    bool IsNotUpdate() const { return mUnreleasedBytes == NO_UPDATE; }
+    // Both special states are reserved values stored in `mUnreleasedBytes`.
+    bool IsFinal() const { return mUnreleasedBytes == FINAL; }
+
+    size_t UnreleasedBytes() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access UnreleasedBytes from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access UnreleasedBytes from final update");
+      return mUnreleasedBytes;  // Byte count of unreleased chunks.
+    }
+
+    size_t ReleasedBytes() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access ReleasedBytes from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access ReleasedBytes from final update");
+      return mReleasedBytes;  // Byte count of released chunks.
+    }
+
+    TimeStamp OldestDoneTimeStamp() const {
+      MOZ_RELEASE_ASSERT(!IsNotUpdate(),
+                         "Cannot access OldestDoneTimeStamp from empty update");
+      MOZ_RELEASE_ASSERT(!IsFinal(),
+                         "Cannot access OldestDoneTimeStamp from final update");
+      return mOldestDoneTimeStamp;  // May be a null TimeStamp.
+    }
+
+    const std::vector<ChunkMetadata>& NewlyReleasedChunksRef() const {
+      MOZ_RELEASE_ASSERT(
+          !IsNotUpdate(),
+          "Cannot access NewlyReleasedChunksRef from empty update");
+      MOZ_RELEASE_ASSERT(
+          !IsFinal(), "Cannot access NewlyReleasedChunksRef from final update");
+      return mNewlyReleasedChunks;  // May be empty.
+    }
+
+    // Fold a later update into this one: counts and oldest timestamp come from
+    // `aNewUpdate`; recorded chunks older than that timestamp are dropped, then
+    // the chunks from `aNewUpdate` are appended.
+    void Fold(Update&& aNewUpdate) {
+      MOZ_ASSERT(
+          !IsFinal() || aNewUpdate.IsFinal(),
+          "There shouldn't be another non-final update after the final update");
+
+      if (IsNotUpdate() || aNewUpdate.IsFinal()) {
+        // Nothing to merge with (empty), or final update: take it wholesale.
+        *this = std::move(aNewUpdate);
+        return;
+      }
+
+      mUnreleasedBytes = aNewUpdate.mUnreleasedBytes;
+      mReleasedBytes = aNewUpdate.mReleasedBytes;
+      if (!aNewUpdate.mOldestDoneTimeStamp.IsNull()) {
+        MOZ_ASSERT(mOldestDoneTimeStamp.IsNull() ||
+                   mOldestDoneTimeStamp <= aNewUpdate.mOldestDoneTimeStamp);
+        mOldestDoneTimeStamp = aNewUpdate.mOldestDoneTimeStamp;
+        // Find the leading run of too-old chunks, and erase it in one go.
+        auto it = mNewlyReleasedChunks.begin();
+        while (it != mNewlyReleasedChunks.end() &&
+               it->mDoneTimeStamp < mOldestDoneTimeStamp) {
+          ++it;
+        }
+        mNewlyReleasedChunks.erase(mNewlyReleasedChunks.begin(), it);
+      }
+      // `insert` sizes the storage itself; an empty range is a no-op.
+      mNewlyReleasedChunks.insert(mNewlyReleasedChunks.end(),
+                                  aNewUpdate.mNewlyReleasedChunks.begin(),
+                                  aNewUpdate.mNewlyReleasedChunks.end());
+    }
+
+   private:
+    static const size_t NO_UPDATE = size_t(-1);
+    static const size_t FINAL = size_t(-2);
+
+    size_t mUnreleasedBytes = NO_UPDATE;
+    size_t mReleasedBytes = 0;
+    TimeStamp mOldestDoneTimeStamp;
+    std::vector<ChunkMetadata> mNewlyReleasedChunks;
+  };
+
+  using UpdateCallback = std::function<void(Update&&)>;
+
+  // This *may* be set (or reset) by an object that needs to know about all
+  // chunk updates that happen in this manager. The main use will be to
+  // coordinate the global memory usage of Firefox.
+  // If a non-empty callback is given, it will be immediately invoked with the
+  // current state.
+  // When the callback is about to be destroyed (by overwriting it here, or in
+  // the class destructor), it will be invoked one last time with a final
+  // update (`Update::IsFinal()`).
+  // Note that the callback (even the first current-state callback) will be
+  // invoked from inside a locked scope, so it should *not* call other functions
+  // of the chunk manager. A side benefit of this locking is that it guarantees
+  // that no two invocations can overlap.
+  virtual void SetUpdateCallback(UpdateCallback&& aUpdateCallback) = 0;
+
+  // This is a request to destroy all chunks at or before the given timestamp.
+  // This timestamp should be one that was given in a previous UpdateCallback
+  // call. Obviously, only released chunks can be destroyed.
+  virtual void DestroyChunksAtOrBefore(TimeStamp aDoneTimeStamp) = 0;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferControlledChunkManager_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
new file mode 100644
index 0000000000..c8280a92d7
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntryKinds.h
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntryKinds_h
+#define ProfileBufferEntryKinds_h
+
+#include <cstdint>
+
+namespace mozilla {
+
+// This is equal to sizeof(double), which is the largest non-char variant in
+// |u|.
+static constexpr size_t ProfileBufferEntryNumChars = 8;
+
+// NOTE: If you add entries, you need to verify if they need to be added to the
+// switch statement in DuplicateLastSample!
+// X-macro: evaluates MACRO(KIND, TYPE, SIZE) once for each entry listed below.
+#define FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(MACRO)                 \
+  MACRO(CategoryPair, int, sizeof(int))                           \
+  MACRO(CollectionStart, double, sizeof(double))                  \
+  MACRO(CollectionEnd, double, sizeof(double))                    \
+  MACRO(Label, const char*, sizeof(const char*))                  \
+  MACRO(FrameFlags, uint64_t, sizeof(uint64_t))                   \
+  MACRO(DynamicStringFragment, char*, ProfileBufferEntryNumChars) \
+  MACRO(JitReturnAddr, void*, sizeof(void*))                      \
+  MACRO(InnerWindowID, uint64_t, sizeof(uint64_t))                \
+  MACRO(LineNumber, int, sizeof(int))                             \
+  MACRO(ColumnNumber, int, sizeof(int))                           \
+  MACRO(NativeLeafAddr, void*, sizeof(void*))                     \
+  MACRO(Pause, double, sizeof(double))                            \
+  MACRO(Resume, double, sizeof(double))                           \
+  MACRO(PauseSampling, double, sizeof(double))                    \
+  MACRO(ResumeSampling, double, sizeof(double))                   \
+  MACRO(Responsiveness, double, sizeof(double))                   \
+  MACRO(ThreadId, int, sizeof(int))                               \
+  MACRO(Time, double, sizeof(double))                             \
+  MACRO(TimeBeforeCompactStack, double, sizeof(double))           \
+  MACRO(CounterId, void*, sizeof(void*))                          \
+  MACRO(CounterKey, uint64_t, sizeof(uint64_t))                   \
+  MACRO(Number, uint64_t, sizeof(uint64_t))                       \
+  MACRO(Count, int64_t, sizeof(int64_t))                          \
+  MACRO(ProfilerOverheadTime, double, sizeof(double))             \
+  MACRO(ProfilerOverheadDuration, double, sizeof(double))
+
+// The `Kind` is a single byte identifying the type of data that is actually
+// stored in a `ProfileBufferEntry`, as per the list in
+// `FOR_EACH_PROFILE_BUFFER_ENTRY_KIND`.
+//
+// This byte is also used to identify entries in ProfileChunkedBuffer blocks,
+// for both "legacy" entries that do contain a `ProfileBufferEntry`, and for
+// new types of entries that may carry more data of different types.
+// TODO: Eventually each type of "legacy" entry should be replaced with newer,
+// more efficient kinds of entries (e.g., stack frames could be stored in one
+// bigger entry, instead of multiple `ProfileBufferEntry`s); then we could
+// discard `ProfileBufferEntry` and move this enum to a more appropriate spot.
+using ProfileBufferEntryKindUnderlyingType = uint8_t;
+
+enum class ProfileBufferEntryKind : ProfileBufferEntryKindUnderlyingType {
+  INVALID = 0,
+#define KIND(KIND, TYPE, SIZE) KIND,
+  FOR_EACH_PROFILE_BUFFER_ENTRY_KIND(KIND)
+#undef KIND
+  // The macro above generated one enumerator per listed kind, from value 1.
+  // Any value under `LEGACY_LIMIT` represents a `ProfileBufferEntry`.
+  LEGACY_LIMIT,
+
+  // Any value starting here does *not* represent a `ProfileBufferEntry` and
+  // requires separate decoding and handling.
+
+  // Markers and their data.
+  Marker = LEGACY_LIMIT,
+
+  // Entry with "running times", such as CPU usage measurements.
+  // Optional between TimeBeforeCompactStack and CompactStack.
+  RunningTimes,
+
+  // Optional between TimeBeforeCompactStack and CompactStack.
+  UnresponsiveDurationMs,
+
+  // Collection of legacy stack entries, must follow a ThreadId and
+  // TimeBeforeCompactStack (which are not included in the CompactStack;
+  // TimeBeforeCompactStack is equivalent to Time, but indicates that a
+  // CompactStack follows shortly afterwards).
+  CompactStack,
+  // One past the last kind above.
+  MODERN_LIMIT
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferEntryKinds_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
new file mode 100644
index 0000000000..267b99f10d
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferEntrySerialization.h
@@ -0,0 +1,1166 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferEntrySerialization_h
+#define ProfileBufferEntrySerialization_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/leb128iterator.h"
+#include "mozilla/Likely.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ProfileBufferIndex.h"
+#include "mozilla/Span.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Variant.h"
+
+#include <string>
+#include <tuple>
+
+namespace mozilla {
+
+class ProfileBufferEntryWriter;
+
+// Iterator-like class used to read from an entry.
+// An entry may be split in two memory segments (e.g., the ends of a ring
+// buffer, or two chunks of a chunked buffer); it doesn't deal with this
+// underlying buffer, but only with one or two spans pointing at the space
+// where the entry lives.
+class ProfileBufferEntryReader {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfConstBytes = Span<const Byte>;
+
+  // Class to be specialized for types to be read from a profile buffer entry.
+  // See common specializations at the bottom of this header.
+  // The following static functions must be provided:
+  //   static void ReadInto(ProfileBufferEntryReader& aER, T& aT)
+  //   {
+  //     /* Call `aER.ReadX(...)` function to deserialize into aT, be sure to
+  //        read exactly `Bytes(aT)` bytes! */
+  //   }
+  //   static T Read(ProfileBufferEntryReader& aER) {
+  //     /* Call `aER.ReadX(...)` function to deserialize and return a `T`, be
+  //        sure to read exactly `Bytes(returned value)` bytes! */
+  //   }
+  template <typename T>
+  struct Deserializer;
+
+  ProfileBufferEntryReader() = default;
+
+  // Reader over one Span.
+  ProfileBufferEntryReader(SpanOfConstBytes aSpan,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpan),
+        mNextSpanOrEmpty(aSpan.Last(0)),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    // 2nd internal Span points at the end of the 1st internal Span, to enforce
+    // invariants.
+    CheckInvariants();
+  }
+
+  // Reader over two Spans, the second one must not be empty.
+  ProfileBufferEntryReader(SpanOfConstBytes aSpanHead,
+                           SpanOfConstBytes aSpanTail,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpanHead),
+        mNextSpanOrEmpty(aSpanTail),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    MOZ_RELEASE_ASSERT(!mNextSpanOrEmpty.IsEmpty());
+    if (MOZ_UNLIKELY(mCurrentSpan.IsEmpty())) {
+      // First span is already empty, skip it.
+      mCurrentSpan = mNextSpanOrEmpty;
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+  }
+
+  // Allow copying, which is needed when used as an iterator in some std
+  // functions (e.g., string assignment), and to occasionally backtrack.
+  // Be aware that the main profile buffer APIs give a reference to an entry
+  // reader, and expect that reader to advance to the end of the entry, so don't
+  // just advance copies!
+  ProfileBufferEntryReader(const ProfileBufferEntryReader&) = default;
+  ProfileBufferEntryReader& operator=(const ProfileBufferEntryReader&) =
+      default;
+
+  // Don't =default moving, as it doesn't bring any benefit in this class.
+
+  [[nodiscard]] Length RemainingBytes() const {  // Sum over both spans.
+    return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
+  }
+
+  void SetRemainingBytes(Length aBytes) {  // Keep only the first aBytes.
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (aBytes <= mCurrentSpan.LengthBytes()) {  // Ends in the 1st span.
+      mCurrentSpan = mCurrentSpan.First(aBytes);
+      mNextSpanOrEmpty = mCurrentSpan.Last(0);
+    } else {  // Ends inside the 2nd span, only that one is shortened.
+      mNextSpanOrEmpty =
+          mNextSpanOrEmpty.First(aBytes - mCurrentSpan.LengthBytes());
+    }
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+    return mCurrentBlockIndex;
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+    return mNextBlockIndex;
+  }
+
+  // Create a reader of size zero, pointing at aOffset past the current position
+  // of this Reader, so it can be used as end iterator.
+  [[nodiscard]] ProfileBufferEntryReader EmptyIteratorAtOffset(
+      Length aOffset) const {
+    MOZ_RELEASE_ASSERT(aOffset <= RemainingBytes());
+    if (MOZ_LIKELY(aOffset < mCurrentSpan.LengthBytes())) {
+      // aOffset is before the end of mCurrentSpan.
+      return ProfileBufferEntryReader(mCurrentSpan.Subspan(aOffset, 0),
+                                      mCurrentBlockIndex, mNextBlockIndex);
+    }
+    // aOffset is right at the end of mCurrentSpan, or inside mNextSpanOrEmpty.
+    return ProfileBufferEntryReader(
+        mNextSpanOrEmpty.Subspan(aOffset - mCurrentSpan.LengthBytes(), 0),
+        mCurrentBlockIndex, mNextBlockIndex);
+  }
+
+  // Be like a limited input iterator, with only `*`, prefix-`++`, `==`, `!=`.
+  // These definitions are expected by std functions, to recognize this as an
+  // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+  using difference_type = std::make_signed_t<Length>;
+  using value_type = Byte;
+  using pointer = const Byte*;
+  using reference = const Byte&;
+  using iterator_category = std::input_iterator_tag;
+
+  [[nodiscard]] const Byte& operator*() {
+    // Assume the caller will read from the returned reference (and not just
+    // take the address).
+    MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+    return *(mCurrentSpan.Elements());
+  }
+
+  ProfileBufferEntryReader& operator++() {
+    MOZ_RELEASE_ASSERT(mCurrentSpan.LengthBytes() >= 1);
+    if (MOZ_LIKELY(mCurrentSpan.LengthBytes() > 1)) {
+      // More than 1 byte left in mCurrentSpan, just eat it.
+      mCurrentSpan = mCurrentSpan.From(1);
+    } else {
+      // mCurrentSpan will be empty, move mNextSpanOrEmpty to mCurrentSpan.
+      mCurrentSpan = mNextSpanOrEmpty;
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+    return *this;
+  }
+
+  ProfileBufferEntryReader& operator+=(Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // All bytes are in mCurrentSpan.
+      // Update mCurrentSpan past the read bytes.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) {
+        // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into
+        // mCurrentSpan.
+        mCurrentSpan = mNextSpanOrEmpty;
+        mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+      }
+    } else {
+      // mCurrentSpan does not hold enough bytes.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap.
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+    return *this;
+  }
+
+  [[nodiscard]] bool operator==(const ProfileBufferEntryReader& aOther) const {
+    return mCurrentSpan.Elements() == aOther.mCurrentSpan.Elements();
+  }
+  [[nodiscard]] bool operator!=(const ProfileBufferEntryReader& aOther) const {
+    return mCurrentSpan.Elements() != aOther.mCurrentSpan.Elements();
+  }
+
+  // Read an unsigned LEB128 number and move iterator ahead.
+  template <typename T>
+  [[nodiscard]] T ReadULEB128() {
+    return ::mozilla::ReadULEB128<T>(*this);
+  }
+
+  // Read a sequence of bytes, like memcpy, and move iterator ahead.
+  void ReadBytes(void* aDest, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // All bytes are in mCurrentSpan.
+      memcpy(aDest, mCurrentSpan.Elements(), aBytes);
+      // Update mCurrentSpan past the read bytes.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+      if (mCurrentSpan.IsEmpty() && !mNextSpanOrEmpty.IsEmpty()) {
+        // Don't leave mCurrentSpan empty, move non-empty mNextSpanOrEmpty into
+        // mCurrentSpan.
+        mCurrentSpan = mNextSpanOrEmpty;
+        mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+      }
+    } else {
+      // mCurrentSpan does not hold enough bytes.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap.
+      // Split data between the end of mCurrentSpan and the beginning of
+      // mNextSpanOrEmpty.
+      memcpy(aDest, mCurrentSpan.Elements(), mCurrentSpan.LengthBytes());
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      memcpy(reinterpret_cast<Byte*>(aDest) + mCurrentSpan.LengthBytes(),
+             mNextSpanOrEmpty.Elements(), tail);
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.Last(0);
+    }
+    CheckInvariants();
+  }
+
+  template <typename T>
+  void ReadIntoObject(T& aObject) {
+    Deserializer<T>::ReadInto(*this, aObject);
+  }
+
+  // Read into one or more objects, sequentially.
+  // Calling `ReadIntoObjects()` with no arguments is allowed (it does
+  // nothing), which could be useful for generic programming.
+  template <typename... Ts>
+  void ReadIntoObjects(Ts&... aTs) {
+    (ReadIntoObject(aTs), ...);
+  }
+
+  // Read data as an object and move iterator ahead.
+  template <typename T>
+  [[nodiscard]] T ReadObject() {
+    T ob = Deserializer<T>::Read(*this);
+    return ob;
+  }
+
+ private:
+  friend class ProfileBufferEntryWriter;
+
+  // Invariants:
+  // - mCurrentSpan cannot be empty unless mNextSpanOrEmpty is also empty. So
+  //   mCurrentSpan always points at the next byte to read or the end.
+  // - If mNextSpanOrEmpty is empty, it points at the end of mCurrentSpan. So
+  //   when reaching the end of mCurrentSpan, we can blindly move
+  //   mNextSpanOrEmpty to mCurrentSpan and keep the invariants.
+  SpanOfConstBytes mCurrentSpan;
+  SpanOfConstBytes mNextSpanOrEmpty;
+  ProfileBufferBlockIndex mCurrentBlockIndex;
+  ProfileBufferBlockIndex mNextBlockIndex;
+
+  void CheckInvariants() const {  // Asserts the two invariants listed above.
+    MOZ_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+    MOZ_ASSERT(!mNextSpanOrEmpty.IsEmpty() ||
+               (mNextSpanOrEmpty == mCurrentSpan.Last(0)));
+  }
+};
+
+// Iterator-like class used to write into an entry.
+// An entry may be split in two memory segments (e.g., the ends of a ring
+// buffer, or two chunks of a chunked buffer); it doesn't deal with this
+// underlying buffer, but only with one or two spans pointing at the space
+// reserved for the entry.
+class ProfileBufferEntryWriter {
+ public:
+  using Byte = uint8_t;
+  using Length = uint32_t;
+
+  using SpanOfBytes = Span<Byte>;
+
+  // Class to be specialized for types to be written in an entry.
+  // See common specializations at the bottom of this header.
+  // The following static functions must be provided:
+  //   static Length Bytes(const T& aT) {
+  //     /* Return number of bytes that will be written. */
+  //   }
+  //   static void Write(ProfileBufferEntryWriter& aEW,
+  //                     const T& aT) {
+  //     /* Call `aEW.WriteX(...)` functions to serialize aT, be sure to write
+  //        exactly `Bytes(aT)` bytes! */
+  //   }
+  template <typename T>
+  struct Serializer;
+
+  ProfileBufferEntryWriter() = default;
+
+  ProfileBufferEntryWriter(SpanOfBytes aSpan,  // Writer over one Span.
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpan),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {}
+
+  ProfileBufferEntryWriter(SpanOfBytes aSpanHead, SpanOfBytes aSpanTail,
+                           ProfileBufferBlockIndex aCurrentBlockIndex,
+                           ProfileBufferBlockIndex aNextBlockIndex)
+      : mCurrentSpan(aSpanHead),
+        mNextSpanOrEmpty(aSpanTail),
+        mCurrentBlockIndex(aCurrentBlockIndex),
+        mNextBlockIndex(aNextBlockIndex) {
+    // Either:
+    // - mCurrentSpan is not empty, OR
+    // - mCurrentSpan is empty, and then mNextSpanOrEmpty is empty as well.
+    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+  }
+
+  // Disable copying and moving, so we can't have multiple writing heads.
+  ProfileBufferEntryWriter(const ProfileBufferEntryWriter&) = delete;
+  ProfileBufferEntryWriter& operator=(const ProfileBufferEntryWriter&) = delete;
+  ProfileBufferEntryWriter(ProfileBufferEntryWriter&&) = delete;
+  ProfileBufferEntryWriter& operator=(ProfileBufferEntryWriter&&) = delete;
+
+  void Set() {  // Reset to an empty writer: empty spans, null block indices.
+    mCurrentSpan = SpanOfBytes{};
+    mNextSpanOrEmpty = SpanOfBytes{};
+    mCurrentBlockIndex = nullptr;
+    mNextBlockIndex = nullptr;
+  }
+
+  void Set(SpanOfBytes aSpan, ProfileBufferBlockIndex aCurrentBlockIndex,
+           ProfileBufferBlockIndex aNextBlockIndex) {
+    mCurrentSpan = aSpan;
+    mNextSpanOrEmpty = SpanOfBytes{};
+    mCurrentBlockIndex = aCurrentBlockIndex;
+    mNextBlockIndex = aNextBlockIndex;
+  }
+
+  void Set(SpanOfBytes aSpan0, SpanOfBytes aSpan1,
+           ProfileBufferBlockIndex aCurrentBlockIndex,
+           ProfileBufferBlockIndex aNextBlockIndex) {
+    mCurrentSpan = aSpan0;
+    mNextSpanOrEmpty = aSpan1;
+    mCurrentBlockIndex = aCurrentBlockIndex;
+    mNextBlockIndex = aNextBlockIndex;
+    // Either:
+    // - mCurrentSpan is not empty, OR
+    // - mCurrentSpan is empty, and then mNextSpanOrEmpty is empty as well.
+    MOZ_RELEASE_ASSERT(!mCurrentSpan.IsEmpty() || mNextSpanOrEmpty.IsEmpty());
+  }
+
+  [[nodiscard]] Length RemainingBytes() const {  // Sum over both spans.
+    return mCurrentSpan.LengthBytes() + mNextSpanOrEmpty.LengthBytes();
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+    return mCurrentBlockIndex;
+  }
+
+  [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+    return mNextBlockIndex;
+  }
+
+  // Be like a limited output iterator, with only `*` and prefix-`++`.
+  // These definitions are expected by std functions, to recognize this as an
+  // iterator. See https://en.cppreference.com/w/cpp/iterator/iterator_traits
+  using value_type = Byte;
+  using pointer = Byte*;
+  using reference = Byte&;
+  using iterator_category = std::output_iterator_tag;
+
+  [[nodiscard]] Byte& operator*() {
+    MOZ_RELEASE_ASSERT(RemainingBytes() >= 1);
+    return *(
+        (MOZ_LIKELY(!mCurrentSpan.IsEmpty()) ? mCurrentSpan : mNextSpanOrEmpty)
+            .Elements());
+  }
+
+  ProfileBufferEntryWriter& operator++() {
+    if (MOZ_LIKELY(mCurrentSpan.LengthBytes() >= 1)) {
+      // There is at least 1 byte in mCurrentSpan, eat it.
+      mCurrentSpan = mCurrentSpan.From(1);
+    } else {
+      // mCurrentSpan is empty, move mNextSpanOrEmpty (past the first byte) to
+      // mCurrentSpan.
+      MOZ_RELEASE_ASSERT(mNextSpanOrEmpty.LengthBytes() >= 1);
+      mCurrentSpan = mNextSpanOrEmpty.From(1);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+    }
+    return *this;
+  }
+
+  ProfileBufferEntryWriter& operator+=(Length aBytes) {
+    // Note: This is a rare operation. The code below is a copy of `WriteBytes`
+    // but without the `memcpy`s.
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // Data fits in mCurrentSpan.
+      // Update mCurrentSpan. It may become empty, so in case of a double span,
+      // the next call will go to the false case below.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+    } else {
+      // Data does not fully fit in mCurrentSpan.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap or starts there.
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+    }
+    return *this;
+  }
+
+  // Number of bytes needed to represent `aValue` in unsigned LEB128.
+  template <typename T>
+  [[nodiscard]] static unsigned ULEB128Size(T aValue) {
+    return ::mozilla::ULEB128Size(aValue);
+  }
+
+  // Write number as unsigned LEB128 and move iterator ahead.
+  template <typename T>
+  void WriteULEB128(T aValue) {
+    ::mozilla::WriteULEB128(aValue, *this);
+  }
+
+  // Number of bytes needed to serialize objects (0 when given no objects).
+  template <typename... Ts>
+  [[nodiscard]] static Length SumBytes(const Ts&... aTs) {
+    return (0 + ... + Serializer<Ts>::Bytes(aTs));
+  }
+
+  // Write a sequence of bytes, like memcpy, and move iterator ahead.
+  void WriteBytes(const void* aSrc, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    if (MOZ_LIKELY(aBytes <= mCurrentSpan.LengthBytes())) {
+      // Data fits in mCurrentSpan.
+      memcpy(mCurrentSpan.Elements(), aSrc, aBytes);
+      // Update mCurrentSpan. It may become empty, so in case of a double span,
+      // the next call will go to the false case below.
+      mCurrentSpan = mCurrentSpan.From(aBytes);
+    } else {
+      // Data does not fully fit in mCurrentSpan.
+      // This should only happen at most once: Only for double spans, and when
+      // data crosses the gap or starts there.
+      // Split data between the end of mCurrentSpan and the beginning of
+      // mNextSpanOrEmpty. (mCurrentSpan could be empty, it's ok to do a memcpy
+      // because Span::Elements() is never null.)
+      memcpy(mCurrentSpan.Elements(), aSrc, mCurrentSpan.LengthBytes());
+      const Length tail =
+          aBytes - static_cast<Length>(mCurrentSpan.LengthBytes());
+      memcpy(mNextSpanOrEmpty.Elements(),
+             reinterpret_cast<const Byte*>(aSrc) + mCurrentSpan.LengthBytes(),
+             tail);
+      // Move mNextSpanOrEmpty to mCurrentSpan, past the data. So the next call
+      // will go back to the true case above.
+      mCurrentSpan = mNextSpanOrEmpty.From(tail);
+      mNextSpanOrEmpty = mNextSpanOrEmpty.First(0);
+    }
+  }
+
+  void WriteFromReader(ProfileBufferEntryReader& aReader, Length aBytes) {
+    MOZ_RELEASE_ASSERT(aBytes <= RemainingBytes());
+    MOZ_RELEASE_ASSERT(aBytes <= aReader.RemainingBytes());
+    Length read0 = std::min(
+        aBytes, static_cast<Length>(aReader.mCurrentSpan.LengthBytes()));
+    if (read0 != 0) {  // Part that lives in the reader's 1st span.
+      WriteBytes(aReader.mCurrentSpan.Elements(), read0);
+    }
+    Length read1 = aBytes - read0;
+    if (read1 != 0) {  // Remainder, taken from the reader's 2nd span.
+      WriteBytes(aReader.mNextSpanOrEmpty.Elements(), read1);
+    }
+    aReader += aBytes;  // The reader is advanced past the copied bytes.
+  }
+
+  // Write a single object by using the appropriate Serializer.
+  template <typename T>
+  void WriteObject(const T& aObject) {
+    Serializer<T>::Write(*this, aObject);
+  }
+
+  // Write one or more objects, sequentially.
+  // Calling `WriteObjects()` with no arguments is allowed (it does nothing),
+  // which could be useful for generic programming.
+  template <typename... Ts>
+  void WriteObjects(const Ts&... aTs) {
+    (WriteObject(aTs), ...);
+  }
+
+ private:
+  // The two spans covering the memory still to be written.
+  SpanOfBytes mCurrentSpan;
+  SpanOfBytes mNextSpanOrEmpty;
+  ProfileBufferBlockIndex mCurrentBlockIndex;
+  ProfileBufferBlockIndex mNextBlockIndex;
+};
+
+// ============================================================================
+// Serializer and Deserializer ready-to-use specializations.
+
+// ----------------------------------------------------------------------------
+// Trivially-copyable types (default)
+
+// The default implementation works for all trivially-copyable types (e.g.,
+// PODs).
+//
+// Usage: `aEW.WriteObject(123);`.
+//
+// Raw pointers, though trivially-copyable, are explicitly forbidden when
+// writing (to avoid unexpected leaks/UAFs), instead use one of
+// `WrapProfileBufferLiteralCStringPointer`, `WrapProfileBufferUnownedCString`,
+// or `WrapProfileBufferRawPointer` as needed.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer {
+  static_assert(std::is_trivially_copyable_v<T>,
+                "Serializer only works with trivially-copyable types by "
+                "default, use/add specialization for other types.");
+
+  // The serialized form is the object's raw bytes, so the size is fixed.
+  static constexpr Length Bytes(const T&) { return sizeof(T); }
+  // Copy the object representation of `aT` into the entry.
+  static void Write(ProfileBufferEntryWriter& aEW, const T& aT) {
+    static_assert(!std::is_pointer_v<T>,
+                  "Serializer won't write raw pointers by default, use "
+                  "WrapProfileBufferRawPointer or other.");
+    aEW.WriteBytes(&aT, sizeof(aT));
+  }
+};
+
+// Usage: `aER.ReadObject<int>();` or `int x; aER.ReadIntoObject(x);`.
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer {
+  static_assert(std::is_trivially_copyable_v<T>,
+                "Deserializer only works with trivially-copyable types by "
+                "default, use/add specialization for other types.");
+
+  // Overwrite `aT` with the next `sizeof(T)` bytes of the entry.
+  static void ReadInto(ProfileBufferEntryReader& aER, T& aT) {
+    aER.ReadBytes(&aT, sizeof(aT));
+  }
+  static T Read(ProfileBufferEntryReader& aER) {
+    // A default-initialized `T` is created first and then filled with bytes
+    // from the buffer; trivially-copyable types allow this without UB.
+    T result;
+    aER.ReadBytes(&result, sizeof(result));
+    return result;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Strip const/volatile/reference from types.
+
+// `const T` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<const T>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<const T>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// `volatile T` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<volatile T>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<volatile T>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// An lvalue reference `T&` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<T&>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<T&>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// An rvalue reference `T&&` is (de)serialized exactly like `T`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<T&&>
+    : ProfileBufferEntryWriter::Serializer<T> {};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<T&&>
+    : ProfileBufferEntryReader::Deserializer<T> {};
+
+// ----------------------------------------------------------------------------
+// ProfileBufferBlockIndex
+
+// ProfileBufferBlockIndex, serialized as the underlying value.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferBlockIndex> {
+  static constexpr Length Bytes(const ProfileBufferBlockIndex&) {
+    return sizeof(ProfileBufferBlockIndex);
+  }
+  // Copy the in-memory representation of the index into the entry.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferBlockIndex& aBlockIndex) {
+    aEW.WriteBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+  }
+};
+
+template <>
+struct ProfileBufferEntryReader::Deserializer<ProfileBufferBlockIndex> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileBufferBlockIndex& aBlockIndex) {
+    aER.ReadBytes(&aBlockIndex, sizeof(ProfileBufferBlockIndex));
+  }
+  // Default-construct an index, then overwrite it with the stored bytes.
+  static ProfileBufferBlockIndex Read(ProfileBufferEntryReader& aER) {
+    ProfileBufferBlockIndex index;
+    aER.ReadBytes(&index, sizeof(index));
+    return index;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Literal C string pointer
+
+// Wrapper around a pointer to a literal C string; only the address is held.
+template <size_t NonTerminalCharacters>  // Length without the final '\0'.
+struct ProfileBufferLiteralCStringPointer {
+  const char* mCString;  // The wrapped address; the characters are not copied.
+};
+
+// Build the wrapper from a reference to a char array such as a literal.
+template <size_t CharactersIncludingTerminal>
+auto WrapProfileBufferLiteralCStringPointer(
+    const char (&aCString)[CharactersIncludingTerminal])
+    -> ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal - 1> {
+  return {aCString};
+}
+
+// Literal C strings, serialized as the raw pointer because it is unique and
+// valid for the whole program lifetime.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferLiteralCStringPointer("hi"));`.
+//
+// No deserializer is provided for this type, instead it must be deserialized as
+// a raw pointer: `aER.ReadObject<const char*>();`
+template <size_t CharactersIncludingTerminal>
+struct ProfileBufferEntryWriter::Serializer<
+    ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>> {
+  static constexpr Length Bytes(
+      const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>&) {
+    // Only the pointer is stored, so the size is independent from the pointer
+    // value and from the length of the string.
+    return sizeof(const char*);
+  }
+
+  static void Write(
+      ProfileBufferEntryWriter& aEW,
+      const ProfileBufferLiteralCStringPointer<CharactersIncludingTerminal>&
+          aWrapper) {
+    // Write the pointer *value*, not the string contents: copy from `&mCString`.
+    aEW.WriteBytes(&aWrapper.mCString, sizeof(aWrapper.mCString));
+  }
+};
+
+// ----------------------------------------------------------------------------
+// C string contents
+
+// Wrapper around a pointer to a C string whose contents will be serialized.
+struct ProfileBufferUnownedCString {
+  const char* mCString;  // Passed to strlen() on write: non-null, terminated.
+};
+
+// Tag `aCString` so that the characters it points at get serialized.
+inline ProfileBufferUnownedCString WrapProfileBufferUnownedCString(
+    const char* aCString) {
+  return ProfileBufferUnownedCString{aCString};
+}
+
+// The contents of a (probably) unowned C string are serialized as the number of
+// characters (encoded as ULEB128) and all the characters in the string. The
+// terminal '\0' is omitted.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferUnownedCString(str.c_str()))`.
+//
+// No deserializer is provided for this pointer type, instead it must be
+// deserialized as one of the other string types that manages its contents,
+// e.g.: `aER.ReadObject<std::string>();`
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferUnownedCString> {
+  static Length Bytes(const ProfileBufferUnownedCString& aS) {
+    const auto charCount = strlen(aS.mCString);
+    return ULEB128Size(charCount) + charCount;
+  }
+  // Emit the ULEB128 character count first, then the characters themselves.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferUnownedCString& aS) {
+    const auto charCount = strlen(aS.mCString);
+    aEW.WriteULEB128(charCount);
+    aEW.WriteBytes(aS.mCString, charCount);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// Raw pointers
+
+// Wrapper around a pointer to be serialized as the raw pointer value.
+template <typename T>
+struct ProfileBufferRawPointer {
+  T* mRawPointer;  // Only this address is serialized, never the pointee.
+};
+
+// Tag `aRawPointer` so that its address value itself gets serialized.
+template <typename T>
+ProfileBufferRawPointer<T> WrapProfileBufferRawPointer(T* aRawPointer) {
+  return ProfileBufferRawPointer<T>{aRawPointer};
+}
+
+// Raw pointers are serialized as the raw pointer value.
+//
+// Usage: `aEW.WriteObject(WrapProfileBufferRawPointer(ptr));`
+//
+// The wrapper is compulsory when writing pointers (to avoid unexpected
+// leaks/UAFs), but reading can be done straight into a raw pointer object,
+// e.g.: `aER.ReadObject<Foo*>();`.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<ProfileBufferRawPointer<T>> {
+  // Accepts any argument type: the size only depends on the pointer type.
+  template <typename U>
+  static constexpr Length Bytes(const U&) { return sizeof(T*); }
+
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileBufferRawPointer<T>& aWrapper) {
+    T* const rawPointer = aWrapper.mRawPointer;
+    aEW.WriteBytes(&rawPointer, sizeof(rawPointer));
+  }
+};
+
+// Usage: `aER.ReadObject<Foo*>();` or `Foo* p; aER.ReadIntoObject(p);`, no
+// wrapper necessary.
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<ProfileBufferRawPointer<T>> {
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileBufferRawPointer<T>& aPtr) {
+    // Read exactly one pointer, sized from the destination member itself.
+    aER.ReadBytes(&aPtr.mRawPointer, sizeof(aPtr.mRawPointer));
+  }
+  static ProfileBufferRawPointer<T> Read(ProfileBufferEntryReader& aER) {
+    ProfileBufferRawPointer<T> rawPointer;  // Filled in by ReadInto().
+    ReadInto(aER, rawPointer);
+    return rawPointer;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// std::string contents
+
+// std::string contents are serialized as the number of characters (encoded as
+// ULEB128) and all the characters in the string. The terminal '\0' is omitted.
+//
+// Usage: `std::string s = ...; aEW.WriteObject(s);`
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<std::basic_string<CHAR>> {
+  static Length Bytes(const std::basic_string<CHAR>& aS) {
+    // Must match what Write() emits: the ULEB128 count plus `len` CHARs.
+    const Length len = static_cast<Length>(aS.length());
+    return ULEB128Size(len) + static_cast<Length>(len * sizeof(CHAR));
+  }
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const std::basic_string<CHAR>& aS) {
+    const Length len = static_cast<Length>(aS.length());
+    aEW.WriteULEB128(len);
+    aEW.WriteBytes(aS.c_str(), static_cast<Length>(len * sizeof(CHAR)));
+  }
+};
+
+// Usage: `std::string s = aER.ReadObject<std::string>();` or
+// `std::string s; aER.ReadIntoObject(s);`
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<std::basic_string<CHAR>> {
+  static void ReadCharsInto(ProfileBufferEntryReader& aER,
+                            std::basic_string<CHAR>& aS, size_t aLength) {
+    // `assign` works on copies of the reader used as [begin, end) iterators,
+    // so `aER` itself still has to be moved past the characters afterwards.
+    const ProfileBufferEntryReader end = aER.EmptyIteratorAtOffset(aLength);
+    aS.assign(aER, end);
+    aER += aLength;
+  }
+
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       std::basic_string<CHAR>& aS) {
+    // The character count comes first, encoded as ULEB128.
+    using SizeType = typename std::basic_string<CHAR>::size_type;
+    ReadCharsInto(aER, aS, aER.ReadULEB128<SizeType>());
+  }
+
+  static std::basic_string<CHAR> ReadChars(ProfileBufferEntryReader& aER,
+                                           size_t aLength) {
+    // Same iterator-pair approach as ReadCharsInto(), but constructing.
+    std::basic_string<CHAR> chars(aER, aER.EmptyIteratorAtOffset(aLength));
+    aER += aLength;
+    return chars;
+  }
+  static std::basic_string<CHAR> Read(ProfileBufferEntryReader& aER) {
+    // The character count comes first, encoded as ULEB128.
+    using SizeType = typename std::basic_string<CHAR>::size_type;
+    return ReadChars(aER, aER.ReadULEB128<SizeType>());
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::UniqueFreePtr<CHAR>
+
+// UniqueFreePtr<CHAR>, which points at a string allocated with `malloc`
+// (typically generated by `strdup()`), is serialized as the number of
+// *bytes* (encoded as ULEB128) and all the characters in the string. The
+// null terminator is omitted.
+// `CHAR` can be any type that has a specialization for
+// `std::char_traits<CHAR>::length(const CHAR*)`.
+//
+// Note: A nullptr pointer will be serialized like an empty string, so when
+// deserializing it will result in an allocated buffer only containing a
+// single null terminator.
+template <typename CHAR>
+struct ProfileBufferEntryWriter::Serializer<UniqueFreePtr<CHAR>> {
+  // Number of bytes needed to serialize `aS`: the ULEB128-encoded byte count,
+  // followed by the characters themselves (without null terminator).
+  static Length Bytes(const UniqueFreePtr<CHAR>& aS) {
+    if (const CHAR* const chars = aS.get()) {
+      // The stored size is a number of *bytes*, not a number of characters.
+      const auto byteCount =
+          std::char_traits<CHAR>::length(chars) * sizeof(CHAR);
+      return ULEB128Size(byteCount) + byteCount;
+    }
+    // Null pointer: stored like an empty string, i.e., only a 0 byte count.
+    return ULEB128Size(0u);
+  }
+
+  // Write the byte count (ULEB128) and then the characters of `aS`.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniqueFreePtr<CHAR>& aS) {
+    const CHAR* const chars = aS.get();
+    if (chars == nullptr) {
+      // Null pointer: stored like an empty string, so only write a byte count
+      // of 0.
+      aEW.WriteULEB128(0u);
+      return;
+    }
+    // The stored size is a number of *bytes*, not a number of characters.
+    const auto byteCount =
+        std::char_traits<CHAR>::length(chars) * sizeof(CHAR);
+    aEW.WriteULEB128(byteCount);
+    aEW.WriteBytes(chars, byteCount);
+  }
+};
+
+template <typename CHAR>
+struct ProfileBufferEntryReader::Deserializer<UniqueFreePtr<CHAR>> {
+  // Replace `aS` with a newly-allocated string read from `aER`.
+  static void ReadInto(ProfileBufferEntryReader& aER, UniqueFreePtr<CHAR>& aS) {
+    aS = Read(aER);
+  }
+
+  // Read a byte count (ULEB128) and that many bytes into a `malloc`ed,
+  // null-terminated buffer.
+  static UniqueFreePtr<CHAR> Read(ProfileBufferEntryReader& aER) {
+    // The buffer must be writable, so strip any `const` from `CHAR`.
+    using MutableChar = std::remove_const_t<CHAR>;
+    // The serialized size is a number of *bytes*.
+    const auto byteCount = aER.ReadULEB128<size_t>();
+    const size_t charCount = byteCount / sizeof(MutableChar);
+    // Room for the payload plus one extra character: the null terminator.
+    // NOTE(review): the `malloc` result is used without a null check.
+    MutableChar* chars =
+        static_cast<MutableChar*>(malloc(byteCount + sizeof(MutableChar)));
+    aER.ReadBytes(chars, byteCount);
+    chars[charCount] = MutableChar(0);
+    return UniqueFreePtr<CHAR>(chars);
+  }
+};
+
+// ----------------------------------------------------------------------------
+// std::tuple
+
+// std::tuple is serialized as a sequence of each recursively-serialized item.
+//
+// This is equivalent to manually serializing each item, so reading/writing
+// tuples is equivalent to reading/writing their elements in order, e.g.:
+// ```
+// std::tuple<int, std::string> is = ...;
+// aEW.WriteObject(is); // Write the tuple, equivalent to:
+// aEW.WriteObject(/* int */ std::get<0>(is), /* string */ std::get<1>(is));
+// ...
+// // Reading back can be done directly into a tuple:
+// auto is = aER.ReadObject<std::tuple<int, std::string>>();
+// // Or each item could be read separately:
+// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>();
+// ```
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<std::tuple<Ts...>> {
+ private:
+  // 0..N-1 index pack used to visit every element of the tuple.
+  using Indices = std::index_sequence_for<Ts...>;
+
+  // Add up the serialized sizes of all elements.
+  template <size_t... Is>
+  static Length SumElementBytes(const std::tuple<Ts...>& aTuple,
+                                std::index_sequence<Is...>) {
+    return (0 + ... + SumBytes(std::get<Is>(aTuple)));
+  }
+
+  // Serialize all elements, first to last.
+  template <size_t... Is>
+  static void WriteElements(ProfileBufferEntryWriter& aEW,
+                            const std::tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aEW.WriteObject(std::get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Total number of bytes needed to serialize every element of `aTuple`.
+  static Length Bytes(const std::tuple<Ts...>& aTuple) {
+    return SumElementBytes(aTuple, Indices());
+  }
+
+  // Serialize every element of `aTuple`, in order.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const std::tuple<Ts...>& aTuple) {
+    WriteElements(aEW, aTuple, Indices());
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<std::tuple<Ts...>> {
+ private:
+  // Deserialize each element in place, first to last. The comma fold
+  // guarantees left-to-right evaluation, which matches the order in which the
+  // Serializer wrote the elements.
+  template <size_t... Is>
+  static void TupleReadInto(ProfileBufferEntryReader& aER,
+                            std::tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aER.ReadIntoObject(std::get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Overwrite every element of `aTuple` with the recursively-deserialized
+  // value read from `aER`.
+  // (The tuple must not be filled by copying raw bytes over it: elements are
+  // stored in their own serialized form, e.g., a length-prefixed string, not
+  // as their in-memory representation.)
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       std::tuple<Ts...>& aTuple) {
+    // Generate a 0..N-1 index pack, we'll read each item.
+    TupleReadInto(aER, aTuple, std::index_sequence_for<Ts...>());
+  }
+
+  // Read a whole tuple and return it by value.
+  static std::tuple<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // Note that this creates default `Ts` first, and then overwrites them.
+    std::tuple<Ts...> ob;
+    ReadInto(aER, ob);
+    return ob;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Tuple
+
+// Tuple is serialized as a sequence of each recursively-serialized
+// item.
+//
+// This is equivalent to manually serializing each item, so reading/writing
+// tuples is equivalent to reading/writing their elements in order, e.g.:
+// ```
+// Tuple<int, std::string> is = ...;
+// aEW.WriteObject(is); // Write the Tuple, equivalent to:
+// aEW.WriteObject(/* int */ std::get<0>(is), /* string */ std::get<1>(is));
+// ...
+// // Reading back can be done directly into a Tuple:
+// auto is = aER.ReadObject<Tuple<int, std::string>>();
+// // Or each item could be read separately:
+// auto i = aER.ReadObject<int>(); auto s = aER.ReadObject<std::string>();
+// ```
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<Tuple<Ts...>> {
+ private:
+  // 0..N-1 index pack used to visit every element of the Tuple.
+  using Indices = std::index_sequence_for<Ts...>;
+
+  // Add up the serialized sizes of all elements.
+  template <size_t... Is>
+  static Length SumElementBytes(const Tuple<Ts...>& aTuple,
+                                std::index_sequence<Is...>) {
+    return (0 + ... + SumBytes(Get<Is>(aTuple)));
+  }
+
+  // Serialize all elements, first to last.
+  template <size_t... Is>
+  static void WriteElements(ProfileBufferEntryWriter& aEW,
+                            const Tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aEW.WriteObject(Get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Total number of bytes needed to serialize every element of `aTuple`.
+  static Length Bytes(const Tuple<Ts...>& aTuple) {
+    return SumElementBytes(aTuple, Indices());
+  }
+
+  // Serialize every element of `aTuple`, in order.
+  static void Write(ProfileBufferEntryWriter& aEW, const Tuple<Ts...>& aTuple) {
+    WriteElements(aEW, aTuple, Indices());
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<Tuple<Ts...>> {
+ private:
+  // Deserialize each element in place, first to last. The comma fold
+  // guarantees left-to-right evaluation, which matches the order in which the
+  // Serializer wrote the elements.
+  template <size_t... Is>
+  static void TupleReadInto(ProfileBufferEntryReader& aER,
+                            Tuple<Ts...>& aTuple,
+                            std::index_sequence<Is...>) {
+    (aER.ReadIntoObject(Get<Is>(aTuple)), ...);
+  }
+
+ public:
+  // Overwrite every element of `aTuple` with the recursively-deserialized
+  // value read from `aER`.
+  // (The Tuple must not be filled by copying raw bytes over it: elements are
+  // stored in their own serialized form, e.g., a length-prefixed string, not
+  // as their in-memory representation.)
+  static void ReadInto(ProfileBufferEntryReader& aER, Tuple<Ts...>& aTuple) {
+    // Generate a 0..N-1 index pack, we'll read each item.
+    TupleReadInto(aER, aTuple, std::index_sequence_for<Ts...>());
+  }
+
+  // Read a whole Tuple and return it by value.
+  static Tuple<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // Note that this creates default `Ts` first, and then overwrites them.
+    Tuple<Ts...> ob;
+    ReadInto(aER, ob);
+    return ob;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Span
+
+// Span. All elements are serialized in sequence.
+// The caller is assumed to know the number of elements (they may manually
+// write&read it before the span if needed).
+// Similar to tuples, reading/writing spans is equivalent to reading/writing
+// their elements in order.
+template <class T, size_t N>
+struct ProfileBufferEntryWriter::Serializer<Span<T, N>> {
+  // Sum of the serialized sizes of all elements; the element count itself is
+  // not included.
+  static Length Bytes(const Span<T, N>& aSpan) {
+    Length total = 0;
+    for (const T& item : aSpan) {
+      total += SumBytes(item);
+    }
+    return total;
+  }
+
+  // Serialize every element of the span, in order.
+  static void Write(ProfileBufferEntryWriter& aEW, const Span<T, N>& aSpan) {
+    for (const T& item : aSpan) {
+      aEW.WriteObject(item);
+    }
+  }
+};
+
+template <class T, size_t N>
+struct ProfileBufferEntryReader::Deserializer<Span<T, N>> {
+  // Deserialize into the existing elements that `aSpan` points at; the span
+  // must already cover a buffer with the expected number of elements.
+  static void ReadInto(ProfileBufferEntryReader& aER, Span<T, N>& aSpan) {
+    for (T& item : aSpan) {
+      aER.ReadIntoObject(item);
+    }
+  }
+
+  // Forbidden: a Span does not own its data, so returning one by value would
+  // probably leak the storage behind it.
+  static Span<T, N> Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Maybe
+
+// Maybe<T> is serialized as one byte containing either 'm' (Nothing),
+// or 'M' followed by the recursively-serialized `T` object.
+template <typename T>
+struct ProfileBufferEntryWriter::Serializer<Maybe<T>> {
+  // One flag byte, plus the size of the contained object when there is one.
+  static Length Bytes(const Maybe<T>& aMaybe) {
+    if (!aMaybe.isNothing()) {
+      return 1 + SumBytes(aMaybe.ref());
+    }
+    return 1;
+  }
+
+  // Write the flag byte ('m' for Nothing, 'M' for Some; both are arbitrary
+  // 1-byte markers), followed by the contained object if present.
+  static void Write(ProfileBufferEntryWriter& aEW, const Maybe<T>& aMaybe) {
+    if (aMaybe.isNothing()) {
+      aEW.WriteObject<char>('m');
+      return;
+    }
+    aEW.WriteObject<char>('M');
+    // The contained type's own Serializer handles the payload.
+    aEW.WriteObject(aMaybe.ref());
+  }
+};
+
+template <typename T>
+struct ProfileBufferEntryReader::Deserializer<Maybe<T>> {
+  // Read the flag byte, then either empty `aMaybe` ('m') or deserialize the
+  // contained object into it ('M').
+  static void ReadInto(ProfileBufferEntryReader& aER, Maybe<T>& aMaybe) {
+    const char flag = aER.ReadObject<char>();
+    if (flag == 'm') {
+      aMaybe.reset();
+      return;
+    }
+    MOZ_ASSERT(flag == 'M');
+    // An empty Maybe first gets a default `T`, which is then overwritten;
+    // otherwise the existing value is overwritten directly.
+    if (aMaybe.isNothing()) {
+      aMaybe.emplace();
+    }
+    // The contained type's own Deserializer handles the payload.
+    aER.ReadIntoObject(aMaybe.ref());
+  }
+
+  // Read the flag byte and, for 'M', the contained object; returns Nothing
+  // otherwise.
+  static Maybe<T> Read(ProfileBufferEntryReader& aER) {
+    Maybe<T> result;
+    const char flag = aER.ReadObject<char>();
+    MOZ_ASSERT(flag == 'M' || flag == 'm');
+    if (flag == 'M') {
+      // A default `T` is stored first, and then overwritten by the
+      // contained type's own Deserializer.
+      result = Some(T{});
+      aER.ReadIntoObject(result.ref());
+    }
+    return result;
+  }
+};
+
+// ----------------------------------------------------------------------------
+// mozilla::Variant
+
+// Variant is serialized as the tag (0-based index of the stored type, encoded
+// as ULEB128), and the recursively-serialized object.
+template <typename... Ts>
+struct ProfileBufferEntryWriter::Serializer<Variant<Ts...>> {
+ public:
+  // Size of the ULEB128-encoded tag plus the size of the stored alternative.
+  static Length Bytes(const Variant<Ts...>& aVariantTs) {
+    return aVariantTs.match([](auto aTag, const auto& aStored) {
+      return ULEB128Size(aTag) + SumBytes(aStored);
+    });
+  }
+
+  // Write the tag (ULEB128), followed by the stored alternative.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const Variant<Ts...>& aVariantTs) {
+    aVariantTs.match([&aEW](auto aTag, const auto& aStored) {
+      aEW.WriteULEB128(aTag);
+      aEW.WriteObject(aStored);
+    });
+  }
+};
+
+template <typename... Ts>
+struct ProfileBufferEntryReader::Deserializer<Variant<Ts...>> {
+ private:
+  // Invoked once per alternative index by the fold expression in
+  // `VariantReadInto()`; does nothing unless `I` is the tag that was read.
+  template <size_t I>
+  static void VariantIReadInto(ProfileBufferEntryReader& aER,
+                               Variant<Ts...>& aVariantTs, unsigned aTag) {
+    if (I != aTag) {
+      return;
+    }
+    // Make sure the variant holds alternative `I` before reading into it.
+    // Note that this may create a default object.
+    if (!aVariantTs.template is<I>()) {
+      aVariantTs = Variant<Ts...>(VariantIndex<I>{});
+    }
+    aER.ReadIntoObject(aVariantTs.template as<I>());
+  }
+
+  // Read the tag, then offer it to every alternative index in turn.
+  template <size_t... Is>
+  static void VariantReadInto(ProfileBufferEntryReader& aER,
+                              Variant<Ts...>& aVariantTs,
+                              std::index_sequence<Is...>) {
+    const unsigned storedTag = aER.ReadULEB128<unsigned>();
+    (VariantIReadInto<Is>(aER, aVariantTs, storedTag), ...);
+  }
+
+ public:
+  // Deserialize into `aVariantTs`, switching it to the stored alternative if
+  // needed.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       Variant<Ts...>& aVariantTs) {
+    // With a 0..N-1 index pack, only the alternative matching the tag will
+    // deserialize itself.
+    VariantReadInto(aER, aVariantTs, std::index_sequence_for<Ts...>());
+  }
+
+  // Deserialize a Variant and return it by value.
+  static Variant<Ts...> Read(ProfileBufferEntryReader& aER) {
+    // This starts from a default `Variant` of the first type, which then gets
+    // overwritten. Consider using `ReadInto` for more control if needed.
+    Variant<Ts...> result(VariantIndex<0>{});
+    ReadInto(aER, result);
+    return result;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferEntrySerialization_h
diff --git a/mozglue/baseprofiler/public/ProfileBufferIndex.h b/mozglue/baseprofiler/public/ProfileBufferIndex.h
new file mode 100644
index 0000000000..5cda6bd89e
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileBufferIndex.h
@@ -0,0 +1,97 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileBufferIndex_h
+#define ProfileBufferIndex_h
+
+#include "mozilla/Attributes.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace mozilla {
+
+// Generic index into a Profiler buffer, mostly for internal usage.
+// Intended to appear infinite (it should effectively never wrap).
+// 0 (zero) is reserved as a nullptr-like value; it may indicate a failure
+// result, or it may point at the earliest available block.
+using ProfileBufferIndex = uint64_t;
+
+// Externally-opaque class encapsulating a block index, i.e. a
+// ProfileBufferIndex that is guaranteed to point at the start of a Profile
+// buffer block (until it is destroyed, but then that index cannot be reused and
+// functions should gracefully handle expired blocks).
+// Users may get these from Profile buffer functions, to later access previous
+// blocks; they should avoid converting and operating on their value.
+class ProfileBufferBlockIndex {
+ public:
+  // Default-constructed objects hold the internal 0 value, which Profile
+  // buffers must guarantee is before any valid entries; All public APIs
+  // should fail gracefully, doing and/or returning Nothing.
+  ProfileBufferBlockIndex() : mBlockIndex(0) {}
+
+  // A literal `nullptr` implicitly converts to the internal 0 value, which
+  // allows convenient init/reset/comparison with the 0 index.
+  MOZ_IMPLICIT ProfileBufferBlockIndex(std::nullptr_t) : mBlockIndex(0) {}
+
+  // Explicit conversion to bool, usable in `if` and other tests.
+  // False only for the default `ProfileBufferBlockIndex{}` value.
+  explicit operator bool() const { return mBlockIndex != 0; }
+
+  // Comparison operators, ordering by the underlying index. The default
+  // `ProfileBufferBlockIndex{}` value is always the lowest.
+  [[nodiscard]] bool operator==(const ProfileBufferBlockIndex& aRhs) const {
+    return mBlockIndex == aRhs.mBlockIndex;
+  }
+  [[nodiscard]] bool operator!=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(*this == aRhs);
+  }
+  [[nodiscard]] bool operator<(const ProfileBufferBlockIndex& aRhs) const {
+    return mBlockIndex < aRhs.mBlockIndex;
+  }
+  [[nodiscard]] bool operator<=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(aRhs < *this);
+  }
+  [[nodiscard]] bool operator>(const ProfileBufferBlockIndex& aRhs) const {
+    return aRhs < *this;
+  }
+  [[nodiscard]] bool operator>=(const ProfileBufferBlockIndex& aRhs) const {
+    return !(*this < aRhs);
+  }
+
+  // Explicit conversion to ProfileBufferIndex, mostly used by internal Profile
+  // buffer code.
+  [[nodiscard]] ProfileBufferIndex ConvertToProfileBufferIndex() const {
+    return mBlockIndex;
+  }
+
+  // Explicit creation from ProfileBufferIndex, mostly used by internal
+  // Profile buffer code.
+  [[nodiscard]] static ProfileBufferBlockIndex CreateFromProfileBufferIndex(
+      ProfileBufferIndex aIndex) {
+    return ProfileBufferBlockIndex(aIndex);
+  }
+
+ private:
+  // Kept private so that these objects cannot easily be constructed from any
+  // value; use `CreateFromProfileBufferIndex()` instead.
+  // The point of this indirection is to make it harder to create these
+  // objects, because only the profiler code should need to do it. Ideally, this
+  // class should be used wherever a block index should be stored, but there is
+  // so much code that uses `uint64_t` that it would be a big task to change
+  // them all. So for now conversions to/from numbers are allowed, but as ugly
+  // as possible to make sure they don't get too common; and if one day we want
+  // to tackle a global change, it should be easy to find all these locations
+  // thanks to the explicit conversion functions.
+  explicit ProfileBufferBlockIndex(ProfileBufferIndex aBlockIndex)
+      : mBlockIndex(aBlockIndex) {}
+
+  // Underlying index; 0 is the default/null value.
+  ProfileBufferIndex mBlockIndex;
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileBufferIndex_h
diff --git a/mozglue/baseprofiler/public/ProfileChunkedBuffer.h b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
new file mode 100644
index 0000000000..d4d55eafcb
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfileChunkedBuffer.h
@@ -0,0 +1,1872 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ProfileChunkedBuffer_h
+#define ProfileChunkedBuffer_h
+
+#include "mozilla/BaseProfilerDetail.h"
+#include "mozilla/NotNull.h"
+#include "mozilla/ProfileBufferChunkManager.h"
+#include "mozilla/ProfileBufferChunkManagerSingle.h"
+#include "mozilla/ProfileBufferEntrySerialization.h"
+#include "mozilla/RefCounted.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Unused.h"
+
+#include <cstdio>
+#include <utility>
+
+namespace mozilla {
+
+namespace detail {
+
+// Internal accessor pointing at a position inside a chunk.
+// It can handle two groups of chunks (typically the extant chunks stored in
+// the store manager, and the current chunk).
+// The main operations are:
+// - ReadEntrySize() to read an entry size, 0 means failure.
+// - operator+=(Length) to skip a number of bytes.
+// - EntryReader() creates an entry reader at the current position for a given
+//   size (it may fail with an empty reader), and skips the entry.
+// Note that there is no "past-the-end" position -- as soon as InChunkPointer
+// reaches the end, it becomes effectively null.
+class InChunkPointer {
+ public:
+  using Byte = ProfileBufferChunk::Byte;
+  using Length = ProfileBufferChunk::Length;
+
+  // Nullptr-like InChunkPointer, may be used as end iterator.
+  InChunkPointer()
+      : mChunk(nullptr), mNextChunkGroup(nullptr), mOffsetInChunk(0) {}
+
+  // InChunkPointer over one or two chunk groups, pointing at the given
+  // block index (if still in range).
+  // This constructor should only be used with *trusted* block index values!
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferBlockIndex aBlockIndex)
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (mChunk) {
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else if (mNextChunkGroup) {
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    // Try to advance to given position.
+    if (!AdvanceToGlobalRangePosition(aBlockIndex)) {
+      // Block does not exist anymore (or block doesn't look valid), reset the
+      // in-chunk pointer.
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // InChunkPointer over one or two chunk groups, will start at the first
+  // block (if any). This may be slow, so avoid using it too much.
+  InChunkPointer(const ProfileBufferChunk* aChunk,
+                 const ProfileBufferChunk* aNextChunkGroup,
+                 ProfileBufferIndex aIndex = ProfileBufferIndex(0))
+      : mChunk(aChunk), mNextChunkGroup(aNextChunkGroup) {
+    if (mChunk) {
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else if (mNextChunkGroup) {
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      Adjust();
+    } else {
+      mOffsetInChunk = 0;
+    }
+
+    // Try to advance to given position.
+    if (!AdvanceToGlobalRangePosition(aIndex)) {
+      // Block does not exist anymore, reset the in-chunk pointer.
+      mChunk = nullptr;
+      mNextChunkGroup = nullptr;
+    }
+  }
+
+  // Compute the current position in the global range.
+  // 0 if null (including if we're reached the end).
+  [[nodiscard]] ProfileBufferIndex GlobalRangePosition() const {
+    if (IsNull()) {
+      return 0;
+    }
+    return mChunk->RangeStart() + mOffsetInChunk;
+  }
+
+  // Move InChunkPointer forward to the block at the given global block
+  // position, which is assumed to be valid exactly -- but it may be obsolete.
+  // 0 stays where it is (if valid already).
+  // MOZ_ASSERTs if the index is invalid.
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferBlockIndex aBlockIndex) {
+    if (IsNull()) {
+      // Pointer is null already. (Not asserting because it's acceptable.)
+      return false;
+    }
+    if (!aBlockIndex) {
+      // Special null position, just stay where we are.
+      return ShouldPointAtValidBlock();
+    }
+    if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) {
+      // Past the requested position, stay where we are (assuming the current
+      // position was valid).
+      return ShouldPointAtValidBlock();
+    }
+    for (;;) {
+      if (aBlockIndex.ConvertToProfileBufferIndex() <
+          mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) {
+        // Target position is in this chunk's written space, move to it.
+        mOffsetInChunk =
+            aBlockIndex.ConvertToProfileBufferIndex() - mChunk->RangeStart();
+        return ShouldPointAtValidBlock();
+      }
+      // Position is after this chunk, try next chunk.
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;
+      }
+      // Skip whatever block tail there is, we don't allow pointing in the
+      // middle of a block.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+      if (aBlockIndex.ConvertToProfileBufferIndex() < GlobalRangePosition()) {
+        // Past the requested position, meaning that the given position was in-
+        // between blocks -> Failure.
+        MOZ_ASSERT(false, "AdvanceToGlobalRangePosition - In-between blocks");
+        return false;
+      }
+    }
+  }
+
+  // Move InChunkPointer forward to the block at or after the given global
+  // range position.
+  // 0 stays where it is (if valid already).
+  [[nodiscard]] bool AdvanceToGlobalRangePosition(
+      ProfileBufferIndex aPosition) {
+    if (aPosition == 0) {
+      // Special position '0', just stay where we are.
+      // Success if this position is already valid.
+      return !IsNull();
+    }
+    for (;;) {
+      ProfileBufferIndex currentPosition = GlobalRangePosition();
+      if (currentPosition == 0) {
+        // Pointer is null.
+        return false;
+      }
+      if (aPosition <= currentPosition) {
+        // At or past the requested position, stay where we are.
+        return true;
+      }
+      if (aPosition < mChunk->RangeStart() + mChunk->OffsetPastLastBlock()) {
+        // Target position is in this chunk's written space, move to it.
+        for (;;) {
+          // Skip the current block.
+          mOffsetInChunk += ReadEntrySize();
+          if (mOffsetInChunk >= mChunk->OffsetPastLastBlock()) {
+            // Reached the end of the chunk, this can happen for the last
+            // block, let's just continue to the next chunk.
+            break;
+          }
+          if (aPosition <= mChunk->RangeStart() + mOffsetInChunk) {
+            // We're at or after the position, return at this block position.
+            return true;
+          }
+        }
+      }
+      // Position is after this chunk, try next chunk.
+      GoToNextChunk();
+      if (IsNull()) {
+        return false;
+      }
+      // Skip whatever block tail there is, we don't allow pointing in the
+      // middle of a block.
+      mOffsetInChunk = mChunk->OffsetFirstBlock();
+    }
+  }
+
+  [[nodiscard]] Byte ReadByte() {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    Byte byte = mChunk->ByteAt(mOffsetInChunk);
+    if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      Adjust();
+    }
+    return byte;
+  }
+
+  // Read and skip a ULEB128-encoded size.
+  // 0 means failure (0-byte entries are not allowed.)
+  // Note that this doesn't guarantee that there are actually that many bytes
+  // available to read! (EntryReader() below may gracefully fail.)
+  [[nodiscard]] Length ReadEntrySize() {
+    ULEB128Reader<Length> reader;
+    if (IsNull()) {
+      return 0;
+    }
+    for (;;) {
+      const bool isComplete = reader.FeedByteIsComplete(ReadByte());
+      if (MOZ_UNLIKELY(IsNull())) {
+        // End of chunks, so there's no actual entry after this anyway.
+        return 0;
+      }
+      if (MOZ_LIKELY(isComplete)) {
+        if (MOZ_UNLIKELY(reader.Value() > mChunk->BufferBytes())) {
+          // Don't allow entries larger than a chunk.
+          return 0;
+        }
+        return reader.Value();
+      }
+    }
+  }
+
+  InChunkPointer& operator+=(Length aLength) {
+    MOZ_ASSERT(!IsNull());
+    mOffsetInChunk += aLength;
+    Adjust();
+    return *this;
+  }
+
+  [[nodiscard]] ProfileBufferEntryReader EntryReader(Length aLength) {
+    if (IsNull() || aLength == 0) {
+      return ProfileBufferEntryReader();
+    }
+
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+
+    // We should be pointing at the entry, past the entry size.
+    const ProfileBufferIndex entryIndex = GlobalRangePosition();
+    // Verify that there's enough space before for the size (starting at index
+    // 1 at least).
+    MOZ_ASSERT(entryIndex >= 1u + ULEB128Size(aLength));
+
+    const Length remaining = mChunk->OffsetPastLastBlock() - mOffsetInChunk;
+    Span<const Byte> mem0 = mChunk->BufferSpan();
+    mem0 = mem0.From(mOffsetInChunk);
+    if (aLength <= remaining) {
+      // Move to the end of this block, which could make this null if we have
+      // reached the end of all buffers.
+      *this += aLength;
+      return ProfileBufferEntryReader(
+          mem0.To(aLength),
+          // Block starts before the entry size.
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              entryIndex - ULEB128Size(aLength)),
+          // Block ends right after the entry (could be null for last entry).
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              GlobalRangePosition()));
+    }
+
+    // We need to go to the next chunk for the 2nd part of this block.
+    GoToNextChunk();
+    if (IsNull()) {
+      return ProfileBufferEntryReader();
+    }
+
+    Span<const Byte> mem1 = mChunk->BufferSpan();
+    const Length tail = aLength - remaining;
+    MOZ_ASSERT(tail <= mChunk->BufferBytes());
+    MOZ_ASSERT(tail == mChunk->OffsetFirstBlock());
+    // We are in the correct chunk, move the offset to the end of the block.
+    mOffsetInChunk = tail;
+    // And adjust as needed, which could make this null if we have reached the
+    // end of all buffers.
+    Adjust();
+    return ProfileBufferEntryReader(
+        mem0, mem1.To(tail),
+        // Block starts before the entry size.
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            entryIndex - ULEB128Size(aLength)),
+        // Block ends right after the entry (could be null for last entry).
+        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            GlobalRangePosition()));
+  }
+
+  [[nodiscard]] bool IsNull() const { return !mChunk; }
+
+  [[nodiscard]] bool operator==(const InChunkPointer& aOther) const {
+    if (IsNull() || aOther.IsNull()) {
+      return IsNull() && aOther.IsNull();
+    }
+    return mChunk == aOther.mChunk && mOffsetInChunk == aOther.mOffsetInChunk;
+  }
+
+  [[nodiscard]] bool operator!=(const InChunkPointer& aOther) const {
+    return !(*this == aOther);
+  }
+
+  [[nodiscard]] Byte operator*() const {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    return mChunk->ByteAt(mOffsetInChunk);
+  }
+
+  InChunkPointer& operator++() {
+    MOZ_ASSERT(!IsNull());
+    MOZ_ASSERT(mOffsetInChunk < mChunk->OffsetPastLastBlock());
+    if (MOZ_UNLIKELY(++mOffsetInChunk == mChunk->OffsetPastLastBlock())) {
+      mOffsetInChunk = 0;
+      GoToNextChunk();
+      Adjust();
+    }
+    return *this;
+  }
+
+ private:
+  void GoToNextChunk() {
+    MOZ_ASSERT(!IsNull());
+    const ProfileBufferIndex expectedNextRangeStart =
+        mChunk->RangeStart() + mChunk->BufferBytes();
+
+    mChunk = mChunk->GetNext();
+    if (!mChunk) {
+      // Reached the end of the current chunk group, try the next one (which
+      // may be null too, especially on the 2nd try).
+      mChunk = mNextChunkGroup;
+      mNextChunkGroup = nullptr;
+    }
+
+    if (mChunk && mChunk->RangeStart() == 0) {
+      // Reached a chunk without a valid (non-null) range start, assume there
+      // are only unused chunks from here on.
+      mChunk = nullptr;
+    }
+
+    MOZ_ASSERT(!mChunk || mChunk->RangeStart() == expectedNextRangeStart,
+               "We don't handle discontinuous buffers (yet)");
+    // Non-DEBUG fallback: Stop reading past discontinuities.
+    // (They should be rare, only happening on temporary OOMs.)
+    // TODO: Handle discontinuities (by skipping over incomplete blocks).
+    if (mChunk && mChunk->RangeStart() != expectedNextRangeStart) {
+      mChunk = nullptr;
+    }
+  }
+
+  // We want `InChunkPointer` to always point at a valid byte (or be null).
+  // After some operations, `mOffsetInChunk` may point past the end of the
+  // current `mChunk`, in which case we need to adjust our position to be inside
+  // the appropriate chunk. E.g., if we're 10 bytes after the end of the current
+  // chunk, we should end up at offset 10 in the next chunk.
+  // Note that we may "fall off" the last chunk and make this `InChunkPointer`
+  // effectively null.
+  void Adjust() {
+    while (mChunk && mOffsetInChunk >= mChunk->OffsetPastLastBlock()) {
+      // TODO: Try to adjust offset between chunks relative to mRangeStart
+      // differences. But we don't handle discontinuities yet.
+      if (mOffsetInChunk < mChunk->BufferBytes()) {
+        mOffsetInChunk -= mChunk->BufferBytes();
+      } else {
+        mOffsetInChunk -= mChunk->OffsetPastLastBlock();
+      }
+      GoToNextChunk();
+    }
+  }
+
+  // Check if the current position is likely to point at a valid block.
+  // (Size should be reasonable, and block should fully fit inside buffer.)
+  // MOZ_ASSERTs on failure, to catch incorrect uses of block indices (which
+  // should only point at valid blocks if still in range). Non-asserting build
+  // fallback should still be handled.
+  // Returns true only if this pointer is non-null, a non-zero entry size can
+  // be read at the current position, and the last byte of that entry is still
+  // reachable. `*this` is never modified; all probing is done on a copy.
+  [[nodiscard]] bool ShouldPointAtValidBlock() const {
+    if (IsNull()) {
+      // Pointer is null, no blocks here.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - null pointer");
+      return false;
+    }
+    // Use a copy, so we don't modify `*this`.
+    InChunkPointer pointer = *this;
+    // Try to read the entry size.
+    Length entrySize = pointer.ReadEntrySize();
+    if (entrySize == 0) {
+      // Entry size of zero means we read 0 or a way-too-big value.
+      MOZ_ASSERT(false, "ShouldPointAtValidBlock - invalid size");
+      return false;
+    }
+    // See if the last byte of the entry is still inside the buffer.
+    pointer += entrySize - 1;
+    // It is the advanced copy that must be tested here: `*this` was already
+    // proven non-null above, so testing it again could never detect an entry
+    // running past the end of the buffer.
+    const bool lastByteIsInBuffer = !pointer.IsNull();
+    MOZ_ASSERT(lastByteIsInBuffer,
+               "ShouldPointAtValidBlock - past end of buffer");
+    return lastByteIsInBuffer;
+  }
+
+  // Chunk currently pointed into. `Adjust()` stops as soon as this is null,
+  // which corresponds to the "effectively null" pointer state.
+  const ProfileBufferChunk* mChunk;
+  // NOTE(review): presumably the head of a second chunk list to continue into
+  // once the first list is exhausted -- its uses are outside this excerpt.
+  const ProfileBufferChunk* mNextChunkGroup;
+  // Byte offset relative to `*mChunk`. After `Adjust()`, it is below
+  // `mChunk->OffsetPastLastBlock()` whenever `mChunk` is non-null.
+  Length mOffsetInChunk;
+};
+
+}  // namespace detail
+
+// Thread-safe buffer that can store blocks of different sizes during defined
+// sessions, using Chunks (from a ChunkManager) as storage.
+//
+// Each *block* contains an *entry* and the entry size:
+// [ entry_size | entry ] [ entry_size | entry ] ...
+//
+// *In-session* is a period of time during which `ProfileChunkedBuffer` allows
+// reading and writing.
+// *Out-of-session*, the `ProfileChunkedBuffer` object is still valid, but
+// contains no data, and gracefully denies accesses.
+//
+// To write an entry, the buffer reserves a block of sufficient size (to contain
+// user data of predetermined size), writes the entry size, and lets the caller
+// fill the entry contents using a ProfileBufferEntryWriter. E.g.:
+// ```
+// ProfileChunkedBuffer cb(...);
+// cb.ReserveAndPut([]() { return sizeof(123); },
+//                  [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+//                    if (aEW) { aEW->WriteObject(123); }
+//                  });
+// ```
+// Other `Put...` functions may be used as shortcuts for simple entries.
+// The objects given to the caller's callbacks should only be used inside the
+// callbacks and not stored elsewhere, because they keep their own references to
+// chunk memory and therefore should not live longer.
+// Different types of objects may be serialized into an entry; see
+// `ProfileBufferEntryWriter::Serializer` for more information.
+//
+// When reading data, the buffer iterates over blocks (it knows how to read the
+// entry size, and therefore move to the next block), and lets the caller read
+// the entry inside of each block. E.g.:
+// ```
+// cb.ReadEach([](ProfileBufferEntryReader& aER) {
+//   /* Use ProfileBufferEntryReader functions to read serialized objects. */
+//   int n = aER.ReadObject<int>();
+// });
+// ```
+// Different types of objects may be deserialized from an entry; see
+// `ProfileBufferEntryReader::Deserializer` for more information.
+//
+// Writers may retrieve the block index corresponding to an entry
+// (`ProfileBufferBlockIndex` is an opaque type preventing the user from easily
+// modifying it). That index may later be used with `ReadAt` to get back to the
+// entry in that particular block -- if it still exists.
+class ProfileChunkedBuffer {
+ public:
+  using Byte = ProfileBufferChunk::Byte;
+  using Length = ProfileBufferChunk::Length;
+
+  enum class ThreadSafety { WithoutMutex, WithMutex };
+
+  // Construct a buffer that starts out-of-session (nothing to read or write):
+  // no chunk manager is set here. Note that this is not a true default
+  // constructor, the thread-safety mode must always be given.
+  // `mMutex` is constructed with `true` for any `aThreadSafety` other than
+  // `ThreadSafety::WithoutMutex`.
+  explicit ProfileChunkedBuffer(ThreadSafety aThreadSafety)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {}
+
+  // Start in-session with external chunk manager.
+  // The manager is passed by reference and handed to
+  // `SetChunkManager(ProfileBufferChunkManager&)`, so it is not owned by this
+  // buffer: the caller must keep it alive for as long as it is used here.
+  ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+                       ProfileBufferChunkManager& aChunkManager)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+    SetChunkManager(aChunkManager);
+  }
+
+  // Start in-session with owned chunk manager.
+  // Ownership of `aChunkManager` is moved into this buffer. If the given
+  // pointer is null, `SetChunkManager(UniquePtr&&)` does not install any
+  // manager, so the buffer stays out-of-session.
+  ProfileChunkedBuffer(ThreadSafety aThreadSafety,
+                       UniquePtr<ProfileBufferChunkManager>&& aChunkManager)
+      : mMutex(aThreadSafety != ThreadSafety::WithoutMutex) {
+    SetChunkManager(std::move(aChunkManager));
+  }
+
+  // Leaves the session (if any) before the members are destroyed.
+  ~ProfileChunkedBuffer() {
+    // Do proper clean-up by resetting the chunk manager.
+    // This takes the mutex, detaches from the chunk manager, and drops all
+    // chunks still held by this buffer.
+    ResetChunkManager();
+  }
+
+  // True if the mutex of this buffer is activated, i.e., if accesses are
+  // actually serialized.
+  // This cannot change during the lifetime of this buffer, so there's no need
+  // to lock.
+  [[nodiscard]] bool IsThreadSafe() const { return mMutex.IsActivated(); }
+
+  // True if a chunk manager is currently set, i.e., if this buffer is
+  // in-session. Takes the mutex; the answer may already be stale by the time
+  // the caller looks at it if other threads can change the chunk manager.
+  [[nodiscard]] bool IsInSession() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return !!mChunkManager;
+  }
+
+  // Stop using the current chunk manager.
+  // If we own the current chunk manager, it will be destroyed.
+  // This will always clear currently-held chunks, if any.
+  void ResetChunkManager() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // The locked overload returns the owned manager (if any); discarding the
+    // returned `UniquePtr` here is what destroys it.
+    Unused << ResetChunkManager(lock);
+  }
+
+  // Set the current chunk manager.
+  // The caller is responsible for keeping the chunk manager alive as long as
+  // it's used here (until the next (Re)SetChunkManager, or
+  // ~ProfileChunkedBuffer).
+  void SetChunkManager(ProfileBufferChunkManager& aChunkManager) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // Leave any previous session first (this also destroys a previously-owned
+    // manager), then install the new one under the same lock.
+    Unused << ResetChunkManager(lock);
+    SetChunkManager(aChunkManager, lock);
+  }
+
+  // Set the current chunk manager, and keep ownership of it.
+  // A null `aChunkManager` only ends the current session: the buffer is then
+  // left out-of-session.
+  void SetChunkManager(UniquePtr<ProfileBufferChunkManager>&& aChunkManager) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // The previous session must be torn down *before* `mOwnedChunkManager` is
+    // overwritten, because the reset moves the old owned manager out.
+    Unused << ResetChunkManager(lock);
+    mOwnedChunkManager = std::move(aChunkManager);
+    if (mOwnedChunkManager) {
+      SetChunkManager(*mOwnedChunkManager, lock);
+    }
+  }
+
+  // Stop using the current chunk manager, and return it if owned here.
+  // The result is null if the buffer was out-of-session, or if the manager in
+  // use was an external (non-owned) one.
+  [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ExtractChunkManager() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return ResetChunkManager(lock);
+  }
+
+  // Clear the contents of this buffer, ready to receive new chunks.
+  // Note that memory is not freed: No chunks are destroyed, they are all
+  // recycled.
+  // Also the range doesn't reset, instead it continues at some point after the
+  // previous range. This may be useful if the caller may be keeping indexes
+  // into old chunks that have now been cleared, using these indexes will fail
+  // gracefully (instead of potentially pointing into new data).
+  // Does nothing when out-of-session.
+  void Clear() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session.
+      return;
+    }
+
+    // Forget all statistics, and restart with an empty range located where
+    // the next chunk would have started.
+    mPushedBlockCount = 0;
+    mClearedBlockCount = 0;
+    mFailedPutBytes = 0;
+    mRangeStart = mNextChunkRangeStart;
+    mRangeEnd = mNextChunkRangeStart;
+
+    // Take back every released chunk and queue it after the existing "next"
+    // chunks, to reduce the number of future allocations. Also, when using
+    // ProfileBufferChunkManagerSingle, this retrieves the one chunk if it was
+    // released.
+    UniquePtr<ProfileBufferChunk> recycledChunks =
+        mChunkManager->GetExtantReleasedChunks();
+    if (recycledChunks) {
+      // Released chunks should be in the "Done" state, each one must be marked
+      // "recycled" before it can be reused.
+      ProfileBufferChunk* chunk = recycledChunks.get();
+      while (chunk) {
+        chunk->MarkRecycled();
+        chunk = chunk->GetNext();
+      }
+      mNextChunks = ProfileBufferChunk::Join(std::move(mNextChunks),
+                                             std::move(recycledChunks));
+    }
+
+    if (!mCurrentChunk) {
+      if (!mNextChunks) {
+        // No current chunk, and no next chunks to recycle, nothing more to do.
+        // The next "Put" operation will try to allocate a chunk as needed.
+        return;
+      }
+
+      // No current chunk: detach the head of the "next" list and promote it,
+      // the rest of the list stays in `mNextChunks`.
+      auto remainingNextChunks = mNextChunks->ReleaseNext();
+      mCurrentChunk = std::move(mNextChunks);
+      mNextChunks = std::move(remainingNextChunks);
+    } else {
+      // The existing current chunk (empty or in-use) goes through "done" and
+      // then "recycled", so that it can be reused.
+      mCurrentChunk->MarkDone();
+      mCurrentChunk->MarkRecycled();
+    }
+
+    // Whichever way we got a current chunk, make sure it's ready to receive
+    // new entries.
+    InitializeCurrentChunk(lock);
+  }
+
+  // Buffer maximum length in bytes, as reported by the chunk manager
+  // (`MaxTotalSize()`), or `Nothing` when out-of-session.
+  Maybe<size_t> BufferLength() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (mChunkManager) {
+      return Some(mChunkManager->MaxTotalSize());
+    }
+    // Out-of-session, there is no meaningful length to report.
+    return Nothing{};
+  }
+
+  // Memory reporting: takes the mutex and forwards to the locked overload of
+  // `SizeOfExcludingThis`. The size of `*this` itself is not included.
+  [[nodiscard]] size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return SizeOfExcludingThis(aMallocSizeOf, lock);
+  }
+
+  // Memory reporting: same as `SizeOfExcludingThis`, plus the size of this
+  // object as measured by `aMallocSizeOf(this)`.
+  [[nodiscard]] size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf, lock);
+  }
+
+  // Snapshot of the buffer state.
+  // Plain aggregate returned by value from `GetState()`; a
+  // default-constructed `State` describes an empty range [1, 1) with all
+  // counters at zero.
+  struct State {
+    // Index to/before the first block.
+    ProfileBufferIndex mRangeStart = 1;
+
+    // Index past the last block. Equals mRangeStart if empty.
+    ProfileBufferIndex mRangeEnd = 1;
+
+    // Number of blocks that have been pushed into this buffer.
+    uint64_t mPushedBlockCount = 0;
+
+    // Number of blocks that have been removed from this buffer.
+    // Note: Live entries = pushed - cleared.
+    uint64_t mClearedBlockCount = 0;
+
+    // Number of bytes that could not be put into this buffer.
+    uint64_t mFailedPutBytes = 0;
+  };
+
+  // Get a snapshot of the current state.
+  // When out-of-session, mRangeStart==mRangeEnd, and
+  // mPushedBlockCount==mClearedBlockCount==0.
+  // Note that these may change right after this thread-safe call, so they
+  // should only be used for statistical purposes.
+  [[nodiscard]] State GetState() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // Copy every field while the lock is held, so that the snapshot is
+    // internally consistent.
+    State snapshot;
+    snapshot.mRangeStart = mRangeStart;
+    snapshot.mRangeEnd = mRangeEnd;
+    snapshot.mPushedBlockCount = mPushedBlockCount;
+    snapshot.mClearedBlockCount = mClearedBlockCount;
+    snapshot.mFailedPutBytes = mFailedPutBytes;
+    return snapshot;
+  }
+
+  // True if the stored range is empty (start and end indices are equal).
+  // This is always the case after the chunk manager has been reset.
+  [[nodiscard]] bool IsEmpty() const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return mRangeStart == mRangeEnd;
+  }
+
+  // True if this buffer is already locked on this thread.
+  // This should be used if some functions may call an already-locked buffer,
+  // e.g.: Put -> memory hook -> profiler_add_native_allocation_marker -> Put.
+  // This query does not itself take the mutex.
+  [[nodiscard]] bool IsThreadSafeAndLockedOnCurrentThread() const {
+    return mMutex.IsActivatedAndLockedOnCurrentThread();
+  }
+
+  // Lock the buffer mutex and run the provided callback.
+  // This can be useful when the caller needs to explicitly lock down this
+  // buffer, but not do anything else with it.
+  // `aCallback` is invoked with no arguments while the lock is held, and its
+  // result is returned by value (plain `auto` return type).
+  template <typename Callback>
+  auto LockAndRun(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    return std::forward<Callback>(aCallback)();
+  }
+
+  // Reserve a block that can hold an entry of the given `aCallbackEntryBytes()`
+  // size, write the entry size (ULEB128-encoded), and invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryWriter>&)`.
+  // Note: `aCallbackEntryBytes` is a callback instead of a simple value, to
+  // delay this potentially-expensive computation until after we've checked that
+  // we're in-session; use `Put(Length, Callback)` below if you know the size
+  // already.
+  // `aCallback` is always invoked; it receives `Nothing` when no writer could
+  // be provided, so it must check the `Maybe` before writing.
+  template <typename CallbackEntryBytes, typename Callback>
+  auto ReserveAndPut(CallbackEntryBytes&& aCallbackEntryBytes,
+                     Callback&& aCallback)
+      -> decltype(std::forward<Callback>(aCallback)(
+          std::declval<Maybe<ProfileBufferEntryWriter>&>())) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+
+    // This can only be read in the 2nd lambda below after it has been written
+    // by the first lambda.
+    Length entryBytes;
+
+    return ReserveAndPutRaw(
+        [&]() {
+          entryBytes = std::forward<CallbackEntryBytes>(aCallbackEntryBytes)();
+          MOZ_ASSERT(entryBytes != 0, "Empty entries are not allowed");
+          // Full block size: the encoded entry size followed by the entry.
+          return ULEB128Size(entryBytes) + entryBytes;
+        },
+        [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+          if (aMaybeEntryWriter.isSome()) {
+            // Write the size prefix first; what is left in the writer must
+            // then be exactly the room needed for the entry itself.
+            aMaybeEntryWriter->WriteULEB128(entryBytes);
+            MOZ_ASSERT(aMaybeEntryWriter->RemainingBytes() == entryBytes);
+          }
+          return std::forward<Callback>(aCallback)(aMaybeEntryWriter);
+        },
+        lock);
+  }
+
+  // Shortcut for `ReserveAndPut` when the entry size is already known:
+  // reserve a block for an entry of `aEntryBytes` bytes, then invoke and
+  // return `aCallback(Maybe<ProfileBufferEntryWriter>&)`.
+  template <typename Callback>
+  auto Put(Length aEntryBytes, Callback&& aCallback) {
+    return ReserveAndPut([aEntryBytes]() { return aEntryBytes; },
+                         std::forward<Callback>(aCallback));
+  }
+
+  // Add a new entry made of the `aBytes` bytes found at `aSrc`, and return its
+  // block index. A default-constructed (null) `ProfileBufferBlockIndex` is
+  // returned if no writer could be obtained.
+  ProfileBufferBlockIndex PutFrom(const void* aSrc, Length aBytes) {
+    return ReserveAndPut(
+        [aBytes]() { return aBytes; },
+        [aSrc, aBytes](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+          if (aMaybeEntryWriter.isSome()) {
+            aMaybeEntryWriter->WriteBytes(aSrc, aBytes);
+            return aMaybeEntryWriter->CurrentBlockIndex();
+          }
+          // Nothing could be written.
+          return ProfileBufferBlockIndex{};
+        });
+  }
+
+  // Add one single new entry containing *all* the given objects (each one is
+  // written through `ProfileBufferEntryWriter::WriteObjects`), and return its
+  // block index. A default-constructed (null) `ProfileBufferBlockIndex` is
+  // returned if no writer could be obtained.
+  template <typename... Ts>
+  ProfileBufferBlockIndex PutObjects(const Ts&... aTs) {
+    static_assert(sizeof...(Ts) > 0,
+                  "PutObjects must be given at least one object.");
+    return ReserveAndPut(
+        // The entry must be big enough for all objects together.
+        [&]() { return ProfileBufferEntryWriter::SumBytes(aTs...); },
+        [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+          if (aMaybeEntryWriter.isSome()) {
+            aMaybeEntryWriter->WriteObjects(aTs...);
+            return aMaybeEntryWriter->CurrentBlockIndex();
+          }
+          // Nothing could be written.
+          return ProfileBufferBlockIndex{};
+        });
+  }
+
+  // Add a new entry copied from the given object, return block index.
+  // Single-object convenience wrapper around `PutObjects`.
+  template <typename T>
+  ProfileBufferBlockIndex PutObject(const T& aOb) {
+    return PutObjects(aOb);
+  }
+
+  // Get *all* chunks related to this buffer, including extant chunks in its
+  // ChunkManager, and yet-unused new/recycled chunks.
+  // We don't expect this buffer to be used again, though it's still possible
+  // and will allocate the first buffer when needed.
+  // Returns null when out-of-session. Otherwise the returned list is, in
+  // order: released chunks, the current chunk (marked "done"), then the "next"
+  // chunks; this buffer is left with an empty range.
+  [[nodiscard]] UniquePtr<ProfileBufferChunk> GetAllChunks() {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session.
+      return nullptr;
+    }
+    UniquePtr<ProfileBufferChunk> chunks =
+        mChunkManager->GetExtantReleasedChunks();
+    // Only the side effects of handling a requested chunk matter here, the
+    // "is pending" answer is deliberately ignored.
+    Unused << HandleRequestedChunk_IsPending(lock);
+    if (MOZ_LIKELY(!!mCurrentChunk)) {
+      mCurrentChunk->MarkDone();
+      chunks =
+          ProfileBufferChunk::Join(std::move(chunks), std::move(mCurrentChunk));
+    }
+    chunks =
+        ProfileBufferChunk::Join(std::move(chunks), std::move(mNextChunks));
+    // All chunks have been handed over to the caller, so the manager must not
+    // keep tracking them as unreleased.
+    mChunkManager->ForgetUnreleasedChunks();
+    // Restart with an empty range.
+    mRangeStart = mRangeEnd = mNextChunkRangeStart;
+    return chunks;
+  }
+
+  class Reader;
+
+  // Class that can iterate through blocks and provide
+  // `ProfileBufferEntryReader`s.
+  // Created through `Reader`, lives within a lock guard lifetime.
+  // Each iterator holds the index and the entry reader of its current block,
+  // plus a pointer that is used to find the block after it.
+  class BlockIterator {
+   public:
+#ifdef DEBUG
+    ~BlockIterator() {
+      // No BlockIterator should live outside of a mutexed call.
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Comparison with other iterator, mostly used in range-for loops.
+    // Only the current block indices are compared; both iterators are
+    // expected to come from the same buffer (asserted).
+    [[nodiscard]] bool operator==(const BlockIterator& aRhs) const {
+      MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+      return mCurrentBlockIndex == aRhs.mCurrentBlockIndex;
+    }
+    [[nodiscard]] bool operator!=(const BlockIterator& aRhs) const {
+      MOZ_ASSERT(mBuffer == aRhs.mBuffer);
+      return mCurrentBlockIndex != aRhs.mCurrentBlockIndex;
+    }
+
+    // Advance to next BlockIterator.
+    // Statement order matters: the block index must be taken from the
+    // pointer's position *before* `ReadEntrySize()` is called on that pointer
+    // (which presumably moves it past the size bytes).
+    BlockIterator& operator++() {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      mCurrentBlockIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              mNextBlockPointer.GlobalRangePosition());
+      mCurrentEntry =
+          mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize());
+      return *this;
+    }
+
+    // Dereferencing creates a `ProfileBufferEntryReader` object for the entry
+    // inside this block.
+    // (Note: It would be possible to return a `const
+    // ProfileBufferEntryReader&`, but not useful in practice, because in most
+    // case the user will want to read, which is non-const.)
+    [[nodiscard]] ProfileBufferEntryReader operator*() const {
+      return mCurrentEntry;
+    }
+
+    // True if this iterator is just past the last entry.
+    // This is detected by the current entry reader having no bytes to read.
+    [[nodiscard]] bool IsAtEnd() const {
+      return mCurrentEntry.RemainingBytes() == 0;
+    }
+
+    // Can be used as reference to come back to this entry with `GetEntryAt()`.
+    [[nodiscard]] ProfileBufferBlockIndex CurrentBlockIndex() const {
+      return mCurrentBlockIndex;
+    }
+
+    // Index past the end of this block, which is the start of the next block.
+    // Must not be called on an iterator that is at the end (asserted).
+    [[nodiscard]] ProfileBufferBlockIndex NextBlockIndex() const {
+      MOZ_ASSERT(!IsAtEnd());
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mNextBlockPointer.GlobalRangePosition());
+    }
+
+    // Index of the first block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer->mRangeStart);
+    }
+
+    // Index past the last block in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+          mBuffer->mRangeEnd);
+    }
+
+   private:
+    // Only a Reader can instantiate a BlockIterator.
+    friend class Reader;
+
+    // Members are initialized in their declaration order below
+    // (`mNextBlockPointer`, `mCurrentBlockIndex`, `mCurrentEntry`, `mBuffer`),
+    // which this constructor relies on: the pointer is positioned first, the
+    // block index is then taken from it, and only after that is the entry size
+    // read through the same pointer.
+    BlockIterator(const ProfileChunkedBuffer& aBuffer,
+                  const ProfileBufferChunk* aChunks0,
+                  const ProfileBufferChunk* aChunks1,
+                  ProfileBufferBlockIndex aBlockIndex)
+        : mNextBlockPointer(aChunks0, aChunks1, aBlockIndex),
+          mCurrentBlockIndex(
+              ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                  mNextBlockPointer.GlobalRangePosition())),
+          mCurrentEntry(
+              mNextBlockPointer.EntryReader(mNextBlockPointer.ReadEntrySize())),
+          mBuffer(WrapNotNull(&aBuffer)) {
+      // No BlockIterator should live outside of a mutexed call.
+      mBuffer->mMutex.AssertCurrentThreadOwns();
+    }
+
+    // Position used to locate the block following the current one; see
+    // `NextBlockIndex()` and `operator++`.
+    detail::InChunkPointer mNextBlockPointer;
+
+    // Index of the block this iterator currently designates.
+    ProfileBufferBlockIndex mCurrentBlockIndex;
+
+    // Reader for the entry of the current block; it has no remaining bytes
+    // when the iterator is at the end.
+    ProfileBufferEntryReader mCurrentEntry;
+
+    // Using a non-null pointer instead of a reference, to allow copying.
+    // This BlockIterator should only live inside one of the thread-safe
+    // ProfileChunkedBuffer functions, for this reference to stay valid.
+    NotNull<const ProfileChunkedBuffer*> mBuffer;
+  };
+
+  // Class that can create `BlockIterator`s (e.g., for range-for), or just
+  // iterate through entries; lives within a lock guard lifetime.
+  // It looks at two chunk lists, the second one being read after the first.
+  class MOZ_RAII Reader {
+   public:
+    // A Reader is tied to the locked scope that created it: it can neither be
+    // copied nor moved.
+    Reader(const Reader&) = delete;
+    Reader& operator=(const Reader&) = delete;
+    Reader(Reader&&) = delete;
+    Reader& operator=(Reader&&) = delete;
+
+#ifdef DEBUG
+    ~Reader() {
+      // A Reader must go away while the buffer mutex is still held.
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+    }
+#endif  // DEBUG
+
+    // Block index at the start of the range stored in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeStart() const {
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+      const auto rangeStart = mBuffer.mRangeStart;
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(rangeStart);
+    }
+
+    // Block index just past the range stored in the whole buffer.
+    [[nodiscard]] ProfileBufferBlockIndex BufferRangeEnd() const {
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+      const auto rangeEnd = mBuffer.mRangeEnd;
+      return ProfileBufferBlockIndex::CreateFromProfileBufferIndex(rangeEnd);
+    }
+
+    // Iterator to the first block.
+    // Together with `end()`, compatible with range-for (see `ForEach`).
+    [[nodiscard]] BlockIterator begin() const {
+      return BlockIterator(mBuffer, mChunks0, mChunks1, nullptr);
+    }
+    // Iterator past the last block. It should not be dereferenced, as there is
+    // no actual block there!
+    [[nodiscard]] BlockIterator end() const {
+      return BlockIterator(mBuffer, nullptr, nullptr, nullptr);
+    }
+
+    // Get a `BlockIterator` at the given `ProfileBufferBlockIndex`, clamped to
+    // the stored range. Note that a `BlockIterator` at the `end()` should not
+    // be dereferenced, as there is no actual block there!
+    [[nodiscard]] BlockIterator At(ProfileBufferBlockIndex aBlockIndex) const {
+      if (!(aBlockIndex < BufferRangeStart())) {
+        // Not before the range: we at least expect the index to be valid
+        // (pointing exactly at a live block, or just past the end.)
+        return BlockIterator(mBuffer, mChunks0, mChunks1, aBlockIndex);
+      }
+      // Anything before the range (including null ProfileBufferBlockIndex) is
+      // clamped at the beginning.
+      return begin();
+    }
+
+    // Run `aCallback(ProfileBufferEntryReader&)` on each entry from first to
+    // last. Callback should not store `ProfileBufferEntryReader`, as it may
+    // become invalid after this thread-safe call.
+    template <typename Callback>
+    void ForEach(Callback&& aCallback) const {
+      BlockIterator current = begin();
+      const BlockIterator pastLast = end();
+      for (; current != pastLast; ++current) {
+        // Each callback gets its own copy of the entry reader.
+        ProfileBufferEntryReader entryReader = *current;
+        aCallback(entryReader);
+      }
+    }
+
+    // If this reader only points at one chunk with some data, this data will be
+    // exposed as a single entry. Otherwise (no data at all, or data in more
+    // than one chunk), a default-constructed reader is returned.
+    [[nodiscard]] ProfileBufferEntryReader SingleChunkDataAsEntry() {
+      const ProfileBufferChunk* chunkWithData = nullptr;
+      const ProfileBufferChunk* const chunkLists[] = {mChunks0, mChunks1};
+      for (const ProfileBufferChunk* listHead : chunkLists) {
+        for (const ProfileBufferChunk* candidate = listHead; candidate;
+             candidate = candidate->GetNext()) {
+          if (candidate->OffsetFirstBlock() ==
+              candidate->OffsetPastLastBlock()) {
+            // Empty chunk, ignore it.
+            continue;
+          }
+          if (chunkWithData) {
+            // More than one non-empty chunk.
+            return ProfileBufferEntryReader();
+          }
+          chunkWithData = candidate;
+        }
+      }
+      if (!chunkWithData) {
+        // No non-empty chunks.
+        return ProfileBufferEntryReader();
+      }
+      // Here, exactly one chunk had some data: expose everything between its
+      // first block and the end of its last block.
+      const auto firstBlockOffset = chunkWithData->OffsetFirstBlock();
+      const auto pastLastBlockOffset = chunkWithData->OffsetPastLastBlock();
+      return ProfileBufferEntryReader(
+          chunkWithData->BufferSpan().FromTo(firstBlockOffset,
+                                             pastLastBlockOffset),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              chunkWithData->RangeStart()),
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              chunkWithData->RangeStart() +
+              (pastLastBlockOffset - firstBlockOffset)));
+    }
+
+   private:
+    // Only `ProfileChunkedBuffer` can create a Reader.
+    friend class ProfileChunkedBuffer;
+
+    explicit Reader(const ProfileChunkedBuffer& aBuffer,
+                    const ProfileBufferChunk* aChunks0,
+                    const ProfileBufferChunk* aChunks1)
+        : mBuffer(aBuffer), mChunks0(aChunks0), mChunks1(aChunks1) {
+      // A Reader may only be created while the buffer mutex is held.
+      mBuffer.mMutex.AssertCurrentThreadOwns();
+    }
+
+    // This Reader should only live inside one of the thread-safe
+    // ProfileChunkedBuffer functions, for this reference to stay valid.
+    const ProfileChunkedBuffer& mBuffer;
+    // Heads of the two chunk lists to read, in this order; either may be null.
+    const ProfileBufferChunk* mChunks0;
+    const ProfileBufferChunk* mChunks1;
+  };
+
+  // Invoke `aCallback(ProfileChunkedBuffer::Reader*)` under the buffer lock and
+  // return whatever it returns.
+  // If in-session, the callback is given a pointer to a live `Reader`; the
+  // callback should not store it, because it becomes invalid after this call.
+  // If out-of-session, the callback is still invoked, but with a null
+  // `Reader*`, so it must check the pointer before using it.
+  template <typename Callback>
+  [[nodiscard]] auto Read(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(!!mChunkManager)) {
+      // In-session: read from the released chunks first, then from the
+      // current chunk.
+      return mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestReleasedChunk) {
+            Reader reader(*this, aOldestReleasedChunk, mCurrentChunk.get());
+            return std::forward<Callback>(aCallback)(&reader);
+          });
+    }
+    // Out-of-session.
+    return std::forward<Callback>(aCallback)(static_cast<Reader*>(nullptr));
+  }
+
+  // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])`
+  // on each entry, it must read or at least skip everything. Either/both chunk
+  // pointers may be null.
+  // Iteration stops at the first position where no valid entry can be found
+  // (entry size of zero, or empty entry reader).
+  template <typename Callback>
+  static void ReadEach(const ProfileBufferChunk* aChunks0,
+                       const ProfileBufferChunk* aChunks1,
+                       Callback&& aCallback) {
+    static_assert(std::is_invocable_v<Callback, ProfileBufferEntryReader&> ||
+                      std::is_invocable_v<Callback, ProfileBufferEntryReader&,
+                                          ProfileBufferBlockIndex>,
+                  "ReadEach callback must take ProfileBufferEntryReader& and "
+                  "optionally a ProfileBufferBlockIndex");
+    for (detail::InChunkPointer cursor{aChunks0, aChunks1};
+         !cursor.IsNull();) {
+      // The position right before an entry size *is* a block index, so it
+      // must be captured before the size is read.
+      const ProfileBufferBlockIndex blockIndex =
+          ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+              cursor.GlobalRangePosition());
+      const Length entrySize = cursor.ReadEntrySize();
+      if (entrySize == 0) {
+        // No entry here, nothing more to read.
+        break;
+      }
+      ProfileBufferEntryReader entryReader = cursor.EntryReader(entrySize);
+      if (entryReader.RemainingBytes() == 0) {
+        // No complete entry here, nothing more to read.
+        break;
+      }
+      MOZ_ASSERT(entryReader.RemainingBytes() == entrySize);
+      if constexpr (std::is_invocable_v<Callback, ProfileBufferEntryReader&,
+                                        ProfileBufferBlockIndex>) {
+        aCallback(entryReader, blockIndex);
+      } else {
+        Unused << blockIndex;
+        aCallback(entryReader);
+      }
+      // The callback is required to consume the whole entry.
+      MOZ_ASSERT(entryReader.RemainingBytes() == 0);
+    }
+  }
+
+  // Invoke `aCallback(ProfileBufferEntryReader& [, ProfileBufferBlockIndex])`
+  // on each entry, it must read or at least skip everything.
+  // Entries are read under the buffer lock, from the released chunks first and
+  // then from the current chunk. Nothing happens when out-of-session.
+  template <typename Callback>
+  void ReadEach(Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(!!mChunkManager)) {
+      // In-session.
+      mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestReleasedChunk) {
+            ReadEach(aOldestReleasedChunk, mCurrentChunk.get(),
+                     std::forward<Callback>(aCallback));
+          });
+    }
+  }
+
+  // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at
+  // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if
+  // out-of-session, or if that entry doesn't exist anymore, or if we've reached
+  // just past the last entry. Return whatever `aCallback` returns. Callback
+  // should not store `ProfileBufferEntryReader`, because it may become invalid
+  // after this call.
+  // Either/both chunk pointers may be null.
+  // In DEBUG builds, it is asserted that the callback has read the whole entry
+  // (if there was one).
+  template <typename Callback>
+  [[nodiscard]] static auto ReadAt(ProfileBufferBlockIndex aMinimumBlockIndex,
+                                   const ProfileBufferChunk* aChunks0,
+                                   const ProfileBufferChunk* aChunks1,
+                                   Callback&& aCallback) {
+    static_assert(
+        std::is_invocable_v<Callback, Maybe<ProfileBufferEntryReader>&&>,
+        "ReadAt callback must take a Maybe<ProfileBufferEntryReader>&&");
+    Maybe<ProfileBufferEntryReader> foundEntry;
+    {
+      // The chunk pointer is only needed while looking for the entry.
+      detail::InChunkPointer cursor{aChunks0, aChunks1};
+      if (!cursor.IsNull()) {
+        const auto wantedPosition =
+            aMinimumBlockIndex.ConvertToProfileBufferIndex();
+        // Already at or past the wanted position? Otherwise try to advance.
+        const bool isPositioned =
+            cursor.GlobalRangePosition() >= wantedPosition ||
+            cursor.AdvanceToGlobalRangePosition(wantedPosition);
+        if (isPositioned) {
+          MOZ_ASSERT(cursor.GlobalRangePosition() >= wantedPosition);
+          // Here we're pointing at the start of a block, try to read the entry
+          // size. (Entries cannot be empty, so 0 means failure.)
+          const Length entrySize = cursor.ReadEntrySize();
+          if (entrySize != 0) {
+            foundEntry.emplace(cursor.EntryReader(entrySize));
+            if (foundEntry->RemainingBytes() != 0) {
+              MOZ_ASSERT(foundEntry->RemainingBytes() == entrySize);
+            } else {
+              // An empty entry reader means there was no complete block at the
+              // given index.
+              foundEntry.reset();
+            }
+          }
+        }
+      }
+    }
+#ifdef DEBUG
+    auto assertAllRead = MakeScopeExit([&]() {
+      MOZ_ASSERT(!foundEntry || foundEntry->RemainingBytes() == 0);
+    });
+#endif  // DEBUG
+    return std::forward<Callback>(aCallback)(std::move(foundEntry));
+  }
+
+  // Call `aCallback(Maybe<ProfileBufferEntryReader>&&)` on the entry at
+  // the given ProfileBufferBlockIndex; The `Maybe` will be `Nothing` if
+  // out-of-session, or if that entry doesn't exist anymore, or if we've reached
+  // just past the last entry. Return whatever `aCallback` returns. Callback
+  // should not store `ProfileBufferEntryReader`, because it may become invalid
+  // after this call.
+  // The lookup is done under the buffer lock, through the released chunks
+  // first and then the current chunk.
+  template <typename Callback>
+  [[nodiscard]] auto ReadAt(ProfileBufferBlockIndex aBlockIndex,
+                            Callback&& aCallback) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    if (MOZ_LIKELY(!!mChunkManager)) {
+      // In-session.
+      return mChunkManager->PeekExtantReleasedChunks(
+          [&](const ProfileBufferChunk* aOldestReleasedChunk) {
+            return ReadAt(aBlockIndex, aOldestReleasedChunk,
+                          mCurrentChunk.get(),
+                          std::forward<Callback>(aCallback));
+          });
+    }
+    // Out-of-session.
+    return std::forward<Callback>(aCallback)(Nothing{});
+  }
+
+  // Append the contents of another ProfileChunkedBuffer to this one.
+  // Each entry of `aSrc` is copied into a new entry here, in the same order.
+  // Returns the block index of the first copied entry, or a null index if
+  // nothing was copied or if any copy failed (copying stops at the first
+  // failure, but entries copied before that stay in this buffer).
+  // NOTE(review): on the failure paths the source entry reader is left
+  // unread, whereas the static `ReadEach` asserts (DEBUG) that its callback
+  // consumed each entry -- verify whether this can trip that assertion.
+  ProfileBufferBlockIndex AppendContents(const ProfileChunkedBuffer& aSrc) {
+    ProfileBufferBlockIndex firstAppendedIndex;
+    // Once one entry could not be written, all following ones are skipped.
+    bool writeFailed = false;
+    aSrc.ReadEach([&](ProfileBufferEntryReader& aSrcEntry) {
+      if (writeFailed) {
+        return;
+      }
+      const bool entryWritten =
+          Put(aSrcEntry.RemainingBytes(),
+              [&](Maybe<ProfileBufferEntryWriter>& aDstWriter) {
+                if (aDstWriter.isNothing()) {
+                  return false;
+                }
+                if (!firstAppendedIndex) {
+                  // Remember where the appended contents start.
+                  firstAppendedIndex = aDstWriter->CurrentBlockIndex();
+                }
+                aDstWriter->WriteFromReader(aSrcEntry,
+                                            aSrcEntry.RemainingBytes());
+                return true;
+              });
+      writeFailed = !entryWritten;
+    });
+    if (writeFailed) {
+      return nullptr;
+    }
+    return firstAppendedIndex;
+  }
+
+#ifdef DEBUG
+  // DEBUG-only: print a human-readable description of this buffer to `aFile`
+  // (standard output by default): first the state counters, then one line per
+  // chunk prefixed with "R " (released), "C " (current) or "N " (next), and
+  // finally the state of the chunk request.
+  // When out-of-session, only the state counters are printed.
+  void Dump(std::FILE* aFile = stdout) const {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // NOTE(review): all values are converted to `unsigned` for printing, so
+    // they would be truncated if they are wider than `unsigned` (the matching
+    // `State` fields are 64-bit counters) -- TODO confirm this is acceptable
+    // for debugging output.
+    fprintf(aFile,
+            "ProfileChunkedBuffer[%p] State: range %u-%u pushed=%u cleared=%u "
+            "(live=%u) failed-puts=%u bytes",
+            this, unsigned(mRangeStart), unsigned(mRangeEnd),
+            unsigned(mPushedBlockCount), unsigned(mClearedBlockCount),
+            unsigned(mPushedBlockCount) - unsigned(mClearedBlockCount),
+            unsigned(mFailedPutBytes));
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      fprintf(aFile, " - Out-of-session\n");
+      return;
+    }
+    fprintf(aFile, " - chunks:\n");
+    // Set as soon as any chunk (released, current, or next) gets printed.
+    bool hasChunks = false;
+    mChunkManager->PeekExtantReleasedChunks(
+        [&](const ProfileBufferChunk* aOldestChunk) {
+          for (const ProfileBufferChunk* chunk = aOldestChunk; chunk;
+               chunk = chunk->GetNext()) {
+            fprintf(aFile, "R ");
+            chunk->Dump(aFile);
+            hasChunks = true;
+          }
+        });
+    if (mCurrentChunk) {
+      fprintf(aFile, "C ");
+      mCurrentChunk->Dump(aFile);
+      hasChunks = true;
+    }
+    for (const ProfileBufferChunk* chunk = mNextChunks.get(); chunk;
+         chunk = chunk->GetNext()) {
+      fprintf(aFile, "N ");
+      chunk->Dump(aFile);
+      hasChunks = true;
+    }
+    switch (mRequestedChunkHolder->GetState()) {
+      case RequestedChunkRefCountedHolder::State::Unused:
+        fprintf(aFile, " - No request pending.\n");
+        break;
+      case RequestedChunkRefCountedHolder::State::Requested:
+        fprintf(aFile, " - Request pending.\n");
+        break;
+      case RequestedChunkRefCountedHolder::State::Fulfilled:
+        fprintf(aFile, " - Request fulfilled.\n");
+        break;
+    }
+    if (!hasChunks) {
+      fprintf(aFile, " No chunks.\n");
+    }
+  }
+#endif  // DEBUG
+
+ private:
+  // Used to de/serialize a ProfileChunkedBuffer (e.g., containing a backtrace).
+  friend ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer>;
+  friend ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer>;
+  friend ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>>;
+  friend ProfileBufferEntryReader::Deserializer<
+      UniquePtr<ProfileChunkedBuffer>>;
+
+  // End the current session, if there is one: Drop our reference to the
+  // requested-chunk holder, tell the chunk manager to forget unreleased chunks,
+  // detach from it, discard the current and next chunks, make the range empty
+  // (start == end), and zero the statistics.
+  // Returns the chunk manager that was owned by this buffer, or null if there
+  // was none (which includes the case where we were already out-of-session).
+  [[nodiscard]] UniquePtr<ProfileBufferChunkManager> ResetChunkManager(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+    UniquePtr<ProfileBufferChunkManager> previouslyOwned;
+    if (!mChunkManager) {
+      // Out-of-session, nothing to reset.
+      return previouslyOwned;
+    }
+
+    // Detach from the chunk manager.
+    mRequestedChunkHolder = nullptr;
+    mChunkManager->ForgetUnreleasedChunks();
+#ifdef DEBUG
+    mChunkManager->DeregisteredFrom(this);
+#endif
+    mChunkManager = nullptr;
+    previouslyOwned = std::move(mOwnedChunkManager);
+
+    // Discard the chunks we were holding.
+    if (mCurrentChunk) {
+      mCurrentChunk->MarkDone();
+      mCurrentChunk = nullptr;
+    }
+    mNextChunks = nullptr;
+
+    // The buffer is now empty; the indices keep increasing from the old end.
+    mNextChunkRangeStart = mRangeEnd;
+    mRangeStart = mRangeEnd;
+
+    // Start from fresh statistics.
+    mPushedBlockCount = 0;
+    mClearedBlockCount = 0;
+    mFailedPutBytes = 0;
+
+    return previouslyOwned;
+  }
+
+  // Attach `aChunkManager` to this buffer, which must currently be
+  // out-of-session, and start a session with it: Install the chunk-destroyed
+  // callback, get a first current chunk, and request a following one.
+  // `aLock` is the caller's lock object, it is forwarded to the helpers called
+  // from here.
+  void SetChunkManager(
+      ProfileBufferChunkManager& aChunkManager,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!mChunkManager);
+    mChunkManager = &aChunkManager;
+#ifdef DEBUG
+    mChunkManager->RegisteredWith(this);
+#endif
+
+    mChunkManager->SetChunkDestroyedCallback(
+        [this](const ProfileBufferChunk& aChunk) {
+          // A chunk is being destroyed: Move the start of the range past it.
+          // `mRangeStart` is atomic and not protected by the mutex, so we use
+          // a compare-exchange, and retry if the value changed under us.
+          const ProfileBufferIndex chunkStart = aChunk.RangeStart();
+          const ProfileBufferIndex chunkEnd =
+              chunkStart + aChunk.BufferBytes();
+          for (;;) {
+            ProfileBufferIndex rangeStart = mRangeStart;
+            if (MOZ_UNLIKELY(rangeStart > chunkStart)) {
+              // The range already starts past the beginning of this chunk, so
+              // there is nothing to advance. We must leave the loop here:
+              // Nothing in this loop could make the condition change, so
+              // retrying would spin forever.
+              break;
+            }
+            if (MOZ_LIKELY(
+                    mRangeStart.compareExchange(rangeStart, chunkEnd))) {
+              break;
+            }
+          }
+          // In all cases, the blocks of that chunk are gone.
+          mClearedBlockCount += aChunk.BlockCount();
+        });
+
+    // We start with one chunk right away, and request a following one now
+    // so it should be available before the current chunk is full.
+    SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+    mRequestedChunkHolder = MakeRefPtr<RequestedChunkRefCountedHolder>();
+    RequestChunk(aLock);
+  }
+
+  // Memory reporting: Sum of the sizes (as measured through `aMallocSizeOf`)
+  // of the chunk manager, the current chunk, and the next chunks.
+  // Returns 0 when out-of-session.
+  [[nodiscard]] size_t SizeOfExcludingThis(
+      MallocSizeOf aMallocSizeOf,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) const {
+    if (MOZ_UNLIKELY(!mChunkManager)) {
+      // Out-of-session, there is nothing to measure.
+      return 0;
+    }
+    size_t total = mChunkManager->SizeOfIncludingThis(aMallocSizeOf);
+    total +=
+        mCurrentChunk ? mCurrentChunk->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    total += mNextChunks ? mNextChunks->SizeOfIncludingThis(aMallocSizeOf) : 0;
+    return total;
+  }
+
+  // Prepare the current chunk (which must exist) for use: Place it at
+  // `mNextChunkRangeStart`, advance `mNextChunkRangeStart` by the chunk's
+  // buffer size, and reserve an initial tail block of 0 bytes.
+  void InitializeCurrentChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock&) {
+    MOZ_ASSERT(!!mCurrentChunk);
+    ProfileBufferChunk& chunk = *mCurrentChunk;
+    chunk.SetRangeStart(mNextChunkRangeStart);
+    mNextChunkRangeStart += chunk.BufferBytes();
+    // The returned value is deliberately ignored.
+    Unused << chunk.ReserveInitialBlockAsTail(0);
+  }
+
+  // Take ownership of `aChunk` as the new current chunk, and initialize it
+  // unless it is null (a null `aChunk` just leaves us without current chunk).
+  void SetAndInitializeCurrentChunk(
+      UniquePtr<ProfileBufferChunk>&& aChunk,
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    mCurrentChunk = std::move(aChunk);
+    if (!mCurrentChunk) {
+      // No chunk was provided, nothing to initialize.
+      return;
+    }
+    InitializeCurrentChunk(aLock);
+  }
+
+  // Ask the chunk manager for one more chunk, unless a request is already
+  // pending. The chunk is delivered through `mRequestedChunkHolder`, and gets
+  // picked up later by `HandleRequestedChunk_IsPending()`.
+  void RequestChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    const bool alreadyPending = HandleRequestedChunk_IsPending(aLock);
+    if (alreadyPending) {
+      // Only one request may be in flight at a time.
+      return;
+    }
+
+    // Record in the holder that a request is starting.
+    mRequestedChunkHolder->StartRequest();
+
+    // The callback owns its own reference to the holder, which therefore stays
+    // alive until the callback has run, even if this `ProfileChunkedBuffer`
+    // switches to another `ChunkManager` or gets destroyed in the meantime.
+    RefPtr<RequestedChunkRefCountedHolder> holderForCallback(
+        mRequestedChunkHolder);
+    mChunkManager->RequestChunk(
+        [requestedChunkHolder = std::move(holderForCallback)](
+            UniquePtr<ProfileBufferChunk> aChunk) {
+          requestedChunkHolder->AddRequestedChunk(std::move(aChunk));
+        });
+  }
+
+  // Check on the chunk request. If it has been fulfilled with a chunk, that
+  // chunk becomes the current chunk when there is none (and a new request is
+  // started right away), otherwise it is added to `mNextChunks`.
+  // Returns true if a request is pending when this function returns, false if
+  // there is none.
+  [[nodiscard]] bool HandleRequestedChunk_IsPending(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!!mChunkManager);
+    MOZ_ASSERT(!!mRequestedChunkHolder);
+
+    if (mRequestedChunkHolder->GetState() ==
+        RequestedChunkRefCountedHolder::State::Unused) {
+      // No request was made.
+      return false;
+    }
+
+    // There is a request, it is either still in-flight or already fulfilled.
+    Maybe<UniquePtr<ProfileBufferChunk>> fulfilled =
+        mRequestedChunkHolder->GetChunkIfFulfilled();
+    if (fulfilled.isNothing()) {
+      // Still in-flight.
+      return true;
+    }
+
+    // Extracting the result must have put the holder back to its unused state.
+    MOZ_ASSERT(mRequestedChunkHolder->GetState() ==
+               RequestedChunkRefCountedHolder::State::Unused);
+
+    UniquePtr<ProfileBufferChunk>& received = *fulfilled;
+    if (!received) {
+      // The request was fulfilled, but without a chunk.
+      return false;
+    }
+
+    if (!mCurrentChunk) {
+      // We needed a current chunk, use the received one; and immediately
+      // request another chunk for later.
+      SetAndInitializeCurrentChunk(std::move(received), aLock);
+      MOZ_ASSERT(!mNextChunks);
+      RequestChunk(aLock);
+      return true;
+    }
+
+    // We already have a current chunk, keep the received one for later.
+    if (mNextChunks) {
+      mNextChunks->InsertNext(std::move(received));
+    } else {
+      mNextChunks = std::move(received);
+    }
+    return false;
+  }
+
+  // Get a pointer to the current chunk. If there is none yet, try to get one
+  // from a recently-fulfilled request, and failing that, directly from the
+  // chunk manager.
+  // This may return nullptr, the caller must handle that case.
+  [[nodiscard]] ProfileBufferChunk* GetOrCreateCurrentChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    if (ProfileBufferChunk* ready = mCurrentChunk.get(); MOZ_LIKELY(ready)) {
+      return ready;
+    }
+
+    // No current chunk ready.
+    MOZ_ASSERT(!mNextChunks,
+               "There shouldn't be next chunks when there is no current one");
+
+    // A request may have been fulfilled recently; whether a request is still
+    // pending doesn't matter here.
+    Unused << HandleRequestedChunk_IsPending(aLock);
+    if (MOZ_UNLIKELY(!mCurrentChunk)) {
+      // Still nothing, try to get a chunk right now. This may fail as well,
+      // in which case we have nothing to return but nullptr.
+      SetAndInitializeCurrentChunk(mChunkManager->GetChunk(), aLock);
+    }
+    return mCurrentChunk.get();
+  }
+
+  // Get a pointer to the next chunk (there must already be a current chunk).
+  // If there is none yet, try to get one from a recently-fulfilled request, and
+  // failing that, directly from the chunk manager.
+  // This may return nullptr, the caller must handle that case; a request is
+  // then attempted so that a chunk may be available later.
+  [[nodiscard]] ProfileBufferChunk* GetOrCreateNextChunk(
+      const baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock) {
+    MOZ_ASSERT(!!mCurrentChunk,
+               "Why ask for a next chunk when there isn't even a current one?");
+    if (ProfileBufferChunk* ready = mNextChunks.get(); MOZ_LIKELY(ready)) {
+      return ready;
+    }
+
+    // No next chunk ready. A request may have been fulfilled recently; whether
+    // a request is still pending doesn't matter here.
+    Unused << HandleRequestedChunk_IsPending(aLock);
+    if (ProfileBufferChunk* received = mNextChunks.get();
+        MOZ_LIKELY(received)) {
+      return received;
+    }
+
+    // Still nothing, try to get a chunk right now.
+    mNextChunks = mChunkManager->GetChunk();
+    // Note: The result is read *before* the fallback request below, it is what
+    // we return no matter what that request does.
+    ProfileBufferChunk* const obtained = mNextChunks.get();
+    if (MOZ_UNLIKELY(!obtained)) {
+      // We cannot do anything more now, attempt a request for later.
+      RequestChunk(aLock);
+    }
+    return obtained;
+  }
+
+  // Reserve a block of `aCallbackBlockBytes()` bytes, then invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryWriter>&)` (`Nothing` on failure). This
+  // is the "raw" version: no entry size is written at the start of the block.
+  // `aCallbackBlockBytes` is a callback rather than a plain value, so that this
+  // potentially-expensive computation only runs after we've checked that we're
+  // in-session; use `Put(Length, Callback)` if you already know the size.
+  // `aBlockCount` is added to `mPushedBlockCount` on success.
+  template <typename CallbackBlockBytes, typename Callback>
+  auto ReserveAndPutRaw(CallbackBlockBytes&& aCallbackBlockBytes,
+                        Callback&& aCallback,
+                        baseprofiler::detail::BaseProfilerMaybeAutoLock& aLock,
+                        uint64_t aBlockCount = 1) {
+    // The entry writer that will point into one or two chunks to write
+    // into, empty by default (failure).
+    Maybe<ProfileBufferEntryWriter> maybeEntryWriter;
+
+    // The current chunk will be filled if we need to write more than its
+    // remaining space.
+    bool currentChunkFilled = false;
+
+    // If the current chunk gets filled, we may or may not initialize the next
+    // chunk!
+    bool nextChunkInitialized = false;
+
+    if (MOZ_LIKELY(mChunkManager)) {
+      // In-session.
+
+      const Length blockBytes =
+          std::forward<CallbackBlockBytes>(aCallbackBlockBytes)();
+
+      if (ProfileBufferChunk* current = GetOrCreateCurrentChunk(aLock);
+          MOZ_LIKELY(current)) {
+        if (blockBytes <= current->RemainingBytes()) {
+          // Block fits in current chunk with only one span.
+          currentChunkFilled = blockBytes == current->RemainingBytes();
+          const auto [mem0, blockIndex] = current->ReserveBlock(blockBytes);
+          MOZ_ASSERT(mem0.LengthBytes() == blockBytes);
+          maybeEntryWriter.emplace(
+              mem0, blockIndex,
+              ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                  blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+          MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+          mRangeEnd += blockBytes;
+          mPushedBlockCount += aBlockCount;
+        } else {
+          // Block doesn't fit fully in current chunk, it needs to overflow into
+          // the next one.
+          // Whether or not we can write this entry, the current chunk is now
+          // considered full, so it will be released. (Otherwise we could refuse
+          // this entry, but later accept a smaller entry into this chunk, which
+          // would be somewhat inconsistent.)
+          currentChunkFilled = true;
+          // Make sure the next chunk is available (from a previous request),
+          // otherwise create one on the spot.
+          if (ProfileBufferChunk* next = GetOrCreateNextChunk(aLock);
+              MOZ_LIKELY(next)) {
+            // Here, we know we have a current and a next chunk.
+            // Reserve head of block at the end of the current chunk.
+            const auto [mem0, blockIndex] =
+                current->ReserveBlock(current->RemainingBytes());
+            MOZ_ASSERT(mem0.LengthBytes() < blockBytes);
+            MOZ_ASSERT(current->RemainingBytes() == 0);
+            // Set the next chunk range, and reserve the needed space for the
+            // tail of the block.
+            next->SetRangeStart(mNextChunkRangeStart);
+            mNextChunkRangeStart += next->BufferBytes();
+            const auto mem1 = next->ReserveInitialBlockAsTail(
+                blockBytes - mem0.LengthBytes());
+            MOZ_ASSERT(next->RemainingBytes() != 0);
+            nextChunkInitialized = true;
+            // Block is split in two spans.
+            maybeEntryWriter.emplace(
+                mem0, mem1, blockIndex,
+                ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+                    blockIndex.ConvertToProfileBufferIndex() + blockBytes));
+            MOZ_ASSERT(maybeEntryWriter->RemainingBytes() == blockBytes);
+            mRangeEnd += blockBytes;
+            mPushedBlockCount += aBlockCount;
+          } else {
+            // Cannot get a new chunk. Record put failure.
+            mFailedPutBytes += blockBytes;
+          }
+        }
+      } else {
+        // Cannot get a current chunk. Record put failure.
+        mFailedPutBytes += blockBytes;
+      }
+    }  // end of `if (MOZ_LIKELY(mChunkManager))`
+
+    // Here, we either have a `Nothing` (failure), or a non-empty entry writer
+    // pointing at the start of the block.
+
+    // After we invoke the callback and return, we may need to handle the
+    // current chunk being filled.
+    auto handleFilledChunk = MakeScopeExit([&]() {
+      // If the entry writer was not already empty, the callback *must* have
+      // filled the full entry.
+      MOZ_ASSERT(!maybeEntryWriter || maybeEntryWriter->RemainingBytes() == 0);
+
+      if (currentChunkFilled) {
+        // Extract current (now filled) chunk.
+        UniquePtr<ProfileBufferChunk> filled = std::move(mCurrentChunk);
+
+        if (mNextChunks) {
+          // Cycle to the next chunk.
+          mCurrentChunk =
+              std::exchange(mNextChunks, mNextChunks->ReleaseNext());
+
+          // Make sure it is initialized (it is now the current chunk).
+          if (!nextChunkInitialized) {
+            InitializeCurrentChunk(aLock);
+          }
+        }
+
+        // And finally mark filled chunk done and release it.
+        filled->MarkDone();
+        mChunkManager->ReleaseChunk(std::move(filled));
+
+        // Request another chunk if needed.
+        // In most cases, here we should have one current chunk and no next
+        // chunk, so we want to do a request so there hopefully will be a next
+        // chunk available when the current one gets filled.
+        // But we also do a request if we don't even have a current chunk (if
+        // it's too late, it's ok because the next `ReserveAndPutRaw` will just
+        // allocate one on the spot.)
+        // And if we already have a next chunk, there's no need for more now.
+        if (!mCurrentChunk || !mNextChunks) {
+          RequestChunk(aLock);
+        }
+      }
+    });
+
+    return std::forward<Callback>(aCallback)(maybeEntryWriter);
+  }
+
+  // Overload for callers that already know the block size: Take the buffer
+  // lock, reserve a block of `aBlockBytes` bytes, and invoke and return
+  // `aCallback(Maybe<ProfileBufferEntryWriter>&)`. Like the overload it
+  // forwards to, this is the "raw" version that doesn't write the entry size at
+  // the beginning of the block.
+  template <typename Callback>
+  auto ReserveAndPutRaw(Length aBlockBytes, Callback&& aCallback,
+                        uint64_t aBlockCount) {
+    baseprofiler::detail::BaseProfilerMaybeAutoLock lock(mMutex);
+    // The size is already known, wrap it in the callback form that the other
+    // overload expects.
+    auto knownBlockBytes = [aBlockBytes]() { return aBlockBytes; };
+    return ReserveAndPutRaw(std::move(knownBlockBytes),
+                            std::forward<Callback>(aCallback), lock,
+                            aBlockCount);
+  }
+
+  // Mutex guarding the following members.
+  mutable baseprofiler::detail::BaseProfilerMaybeMutex mMutex;
+
+  // Pointer to the current Chunk Manager (or null when out-of-session.)
+  // It may be owned locally (see below) or externally.
+  ProfileBufferChunkManager* mChunkManager = nullptr;
+
+  // Only non-null when we own the current Chunk Manager.
+  UniquePtr<ProfileBufferChunkManager> mOwnedChunkManager;
+
+  UniquePtr<ProfileBufferChunk> mCurrentChunk;
+
+  UniquePtr<ProfileBufferChunk> mNextChunks;
+
+  // Hand-over point for requested chunks, between a `ChunkManager` and a
+  // `ProfileChunkedBuffer`.
+  // It is ref-counted because a request may be fulfilled asynchronously, and
+  // either side may be destroyed while the request is in-flight.
+  // It has its own mutex, because the `ProfileChunkedBuffer` (and therefore its
+  // mutex) could be destroyed during the request.
+  class RequestedChunkRefCountedHolder
+      : public external::AtomicRefCounted<RequestedChunkRefCountedHolder> {
+   public:
+    MOZ_DECLARE_REFCOUNTED_TYPENAME(RequestedChunkRefCountedHolder)
+
+    // Life cycle of a request:
+    // Unused -StartRequest-> Requested -AddRequestedChunk-> Fulfilled
+    // -GetChunkIfFulfilled-> Unused.
+    enum class State { Unused, Requested, Fulfilled };
+
+    // Snapshot of the current state. It may have changed by the time the
+    // caller looks at it, so use with care; e.g., `ProfileChunkedBuffer` uses
+    // it to see if a request is pending or fulfilled, to avoid starting
+    // another one.
+    [[nodiscard]] State GetState() const {
+      baseprofiler::detail::BaseProfilerAutoLock requestLock(mRequestMutex);
+      return mState;
+    }
+
+    // `ProfileChunkedBuffer` must call this when it requests a chunk.
+    // Only one request may be in-flight at any time.
+    void StartRequest() {
+      baseprofiler::detail::BaseProfilerAutoLock requestLock(mRequestMutex);
+      MOZ_ASSERT(mState == State::Unused, "Already requested or fulfilled");
+      mState = State::Requested;
+    }
+
+    // The `ChunkManager` must call this to fulfill the request. If it cannot
+    // provide a chunk (because of memory limits, or because it is being
+    // destroyed), it must still call this, with a nullptr.
+    void AddRequestedChunk(UniquePtr<ProfileBufferChunk>&& aChunk) {
+      baseprofiler::detail::BaseProfilerAutoLock requestLock(mRequestMutex);
+      MOZ_ASSERT(mState == State::Requested);
+      mState = State::Fulfilled;
+      mRequestedChunk = std::move(aChunk);
+    }
+
+    // After a request, the `ProfileChunkedBuffer` can try to collect the
+    // result:
+    // - Nothing -> The request has not been fulfilled yet.
+    // - Some(nullptr) -> The `ChunkManager` could not provide a chunk.
+    // - Some(chunk) -> The requested chunk.
+    // Once something has been collected, the holder is unused again.
+    [[nodiscard]] Maybe<UniquePtr<ProfileBufferChunk>> GetChunkIfFulfilled() {
+      Maybe<UniquePtr<ProfileBufferChunk>> collected;
+      baseprofiler::detail::BaseProfilerAutoLock requestLock(mRequestMutex);
+      MOZ_ASSERT(mState == State::Requested || mState == State::Fulfilled);
+      if (mState != State::Fulfilled) {
+        // Still in-flight, nothing to collect yet.
+        return collected;
+      }
+      mState = State::Unused;
+      collected.emplace(std::move(mRequestedChunk));
+      return collected;
+    }
+
+   private:
+    // Mutex guarding the following members.
+    mutable baseprofiler::detail::BaseProfilerMutex mRequestMutex;
+    // Where we are in the life cycle of a request.
+    State mState = State::Unused;
+    // Chunk provided by the `ChunkManager`, until it gets collected.
+    UniquePtr<ProfileBufferChunk> mRequestedChunk;
+  };
+
+  // Requested-chunk holder, kept alive when in-session, but may also live
+  // longer if a request is in-flight.
+  RefPtr<RequestedChunkRefCountedHolder> mRequestedChunkHolder;
+
+  // Range start of the next chunk to become current. Starting at 1 because
+  // 0 is a reserved index similar to nullptr.
+  ProfileBufferIndex mNextChunkRangeStart = 1;
+
+  // Index to the first block.
+  // Atomic because it may be increased when a Chunk is destroyed, and the
+  // callback may be invoked from anywhere, including from inside one of our
+  // locked section, so we cannot protect it with a mutex.
+  Atomic<ProfileBufferIndex, MemoryOrdering::ReleaseAcquire> mRangeStart{1};
+
+  // Index past the last block. Equals mRangeStart if empty.
+  ProfileBufferIndex mRangeEnd = 1;
+
+  // Number of blocks that have been pushed into this buffer.
+  uint64_t mPushedBlockCount = 0;
+
+  // Number of blocks that have been removed from this buffer.
+  // Note: Live entries = pushed - cleared.
+  // Atomic because it may be updated when a Chunk is destroyed, and the
+  // callback may be invoked from anywhere, including from inside one of our
+  // locked section, so we cannot protect it with a mutex.
+  Atomic<uint64_t, MemoryOrdering::ReleaseAcquire> mClearedBlockCount{0};
+
+  // Number of bytes that could not be put into this buffer.
+  uint64_t mFailedPutBytes = 0;
+};
+
+// ----------------------------------------------------------------------------
+// ProfileChunkedBuffer serialization
+
+// A ProfileChunkedBuffer can hide another one!
+// This will be used to store marker backtraces; They can be read back into a
+// UniquePtr<ProfileChunkedBuffer>.
+// Format: len (ULEB128) | start | buffer (len bytes) | pushed | cleared
+// A lone len==0 marks an out-of-session buffer, or an empty buffer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer> {
+  // Number of bytes that `Write()` outputs for `aBuffer`: A single ULEB128
+  // zero for an out-of-session or empty buffer; otherwise the ULEB128 length,
+  // the start index, the contents, and the pushed and cleared block counts.
+  static Length Bytes(const ProfileChunkedBuffer& aBuffer) {
+    return aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+      if (!aReader) {
+        // Out-of-session: Only a length of 0 is stored.
+        return ULEB128Size<Length>(0);
+      }
+      ProfileBufferEntryReader contents = aReader->SingleChunkDataAsEntry();
+      const ProfileBufferIndex first =
+          contents.CurrentBlockIndex().ConvertToProfileBufferIndex();
+      const ProfileBufferIndex pastLast =
+          contents.NextBlockIndex().ConvertToProfileBufferIndex();
+      MOZ_ASSERT(pastLast - first <= std::numeric_limits<Length>::max());
+      const Length contentBytes = static_cast<Length>(pastLast - first);
+      if (contentBytes != 0) {
+        // In-session with contents.
+        return static_cast<Length>(ULEB128Size(contentBytes) + sizeof(first) +
+                                   contentBytes +
+                                   sizeof(aBuffer.mPushedBlockCount) +
+                                   sizeof(aBuffer.mClearedBlockCount));
+      }
+      // In-session but empty: Also only a length of 0.
+      return ULEB128Size<Length>(0);
+    });
+  }
+
+  // Serialize `aBuffer` into `aEW`, see `Bytes()` for what gets written.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const ProfileChunkedBuffer& aBuffer) {
+    aBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+      if (!aReader) {
+        // Out-of-session: Only store a length of 0.
+        aEW.WriteULEB128<Length>(0);
+        return;
+      }
+      ProfileBufferEntryReader contents = aReader->SingleChunkDataAsEntry();
+      const ProfileBufferIndex first =
+          contents.CurrentBlockIndex().ConvertToProfileBufferIndex();
+      const ProfileBufferIndex pastLast =
+          contents.NextBlockIndex().ConvertToProfileBufferIndex();
+      MOZ_ASSERT(pastLast - first <= std::numeric_limits<Length>::max());
+      const Length contentBytes = static_cast<Length>(pastLast - first);
+      MOZ_ASSERT(contentBytes <= aEW.RemainingBytes());
+      if (contentBytes == 0) {
+        // In-session but empty: Only store a length of 0.
+        aEW.WriteULEB128<Length>(0);
+        return;
+      }
+      // In-session with contents.
+      // Header: Length of the contents, and index where they start.
+      aEW.WriteULEB128(contentBytes);
+      aEW.WriteObject(first);
+      // The contents themselves.
+      aEW.WriteFromReader(contents, contents.RemainingBytes());
+      // Trailer: Statistics. (Failed puts are not important to serialize.)
+      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mPushedBlockCount));
+      aEW.WriteObject(static_cast<uint64_t>(aBuffer.mClearedBlockCount));
+    });
+  }
+};
+
+// A serialized ProfileChunkedBuffer can be read into an empty buffer, which may
+// be either out-of-session, or in-session with enough room.
+template <>
+struct ProfileBufferEntryReader::Deserializer<ProfileChunkedBuffer> {
+  // Read a serialized buffer from `aER` into `aBuffer`, which must be empty.
+  // A stored length of 0 leaves `aBuffer` untouched.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       ProfileChunkedBuffer& aBuffer) {
+    // The destination gets overwritten, so it should be empty.
+    MOZ_ASSERT(aBuffer.GetState().mRangeStart == aBuffer.GetState().mRangeEnd);
+
+    const auto storedBytes = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+    if (storedBytes == 0) {
+      // 0-length means an "uninteresting" buffer, nothing more to read.
+      return;
+    }
+
+    // There are contents to read. Only the start index is stored; the end is
+    // first set equal to the start (the buffer is still empty), it will be
+    // updated by `ReserveAndPutRaw()` below.
+    const auto storedStart = aER.ReadObject<ProfileBufferIndex>();
+    aBuffer.mRangeStart = storedStart;
+    aBuffer.mRangeEnd = storedStart;
+
+    if (!aBuffer.IsInSession()) {
+      // Out-of-session destination: Set a new chunk manager that will provide
+      // a single chunk of just the right size.
+      aBuffer.SetChunkManager(
+          MakeUnique<ProfileBufferChunkManagerSingle>(storedBytes));
+      MOZ_ASSERT(aBuffer.BufferLength().value() >= storedBytes);
+    } else {
+      // In-session destination (i.e., it already has a memory buffer
+      // attached): The caller must have allocated enough space.
+      MOZ_RELEASE_ASSERT(aBuffer.BufferLength().value() >= storedBytes);
+    }
+
+    // Copy the contents. The block count passed here is 0, the real counts are
+    // restored from the stored statistics just after.
+    aBuffer.ReserveAndPutRaw(
+        storedBytes,
+        [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+          MOZ_RELEASE_ASSERT(aEW.isSome());
+          aEW->WriteFromReader(aER, storedBytes);
+        },
+        0);
+
+    // Restore the statistics. Failed puts are not important to keep.
+    aBuffer.mPushedBlockCount = aER.ReadObject<uint64_t>();
+    aBuffer.mClearedBlockCount = aER.ReadObject<uint64_t>();
+    aBuffer.mFailedPutBytes = 0;
+  }
+
+  // We cannot output a ProfileChunkedBuffer object (not copyable), use
+  // `ReadInto()` or `aER.ReadObject<UniquePtr<ProfileChunkedBuffer>>()`
+  // instead.
+  static ProfileChunkedBuffer Read(ProfileBufferEntryReader& aER) = delete;
+};
+
+// A ProfileChunkedBuffer is usually referenced through a UniquePtr, so for
+// convenience that UniquePtr can be (de)serialized directly.
+// The format is compatible with the non-UniquePtr serialization above: A null
+// pointer is written like an out-of-session or empty buffer; and any of these
+// is deserialized into a null pointer.
+template <>
+struct ProfileBufferEntryWriter::Serializer<UniquePtr<ProfileChunkedBuffer>> {
+  // Number of bytes that `Write()` outputs for `aBufferUPtr`.
+  static Length Bytes(const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Size of the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      return SumBytes(*aBufferUPtr);
+    }
+    // Null pointer, same as an empty buffer: Just a length of 0.
+    return ULEB128Size<Length>(0);
+  }
+
+  // Serialize the buffer pointed at by `aBufferUPtr` (if any) into `aEW`.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    const UniquePtr<ProfileChunkedBuffer>& aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Write the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      aEW.WriteObject(*aBufferUPtr);
+      return;
+    }
+    // Null pointer, same as an empty buffer: Just write a length of 0.
+    aEW.WriteULEB128<Length>(0);
+  }
+};
+
+// A raw pointer to a ProfileChunkedBuffer can be serialized as well; a null
+// pointer is written like an empty buffer.
+// To read it back, use Deserializer<UniquePtr<ProfileChunkedBuffer>>.
+template <>
+struct ProfileBufferEntryWriter::Serializer<ProfileChunkedBuffer*> {
+  // Number of bytes that `Write()` outputs for `aBufferUPtr`.
+  static Length Bytes(ProfileChunkedBuffer* aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Size of the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      return SumBytes(*aBufferUPtr);
+    }
+    // Null pointer, same as an empty buffer: Just a length of 0.
+    return ULEB128Size<Length>(0);
+  }
+
+  // Serialize the buffer pointed at by `aBufferUPtr` (if any) into `aEW`.
+  static void Write(ProfileBufferEntryWriter& aEW,
+                    ProfileChunkedBuffer* aBufferUPtr) {
+    if (aBufferUPtr) {
+      // Write the pointed-at ProfileChunkedBuffer (which could itself be
+      // out-of-session or empty.)
+      aEW.WriteObject(*aBufferUPtr);
+      return;
+    }
+    // Null pointer, same as an empty buffer: Just write a length of 0.
+    aEW.WriteULEB128<Length>(0);
+  }
+};
+
+// Deserialization into a UniquePtr<ProfileChunkedBuffer>: A stored length of 0
+// gives a null pointer, anything else gives a newly-allocated buffer (without
+// mutex) filled with the stored contents.
+template <>
+struct ProfileBufferEntryReader::Deserializer<UniquePtr<ProfileChunkedBuffer>> {
+  // Replace `aBuffer` with the result of `Read(aER)`.
+  static void ReadInto(ProfileBufferEntryReader& aER,
+                       UniquePtr<ProfileChunkedBuffer>& aBuffer) {
+    aBuffer = Read(aER);
+  }
+
+  // Read a serialized buffer from `aER`, see the struct comment for the result.
+  static UniquePtr<ProfileChunkedBuffer> Read(ProfileBufferEntryReader& aER) {
+    // Remember where the serialized buffer starts: The length is peeked at
+    // first, but the non-UniquePtr Deserializer used below expects to read it
+    // as well.
+    ProfileBufferEntryReader rewindPoint = aER;
+    const auto storedBytes = aER.ReadULEB128<ProfileChunkedBuffer::Length>();
+    if (storedBytes == 0) {
+      // 0-length means an "uninteresting" buffer, just return nullptr.
+      return nullptr;
+    }
+    // There are contents to read; allocate an empty ProfileChunkedBuffer
+    // without mutex to receive them.
+    auto restored = MakeUnique<ProfileChunkedBuffer>(
+        ProfileChunkedBuffer::ThreadSafety::WithoutMutex);
+    // Go back to before the length, and let the non-UniquePtr Deserializer do
+    // the work.
+    aER = rewindPoint;
+    aER.ReadIntoObject(*restored);
+    return restored;
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // ProfileChunkedBuffer_h
diff --git a/mozglue/baseprofiler/public/ProfilingCategoryList.h b/mozglue/baseprofiler/public/ProfilingCategoryList.h
new file mode 100644
index 0000000000..437f24aaa1
--- /dev/null
+++ b/mozglue/baseprofiler/public/ProfilingCategoryList.h
@@ -0,0 +1,122 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef baseprofiler_ProfilingCategoryList_h
+#define baseprofiler_ProfilingCategoryList_h
+
+// Profiler sub-categories are applied to each sampled stack to describe the
+// type of workload that the CPU is busy with. Only one sub-category can be
+// assigned so be mindful that these are non-overlapping. The active category is
+// set by pushing a label to the profiling stack, or by the unwinder in cases
+// such as JITs. A profile sample in arbitrary C++/Rust will typically be
+// categorized based on the top of the label stack.
+//
+// MOZ_PROFILING_CATEGORY_LIST takes three macros, and expands them once for
+// each entry of the list below:
+//    BEGIN_CATEGORY(name, label string, color string) starts a category,
+//    SUBCATEGORY(category name, sub-category name, label string) adds a
+//      sub-category to the category being listed,
+//    END_CATEGORY ends the category.
+//
+// The list of available color names for categories is:
+//    transparent, blue, green, grey, lightblue, magenta, orange, purple, yellow
+// NOTE(review): "darkgray" and "lightgreen" are used below but not listed here.
+
+// clang-format off
+
+#define MOZ_PROFILING_CATEGORY_LIST(BEGIN_CATEGORY, SUBCATEGORY, END_CATEGORY) \
+  BEGIN_CATEGORY(IDLE, "Idle", "transparent") \
+    SUBCATEGORY(IDLE, IDLE, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(OTHER, "Other", "grey") \
+    SUBCATEGORY(OTHER, OTHER, "Other") \
+    SUBCATEGORY(OTHER, OTHER_PreferenceRead, "Preference Read") \
+    SUBCATEGORY(OTHER, OTHER_Profiling, "Profiling") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(TEST, "Test", "darkgray") \
+    SUBCATEGORY(TEST, TEST, "Test") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(LAYOUT, "Layout", "purple") \
+    SUBCATEGORY(LAYOUT, LAYOUT, "Other") \
+    SUBCATEGORY(LAYOUT, LAYOUT_FrameConstruction, "Frame construction") \
+    SUBCATEGORY(LAYOUT, LAYOUT_Reflow, "Reflow") \
+    SUBCATEGORY(LAYOUT, LAYOUT_CSSParsing, "CSS parsing") \
+    SUBCATEGORY(LAYOUT, LAYOUT_SelectorQuery, "Selector query") \
+    SUBCATEGORY(LAYOUT, LAYOUT_StyleComputation, "Style computation") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JS, "JavaScript", "yellow") \
+    SUBCATEGORY(JS, JS, "Other") \
+    SUBCATEGORY(JS, JS_Parsing, "Parsing") \
+    SUBCATEGORY(JS, JS_BaselineCompilation, "JIT Compile (baseline)") \
+    SUBCATEGORY(JS, JS_IonCompilation, "JIT Compile (ion)") \
+    SUBCATEGORY(JS, JS_Interpreter, "Interpreter") \
+    SUBCATEGORY(JS, JS_BaselineInterpret, "JIT (baseline-interpreter)") \
+    SUBCATEGORY(JS, JS_Baseline, "JIT (baseline)") \
+    SUBCATEGORY(JS, JS_IonMonkey, "JIT (ion)") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(GCCC, "GC / CC", "orange") \
+    SUBCATEGORY(GCCC, GCCC, "Other") \
+    SUBCATEGORY(GCCC, GCCC_MinorGC, "Minor GC") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC, "Major GC (Other)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Mark, "Major GC (Mark)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Sweep, "Major GC (Sweep)") \
+    SUBCATEGORY(GCCC, GCCC_MajorGC_Compact, "Major GC (Compact)") \
+    SUBCATEGORY(GCCC, GCCC_UnmarkGray, "Unmark Gray") \
+    SUBCATEGORY(GCCC, GCCC_Barrier, "Barrier") \
+    SUBCATEGORY(GCCC, GCCC_FreeSnowWhite, "CC (Free Snow White)") \
+    SUBCATEGORY(GCCC, GCCC_BuildGraph, "CC (Build Graph)") \
+    SUBCATEGORY(GCCC, GCCC_ScanRoots, "CC (Scan Roots)") \
+    SUBCATEGORY(GCCC, GCCC_CollectWhite, "CC (Collect White)") \
+    SUBCATEGORY(GCCC, GCCC_Finalize, "CC (Finalize)") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(NETWORK, "Network", "lightblue") \
+    SUBCATEGORY(NETWORK, NETWORK, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(GRAPHICS, "Graphics", "green") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS, "Other") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListBuilding, "DisplayList building") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_DisplayListMerging, "DisplayList merging") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_LayerBuilding, "Layer building") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_TileAllocation, "Tile allocation") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_WRDisplayList, "WebRender display list") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_Rasterization, "Rasterization") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_FlushingAsyncPaints, "Flushing async paints") \
+    SUBCATEGORY(GRAPHICS, GRAPHICS_ImageDecoding, "Image decoding") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(DOM, "DOM", "blue") \
+    SUBCATEGORY(DOM, DOM, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_ANDROID, "Android", "yellow") \
+    SUBCATEGORY(JAVA_ANDROID, JAVA_ANDROID, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_ANDROIDX, "AndroidX", "orange") \
+    SUBCATEGORY(JAVA_ANDROIDX, JAVA_ANDROIDX, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_LANGUAGE, "Java", "blue") \
+    SUBCATEGORY(JAVA_LANGUAGE, JAVA_LANGUAGE, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_MOZILLA, "Mozilla", "green") \
+    SUBCATEGORY(JAVA_MOZILLA, JAVA_MOZILLA, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_KOTLIN, "Kotlin", "purple") \
+    SUBCATEGORY(JAVA_KOTLIN, JAVA_KOTLIN, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(JAVA_BLOCKED, "Blocked", "lightblue") \
+    SUBCATEGORY(JAVA_BLOCKED, JAVA_BLOCKED, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(IPC, "IPC", "lightgreen") \
+    SUBCATEGORY(IPC, IPC, "Other") \
+  END_CATEGORY \
+  BEGIN_CATEGORY(MEDIA, "Media", "orange") \
+    SUBCATEGORY(MEDIA, MEDIA, "Other") \
+    SUBCATEGORY(MEDIA, MEDIA_CUBEB, "Cubeb") \
+    SUBCATEGORY(MEDIA, MEDIA_PLAYBACK, "Playback") \
+    SUBCATEGORY(MEDIA, MEDIA_RT, "Real-time rendering") \
+  END_CATEGORY
+
+// clang-format on
+
+#endif  // baseprofiler_ProfilingCategoryList_h
diff --git a/mozglue/baseprofiler/public/leb128iterator.h b/mozglue/baseprofiler/public/leb128iterator.h
new file mode 100644
index 0000000000..636baf916f
--- /dev/null
+++ b/mozglue/baseprofiler/public/leb128iterator.h
@@ -0,0 +1,207 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// LEB128 utilities that can read/write unsigned LEB128 numbers from/to
+// iterators.
+//
+// LEB128 = Little Endian Base 128, where small numbers take few bytes, but
+// large numbers are still allowed, which is ideal when serializing numbers that
+// are likely to be small.
+// Each byte contains 7 bits from the number, starting at the "little end", the
+// top bit is 0 for the last byte, 1 otherwise.
+// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
+//
+// Iterators only need to provide:
+// - `*it` to return a reference to the next byte to be read from or written to.
+// - `++it` to advance the iterator after a byte is written.
+//
+// The caller must always provide sufficient space to write any number, by:
+// - pre-allocating a large enough buffer, or
+// - allocating more space when `++it` reaches the end and/or `*it` is invoked
+//   after the end, or
+// - moving the underlying pointer to an appropriate location (e.g., wrapping
+//   around a circular buffer).
+// The caller must also provide enough bytes to read a full value (i.e., at
+// least one byte should have its top bit unset), and a type large enough to
+// hold the stored value.
+//
+// Note: There are insufficient checks for validity! These functions are
+// intended to be used together, i.e., the user should only `ReadULEB128()` from
+// a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
+// Using with externally-sourced data (e.g., DWARF) is *not* recommended.
+//
+// https://en.wikipedia.org/wiki/LEB128
+
+#ifndef leb128iterator_h
+#define leb128iterator_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Likely.h"
+
+#include <climits>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+namespace mozilla {
+
+// Size in bytes of the ULEB128 encoding of `aValue`.
+// `T` must be an unsigned type. The result is always at least 1.
+template <typename T>
+constexpr uint_fast8_t ULEB128Size(T aValue) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Size only takes unsigned types");
+  // Each output byte carries 7 bits of the value, so count how many groups of
+  // 7 bits must be shifted out before nothing is left.
+  // E.g.: 0-127 take 1 byte, 128-16383 take 2 bytes.
+  // A do-while is used so that 0 still counts for one byte: the first group
+  // is always counted, and only then is the remainder tested.
+  uint_fast8_t byteCount = 0;
+  do {
+    byteCount += 1;
+    aValue >>= 7;
+    // Small values are expected, so the remainder is most likely 0 already.
+  } while (!MOZ_LIKELY(aValue == 0));
+  return byteCount;
+}
+
+// Upper bound on the encoded size, in bytes, of any value of type `T`.
+template <typename T>
+constexpr uint_fast8_t ULEB128MaxSize() {
+  return ULEB128Size(std::numeric_limits<T>::max());
+}
+
+// Encode `aValue` in LEB128 and store it through `aIterator`.
+// On return, the iterator has been moved past the last byte written.
+// `T` must be an unsigned type, and `It` an iterator to single bytes.
+template <typename T, typename It>
+void WriteULEB128(T aValue, It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "WriteULEB128 only takes unsigned types");
+  // Type of the bytes that the iterator gives access to.
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "WriteULEB128 expects an iterator to single bytes");
+  // The test for "nothing left" comes after the write, so that a value of 0
+  // still gets its one byte.
+  bool isLastByte = false;
+  do {
+    // Take the 7 least significant bits out of `aValue`...
+    const uint_fast8_t payload = aValue & 0x7Fu;
+    aValue >>= 7;
+    // ... and find out whether anything is left to write after them.
+    // Small values are expected, so this byte is most likely the last one.
+    isLastByte = MOZ_LIKELY(aValue == 0);
+    // The 8th bit is the continuation flag: set on all bytes but the last.
+    // The explicit cast to IteratorValue is not strictly needed, since only
+    // the bottom 8 bits are in use. But the compiler could warn about a
+    // narrowing conversion from a potentially-multibyte type down to whatever
+    // single-byte type `*aIterator` expects, hence the cast.
+    *aIterator =
+        static_cast<IteratorValue>(isLastByte ? payload : (payload | 0x80u));
+    // Always step to the next output byte, even after the last write.
+    ++aIterator;
+    // Carry on with the next 7 bits, if there are any.
+  } while (!isLastByte);
+}
+
+// Decode one LEB128 value of type `T`, read through `aIterator`.
+// On return, the iterator has been moved past the last byte read.
+// `T` must be an unsigned type, and `It` an iterator to single bytes.
+template <typename T, typename It>
+T ReadULEB128(It& aIterator) {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ReadULEB128 must return an unsigned type");
+  // Type of the bytes that the iterator gives access to.
+  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
+  static_assert(sizeof(IteratorValue) == 1,
+                "ReadULEB128 expects an iterator to single bytes");
+  // Accumulator for the incoming groups of 7 bits.
+  T value = 0;
+  // `bitPosition` is where the next group of 7 bits lands in `value`: the
+  // least significant bits come first, and each following byte goes 7 bits
+  // higher than the previous one.
+  for (uint_fast8_t bitPosition = 0;; bitPosition += 7) {
+    // Shifting by the full width of the type (or more) would be Undefined
+    // Behavior in C++, so check the position before every shift.
+    MOZ_ASSERT(bitPosition < CHAR_BIT * sizeof(T));
+    // Fetch the next byte, and step over it right away.
+    // The `static_cast` is there in case IteratorValue does not implicitly
+    // convert to uint_fast8_t. A sign extension would be harmless, as only the
+    // bottom 8 bits are looked at below.
+    const uint_fast8_t input = static_cast<uint_fast8_t>(*aIterator);
+    ++aIterator;
+    // The low 7 bits are payload, to be merged into place.
+    value |= static_cast<T>(input & 0x7fu) << bitPosition;
+    // The 8th bit is the continuation flag; when it is clear, this was the
+    // last byte. Small values are expected, so that is the likely case.
+    if (MOZ_LIKELY((input & 0x80u) == 0)) {
+      return value;
+    }
+    // Otherwise loop to read more significant bits from the next byte.
+  }
+}
+
+// constexpr-friendly ULEB128 parser, to which bytes are fed one at a time.
+// Mostly of use when the bytes come from a non-trivial feed.
+template <typename T>
+class ULEB128Reader {
+  static_assert(!std::numeric_limits<T>::is_signed,
+                "ULEB128Reader must handle an unsigned type");
+
+ public:
+  constexpr ULEB128Reader() = default;
+
+  // A parser carries state, so copying or assigning one is not meaningful.
+  constexpr ULEB128Reader(const ULEB128Reader&) = delete;
+  constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;
+
+  // Push the next byte of the encoded value into the parser.
+  // The result is true when that byte was the last one of the value.
+  [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
+    MOZ_ASSERT(!IsComplete());
+    // The low 7 bits are payload, to be merged into place.
+    mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
+    // The 8th bit flags more bytes; small values are expected, so likely not.
+    if (!MOZ_LIKELY((aByte & 0x80u) == 0)) {
+      // The next byte will carry the 7 bits above the ones just stored.
+      mShift += 7;
+      // Guard against shifting by >= the type width (Undefined Behavior).
+      MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
+      return false;
+    }
+    // That was the last byte, remember that the value is now complete.
+    mShift = mCompleteShift;
+    return true;
+  }
+
+  // Forget any parsing done so far, ready to parse a new value.
+  constexpr void Reset() {
+    mValue = 0;
+    mShift = 0;
+  }
+
+  [[nodiscard]] constexpr bool IsComplete() const {
+    return mShift == mCompleteShift;
+  }
+
+  // The parsed value; may only be requested once parsing is complete.
+  [[nodiscard]] constexpr T Value() const {
+    MOZ_ASSERT(IsComplete());
+    return mValue;
+  }
+
+ private:
+  // Special value of `mShift` indicating that parsing is complete.
+  constexpr static unsigned mCompleteShift = 0x10000u;
+
+  T mValue = 0;
+  unsigned mShift = 0;
+};
+
+}  // namespace mozilla
+
+#endif  // leb128iterator_h
diff --git a/mozglue/build/AsanOptions.cpp b/mozglue/build/AsanOptions.cpp
new file mode 100644
index 0000000000..79ff503a68
--- /dev/null
+++ b/mozglue/build/AsanOptions.cpp
@@ -0,0 +1,186 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Attributes.h"
+
+#ifndef _MSC_VER  // Not supported by clang-cl yet
+
+// When running with AddressSanitizer, we need to explicitly set some
+// options specific to our codebase to prevent errors during runtime.
+// To override these, set the ASAN_OPTIONS environment variable.
+//
+// Currently, these are:
+//
+//   allow_user_segv_handler=1 - Tell ASan to allow our code to use its
+//   own SIGSEGV handlers. This is required by ASM.js internally.
+//
+//   alloc_dealloc_mismatch=0 - Disable alloc-dealloc mismatch checking
+//   in ASan. This is required because we define our own new/delete
+//   operators that are backed by malloc/free. If one of them gets inlined
+//   while the other doesn't, ASan will report false positives.
+//
+//   detect_leaks=0 - Disable LeakSanitizer. This is required because
+//   otherwise leak checking will be enabled for various building and
+//   testing executables where we don't care much about leaks.
+//
+//   allocator_may_return_null=1 - Tell ASan to return NULL when an allocation
+//   fails instead of aborting the program. This allows us to handle failing
+//   allocations the same way we would handle them with a regular allocator and
+//   also uncovers potential bugs that might occur in these situations.
+//
+//   max_malloc_fill_size - Tell ASan to initialize memory to a certain value
+//   when it is allocated. This option specifies the maximum allocation size
+//   for which ASan should still initialize the memory. The value we specify
+//   here is exactly 256MiB.
+//
+//   max_free_fill_size - Similar to max_malloc_fill_size, tell ASan to
+//   overwrite memory with a certain value when it is freed. Again, the value
+//   here specifies the maximum allocation size; larger allocations will be
+//   skipped.
+//
+//   malloc_fill_byte / free_fill_byte - These values specify the byte values
+//   used to initialize/overwrite memory in conjunction with the previous
+//   options max_malloc_fill_size and max_free_fill_size. The values used here
+//   are 0xe4 and 0xe5 to match the kAllocPoison and kAllocJunk constants used
+//   by mozjemalloc.
+//
+//   malloc_context_size - This value specifies how many stack frames are
+//   stored for each malloc and free call. Since Firefox can have lots of deep
+//   stacks with allocations, we limit the default size here further to save
+//   some memory.
+//
+//   fast_unwind_on_check - Use the fast (frame-pointer-based) stack unwinder
+//   for internal CHECK failures. The slow unwinder doesn't work on Android.
+//
+//   fast_unwind_on_fatal - Use the fast (frame-pointer-based) stack unwinder
+//   to print fatal error reports. The slow unwinder doesn't work on Android.
+//
+// !! Note: __asan_default_options is not used on Android! (bug 1576213)
+// These should be updated in:
+//   mobile/android/geckoview/src/asan/resources/lib/*/wrap.sh
+//
+extern "C" MOZ_ASAN_BLACKLIST const char* __asan_default_options() {
+  return "allow_user_segv_handler=1:alloc_dealloc_mismatch=0:detect_leaks=0"
+#  ifdef MOZ_ASAN_REPORTER
+         ":malloc_context_size=20"  // stack frames kept per malloc/free
+#  endif
+#  ifdef __ANDROID__
+         ":fast_unwind_on_check=1:fast_unwind_on_fatal=1"
+#  endif
+         ":max_free_fill_size=268435456:max_malloc_fill_size=268435456"  // 256MiB
+         ":malloc_fill_byte=228:free_fill_byte=229"  // 0xe4 and 0xe5
+         ":handle_sigill=1"  // NOTE(review): not described in the list above
+         ":allocator_may_return_null=1";
+}
+
+// !!! Please do not add suppressions for new leaks in Gecko code, unless they
+// are intentional !!!
+extern "C" const char* __lsan_default_suppressions() {
+  return "# Add your suppressions below\n"
+
+         // LSan runs with a shallow stack depth and no debug symbols, so some
+         // small intentional leaks in system libraries show up with this.  You
+         // do not want this enabled when running locally with a deep stack, as
+         // it can catch too much.
+         "leak:libc.so\n"
+
+         // nsComponentManagerImpl intentionally leaks factory entries, and
+         // probably some other stuff.
+         "leak:nsComponentManagerImpl\n"
+         // These two variants are needed when fast unwind is disabled and stack
+         // depth is limited.
+         "leak:mozJSComponentLoader::LoadModule\n"
+         "leak:nsNativeModuleLoader::LoadModule\n"
+
+         // Bug 981220 - Pixman fails to free TLS memory.
+         "leak:pixman_implementation_lookup_composite\n"
+
+         // Bug 987918 - Font shutdown leaks when CLEANUP_MEMORY is not enabled.
+         "leak:libfontconfig.so\n"
+         "leak:libfreetype.so\n"
+         "leak:GI___strdup\n"
+         // The symbol is really __GI___strdup, but if you have the leading _,
+         // it doesn't suppress it.
+
+         // Bug 1078015 - If the process terminates during a PR_Sleep, LSAN
+         // detects a leak
+         "leak:PR_Sleep\n"
+
+         // Bug 1363976 - Stylo holds some global data alive forever.
+         "leak:style::global_style_data\n"
+
+         //
+         // Many leaks only affect some test suites.  The suite annotations are
+         // not checked.
+         //
+
+         // Bug 979928 - WebRTC leaks in different mochitest suites.
+         "leak:NR_reg_init\n"
+         // nr_reg_local_init should be redundant with NR_reg_init, but on
+         // Aurora we get fewer stack frames for some reason.
+         "leak:nr_reg_local_init\n"
+         "leak:r_log_register\n"
+         "leak:nr_reg_set\n"
+
+         // This is a one-time leak in mochitest-bc, so it is probably okay to
+         // ignore.
+         "leak:GlobalPrinters::InitializeGlobalPrinters\n"
+         "leak:nsPSPrinterList::GetPrinterList\n"
+
+         // Bug 1028456 - Various NSPR fd-related leaks in different mochitest
+         // suites.
+         "leak:_PR_Getfd\n"
+
+         // Bug 1028483 - The XML parser sometimes leaks an object. Mostly
+         // happens in toolkit/components/thumbnails.
+         "leak:processInternalEntity\n"
+
+         // Bug 1187421 - NSS does not always free the error stack in different
+         // mochitest suites.
+         "leak:nss_ClearErrorStack\n"
+
+         // Bug 1602689 - leak at mozilla::NotNull, RacyRegisteredThread,
+         // RegisteredThread::RegisteredThread, mozilla::detail::UniqueSelector
+         "leak:RegisteredThread::RegisteredThread\n"
+
+         //
+         // Leaks with system libraries in their stacks. These show up across a
+         // number of tests. Better symbols and disabling fast stackwalking may
+         // help diagnose these.
+         //
+         "leak:libcairo.so\n"
+         "leak:libdl.so\n"
+         "leak:libdricore.so\n"
+         "leak:libdricore9.2.1.so\n"
+         "leak:libGL.so\n"
+         "leak:libglib-2.0.so\n"
+         "leak:libglsl.so\n"
+         "leak:libp11-kit.so\n"
+         "leak:libpixman-1.so\n"
+         "leak:libpulse.so\n"
+         // libpulsecommon 1.1 is Ubuntu 12.04
+         "leak:libpulsecommon-1.1.so\n"
+         // libpulsecommon 8.0 is Ubuntu 16.04
+         "leak:libpulsecommon-8.0.so\n"
+         "leak:libresolv.so\n"
+         "leak:libstdc++.so\n"
+         "leak:libXrandr.so\n"
+         "leak:libX11.so\n"
+         "leak:pthread_setspecific_internal\n"
+         "leak:swrast_dri.so\n"
+
+         "leak:js::frontend::BytecodeEmitter:\n"
+         "leak:js::frontend::GeneralParser\n"
+         "leak:js::frontend::Parse\n"
+         "leak:xpc::CIGSHelper\n"
+         "leak:mozJSComponentLoader\n"
+         "leak:mozilla::xpcom::ConstructJSMComponent\n"
+         "leak:XPCWrappedNativeJSOps\n"
+
+      // End of suppressions.
+      ;  // Please keep this semicolon.
+}
+
+#endif  // _MSC_VER
diff --git a/mozglue/build/BionicGlue.cpp b/mozglue/build/BionicGlue.cpp
new file mode 100644
index 0000000000..8cc21b54e5
--- /dev/null
+++ b/mozglue/build/BionicGlue.cpp
@@ -0,0 +1,37 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#define NS_EXPORT __attribute__((visibility("default")))
+
+extern "C" NS_EXPORT int raise(int sig) {
+  // Bug 741272: Bionic's raise() wrongly uses kill(), which signals the whole
+  // process, so another thread could get the signal (and this one could
+  // return "successfully" from raising a fatal signal).
+  // Bug 943170: pthread_kill(pthread_self(), sig), specified by POSIX as
+  // equivalent to raise(sig), is buggy in Bionic too: a forked child will
+  // kill its parent instead. So issue the tgkill syscall directly.
+  extern pid_t gettid(void);
+  const pid_t processId = getpid();
+  const pid_t threadId = gettid();
+  return syscall(__NR_tgkill, processId, threadId, sig);
+}
+
+/* Empty stand-ins for symbols the Flash plugin uses, gone in Android >= 4.4 */
+namespace android {
+namespace VectorImpl {
+NS_EXPORT void reservedVectorImpl1() {}
+NS_EXPORT void reservedVectorImpl2() {}
+NS_EXPORT void reservedVectorImpl3() {}
+NS_EXPORT void reservedVectorImpl4() {}
+NS_EXPORT void reservedVectorImpl5() {}
+NS_EXPORT void reservedVectorImpl6() {}
+NS_EXPORT void reservedVectorImpl7() {}
+NS_EXPORT void reservedVectorImpl8() {}
+}  // namespace VectorImpl
+}  // namespace android
diff --git a/mozglue/build/Makefile.in b/mozglue/build/Makefile.in
new file mode 100644
index 0000000000..558ceba066
--- /dev/null
+++ b/mozglue/build/Makefile.in
@@ -0,0 +1,15 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# For FORCE_SHARED_LIB
+include $(topsrcdir)/config/config.mk
+
+ifeq (WINNT,$(OS_TARGET))
+# Rebuild mozglue.dll if the manifest changes - it's included by mozglue.rc.
+# (this dependency should really be just for mozglue.dll, not other targets)
+# Note the manifest file exists in the tree, so we use the explicit filename
+# here.
+EXTRA_DEPS += $(srcdir)/mozglue.dll.manifest
+endif
diff --git a/mozglue/build/SSE.cpp b/mozglue/build/SSE.cpp
new file mode 100644
index 0000000000..b3c9a57224
--- /dev/null
+++ b/mozglue/build/SSE.cpp
@@ -0,0 +1,196 @@
+/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use SSE instructions */
+
+#include "SSE.h"
+
+#ifdef HAVE_CPUID_H
+// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
+#  include <cpuid.h>
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+// MSVC 2005 or newer on x86-32 or x86-64
+#  include <intrin.h>
+#endif
+
+namespace {
+
+// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
+// We can't declare these functions in the header file, however, because
+// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
+// include both SSE.h and <windows.h>.
+
+#ifdef HAVE_CPUID_H
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+// Returns true if every bit of `bits` is set in register `reg` of CPUID leaf
+// `level` (sub-leaf 0); false if the CPU does not implement that leaf.
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  // Basic (0x0...) and extended (0x80000000...) leaves have separate maxima,
+  // so query the maximum of the range that `level` belongs to. Querying the
+  // basic range only would wrongly reject every extended leaf (e.g. SSE4a).
+  const unsigned int max = __get_cpuid_max(level & 0x80000000u, nullptr);
+  if (level > max) return false;
+  unsigned int regs[4];
+  __cpuid_count(level, 0, regs[0], regs[1], regs[2], regs[3]);
+  return (regs[reg] & bits) == bits;
+}
+
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  int cpuInfo[4];
+  // Check that the level in question is supported in the range it belongs to.
+  __cpuid(cpuInfo, level & 0x80000000u);
+  const unsigned int highestLevel = static_cast<unsigned int>(cpuInfo[0]);
+  if (highestLevel < level) return false;
+  // "The __cpuid intrinsic clears the ECX register before calling the cpuid
+  // instruction", i.e., ECX is 0 for the query below.
+  __cpuid(cpuInfo, level);
+  return (static_cast<unsigned int>(cpuInfo[reg]) & bits) == bits;
+}
+
+#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \
+    (defined(__i386) || defined(__x86_64__))
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+#  ifdef __i386
+static void moz_cpuid(int CPUInfo[4], int InfoType) {  // cpuid leaf InfoType
+  asm("xchg %esi, %ebx\n"  // keep the incoming ebx in esi across cpuid
+      "xor %ecx, %ecx\n"  // ecx is the sub-leaf (we only ever need 0)
+      "cpuid\n"
+      "movl %eax, (%edi)\n"  // CPUInfo[0] = eax
+      "movl %ebx, 4(%edi)\n"  // CPUInfo[1] = ebx
+      "movl %ecx, 8(%edi)\n"  // CPUInfo[2] = ecx
+      "movl %edx, 12(%edi)\n"  // CPUInfo[3] = edx
+      "xchg %esi, %ebx\n"  // put the incoming ebx back
+      :
+      : "a"(InfoType),  // %eax
+        "D"(CPUInfo)    // %edi
+      : "%ecx", "%edx", "%esi");  // NOTE(review): stores to CPUInfo, but no "memory" clobber
+}
+#  else
+static void moz_cpuid(int CPUInfo[4], int InfoType) {  // cpuid leaf InfoType
+  asm("xchg %rsi, %rbx\n"  // keep the incoming rbx in rsi across cpuid
+      "xor %ecx, %ecx\n"  // ecx is the sub-leaf (we only ever need 0)
+      "cpuid\n"
+      "movl %eax, (%rdi)\n"  // CPUInfo[0] = eax
+      "movl %ebx, 4(%rdi)\n"  // CPUInfo[1] = ebx
+      "movl %ecx, 8(%rdi)\n"  // CPUInfo[2] = ecx
+      "movl %edx, 12(%rdi)\n"  // CPUInfo[3] = edx
+      "xchg %rsi, %rbx\n"  // put the incoming rbx back
+      :
+      : "a"(InfoType),  // %eax
+        "D"(CPUInfo)    // %rdi
+      : "%ecx", "%edx", "%rsi");  // NOTE(review): stores to CPUInfo, but no "memory" clobber
+}
+#  endif
+
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  volatile int cpuInfo[4];
+  int* const rawInfo = const_cast<int*>(cpuInfo);
+  // Check that the level in question is supported in the range it belongs to.
+  moz_cpuid(rawInfo, level & 0x80000000u);
+  if (static_cast<unsigned>(cpuInfo[0]) < level) return false;
+  moz_cpuid(rawInfo, level);
+  return (static_cast<unsigned>(cpuInfo[reg]) & bits) == bits;
+}
+
+#endif  // end CPUID declarations
+
+}  // namespace
+
+namespace mozilla {
+
+namespace sse_private {
+
+#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+
+#  if !defined(MOZILLA_PRESUME_MMX)
+bool mmx_enabled = has_cpuid_bits(1u, edx, (1u << 23));  // leaf 1, EDX bit 23
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE)
+bool sse_enabled = has_cpuid_bits(1u, edx, (1u << 25));  // leaf 1, EDX bit 25
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE2)
+bool sse2_enabled = has_cpuid_bits(1u, edx, (1u << 26));  // leaf 1, EDX bit 26
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE3)
+bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u << 0));  // leaf 1, ECX bit 0
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSSE3)
+bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u << 9));  // leaf 1, ECX bit 9
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE4A)
+bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u << 6));  // extended leaf
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE4_1)
+bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u << 19));  // leaf 1, ECX bit 19
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_SSE4_2)
+bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u << 20));  // leaf 1, ECX bit 20
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
+static bool has_avx() {
+#    if defined(MOZILLA_PRESUME_AVX)
+  return true;
+#    else
+  // Bits that must all be set in ECX of CPUID leaf 1.
+  const unsigned cpuidBits = (1u << 28) |  // AVX
+                             (1u << 27) |  // OSXSAVE
+                             (1u << 26);   // XSAVE
+  // Bits that must all be set in the value returned by xgetbv(0).
+  const unsigned avxStateBits = (1u << 1) |  // XMM state
+                                (1u << 2);   // YMM state
+  if (!has_cpuid_bits(1u, ecx, cpuidBits)) {
+    return false;
+  }
+  return (xgetbv(0) & avxStateBits) == avxStateBits;  // OS saves YMM registers
+#    endif  // MOZILLA_PRESUME_AVX
+}
+#  endif  // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
+
+#  if !defined(MOZILLA_PRESUME_AVX)
+bool avx_enabled = has_avx();
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_AVX2)
+bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5));  // leaf 7, EBX bit 5
+#  endif
+
+#  if !defined(MOZILLA_PRESUME_AES)
+bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25));  // leaf 1, ECX bit 25
+#  endif
+
+#endif
+
+}  // namespace sse_private
+
+#ifdef HAVE_CPUID_H
+
+uint64_t xgetbv(uint32_t xcr) {  // `xcr` is handed to the asm in ecx
+  uint32_t eax, edx;  // outputs of the raw opcode bytes below (presumably xgetbv)
+  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
+  return (uint64_t)(edx) << 32 | eax;  // edx is the high half, eax the low half
+}
+
+#endif
+
+}  // namespace mozilla
diff --git a/mozglue/build/SSE.h b/mozglue/build/SSE.h
new file mode 100644
index 0000000000..8a2e668247
--- /dev/null
+++ b/mozglue/build/SSE.h
@@ -0,0 +1,350 @@
+/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use SSE instructions */
+
+#ifndef mozilla_SSE_h_
+#define mozilla_SSE_h_
+
+// for definition of MFBT_DATA
+#include "mozilla/Types.h"
+
+/**
+ * The public interface of this header consists of a set of macros and
+ * functions for Intel CPU features.
+ *
+ * DETECTING ISA EXTENSIONS
+ * ========================
+ *
+ * This header provides the following functions for determining whether the
+ * current CPU supports a particular instruction set extension:
+ *
+ *    mozilla::supports_mmx
+ *    mozilla::supports_sse
+ *    mozilla::supports_sse2
+ *    mozilla::supports_sse3
+ *    mozilla::supports_ssse3
+ *    mozilla::supports_sse4a
+ *    mozilla::supports_sse4_1
+ *    mozilla::supports_sse4_2
+ *    mozilla::supports_avx
+ *    mozilla::supports_avx2
+ *    mozilla::supports_aes
+ *
+ * If you're writing code using inline assembly, you should guard it with a
+ * call to one of these functions.  For instance:
+ *
+ *   if (mozilla::supports_sse2()) {
+ *     asm(" ... ");
+ *   }
+ *   else {
+ *     ...
+ *   }
+ *
+ * Note that these functions depend on cpuid intrinsics only available in gcc
+ * 4.3 or later and MSVC 8.0 (Visual C++ 2005) or later, so they return false
+ * in older compilers.  (This could be fixed by replacing the code with inline
+ * assembly.)
+ *
+ *
+ * USING INTRINSICS
+ * ================
+ *
+ * This header also provides support for coding using CPU intrinsics.
+ *
+ * For each mozilla::supports_abc function, we define a MOZILLA_MAY_SUPPORT_ABC
+ * macro which indicates that the target/compiler combination we're using is
+ * compatible with the ABC extension.  For instance, x86_64 with MSVC 2003 is
+ * compatible with SSE2 but not SSE3, since although there exist x86_64 CPUs
+ * with SSE3 support, MSVC 2003 only supports through SSE2.
+ *
+ * Until gcc fixes #pragma target (bugs [1] and [2] below) or our x86 builds
+ * require SSE2, you'll need to keep code that uses intrinsics in a file
+ * separate from your regular code.  Here's the recommended pattern:
+ *
+ *  #ifdef MOZILLA_MAY_SUPPORT_ABC
+ *    namespace mozilla {
+ *      namespace ABC {
+ *        void foo();
+ *      }
+ *    }
+ *  #endif
+ *
+ *  void foo() {
+ *    #ifdef MOZILLA_MAY_SUPPORT_ABC
+ *      if (mozilla::supports_abc()) {
+ *        mozilla::ABC::foo(); // in a separate file
+ *        return;
+ *      }
+ *    #endif
+ *
+ *    foo_unvectorized();
+ *  }
+ *
+ * You'll need to define mozilla::ABC::foo() in a separate file and add the
+ * -mabc flag when using gcc.
+ *
+ * [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39787
+ * [2] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41201
+ *
+ */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+
+#  ifdef __MMX__
+// It's ok to use MMX instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+#    define MOZILLA_PRESUME_MMX 1
+#  endif
+#  ifdef __SSE__
+// It's ok to use SSE instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+#    define MOZILLA_PRESUME_SSE 1
+#  endif
+#  ifdef __SSE2__
+// It's ok to use SSE2 instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+#    define MOZILLA_PRESUME_SSE2 1
+#  endif
+#  ifdef __SSE3__
+// It's ok to use SSE3 instructions based on the -march option (or the
+// default for Intel Mac).
+#    define MOZILLA_PRESUME_SSE3 1
+#  endif
+#  ifdef __SSSE3__
+// It's ok to use SSSE3 instructions based on the -march option.
+#    define MOZILLA_PRESUME_SSSE3 1
+#  endif
+#  ifdef __SSE4A__
+// It's ok to use SSE4A instructions based on the -march option.
+#    define MOZILLA_PRESUME_SSE4A 1
+#  endif
+#  ifdef __SSE4_1__
+// It's ok to use SSE4.1 instructions based on the -march option.
+#    define MOZILLA_PRESUME_SSE4_1 1
+#  endif
+#  ifdef __SSE4_2__
+// It's ok to use SSE4.2 instructions based on the -march option.
+#    define MOZILLA_PRESUME_SSE4_2 1
+#  endif
+#  ifdef __AVX__
+// It's ok to use AVX instructions based on the -march option.
+#    define MOZILLA_PRESUME_AVX 1
+#  endif
+#  ifdef __AVX2__
+// It's ok to use AVX2 instructions based on the -march option.
+#    define MOZILLA_PRESUME_AVX2 1
+#  endif
+#  ifdef __AES__
+// It's ok to use AES instructions based on the -march option.
+#    define MOZILLA_PRESUME_AES 1
+#  endif
+
+#  ifdef HAVE_CPUID_H
+#    define MOZILLA_SSE_HAVE_CPUID_DETECTION
+#  endif
+
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+
+#  define MOZILLA_SSE_HAVE_CPUID_DETECTION
+
+#  if defined(_M_IX86_FP)
+
+#    if _M_IX86_FP >= 1
+// It's ok to use SSE instructions based on the /arch option
+#      define MOZILLA_PRESUME_SSE
+#    endif
+#    if _M_IX86_FP >= 2
+// It's ok to use SSE2 instructions based on the /arch option
+#      define MOZILLA_PRESUME_SSE2
+#    endif
+
+#  elif defined(_M_AMD64)
+// MSVC for AMD64 doesn't support MMX, so don't presume it here.
+
+// SSE is always available on AMD64.
+#    define MOZILLA_PRESUME_SSE
+// SSE2 is always available on AMD64.
+#    define MOZILLA_PRESUME_SSE2
+#  endif
+
+#elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
+// Sun Studio on x86 or amd64
+
+#  define MOZILLA_SSE_HAVE_CPUID_DETECTION
+
+#  if defined(__x86_64__)
+// MMX is always available on AMD64.
+#    define MOZILLA_PRESUME_MMX
+// SSE is always available on AMD64.
+#    define MOZILLA_PRESUME_SSE
+// SSE2 is always available on AMD64.
+#    define MOZILLA_PRESUME_SSE2
+#  endif
+
+#endif
+
+namespace mozilla {
+
+namespace sse_private {
+#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  if !defined(MOZILLA_PRESUME_MMX)
+extern bool MFBT_DATA mmx_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE)
+extern bool MFBT_DATA sse_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE2)
+extern bool MFBT_DATA sse2_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE3)
+extern bool MFBT_DATA sse3_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSSE3)
+extern bool MFBT_DATA ssse3_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE4A)
+extern bool MFBT_DATA sse4a_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE4_1)
+extern bool MFBT_DATA sse4_1_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_SSE4_2)
+extern bool MFBT_DATA sse4_2_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_AVX)
+extern bool MFBT_DATA avx_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_AVX2)
+extern bool MFBT_DATA avx2_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_AES)
+extern bool MFBT_DATA aes_enabled;
+#  endif
+
+#endif
+}  // namespace sse_private
+
+#ifdef HAVE_CPUID_H
+MOZ_EXPORT uint64_t xgetbv(uint32_t xcr);
+#endif
+
+#if defined(MOZILLA_PRESUME_MMX)
+#  define MOZILLA_MAY_SUPPORT_MMX 1
+inline bool supports_mmx() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  if !(defined(_MSC_VER) && defined(_M_AMD64))
+// Define MOZILLA_MAY_SUPPORT_MMX only if we're not on MSVC for
+// AMD64, since that compiler doesn't support MMX.
+#    define MOZILLA_MAY_SUPPORT_MMX 1
+#  endif
+inline bool supports_mmx() { return sse_private::mmx_enabled; }
+#else
+inline bool supports_mmx() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE)
+#  define MOZILLA_MAY_SUPPORT_SSE 1
+inline bool supports_sse() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE 1
+inline bool supports_sse() { return sse_private::sse_enabled; }
+#else
+inline bool supports_sse() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE2)
+#  define MOZILLA_MAY_SUPPORT_SSE2 1
+inline bool supports_sse2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE2 1
+inline bool supports_sse2() { return sse_private::sse2_enabled; }
+#else
+inline bool supports_sse2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE3)
+#  define MOZILLA_MAY_SUPPORT_SSE3 1
+inline bool supports_sse3() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE3 1
+inline bool supports_sse3() { return sse_private::sse3_enabled; }
+#else
+inline bool supports_sse3() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSSE3)
+#  define MOZILLA_MAY_SUPPORT_SSSE3 1
+inline bool supports_ssse3() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSSE3 1
+inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
+#else
+inline bool supports_ssse3() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4A)
+#  define MOZILLA_MAY_SUPPORT_SSE4A 1
+inline bool supports_sse4a() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE4A 1
+inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
+#else
+inline bool supports_sse4a() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4_1)
+#  define MOZILLA_MAY_SUPPORT_SSE4_1 1
+inline bool supports_sse4_1() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE4_1 1
+inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
+#else
+inline bool supports_sse4_1() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4_2)
+#  define MOZILLA_MAY_SUPPORT_SSE4_2 1
+inline bool supports_sse4_2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_SSE4_2 1
+inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
+#else
+inline bool supports_sse4_2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AVX)
+#  define MOZILLA_MAY_SUPPORT_AVX 1
+inline bool supports_avx() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_AVX 1
+inline bool supports_avx() { return sse_private::avx_enabled; }
+#else
+inline bool supports_avx() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AVX2)
+#  define MOZILLA_MAY_SUPPORT_AVX2 1
+inline bool supports_avx2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_AVX2 1
+inline bool supports_avx2() { return sse_private::avx2_enabled; }
+#else
+inline bool supports_avx2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AES)
+#  define MOZILLA_MAY_SUPPORT_AES 1
+inline bool supports_aes() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_AES 1
+inline bool supports_aes() { return sse_private::aes_enabled; }
+#else
+inline bool supports_aes() { return false; }
+#endif
+
+}  // namespace mozilla
+
+#endif /* !defined(mozilla_SSE_h_) */
diff --git a/mozglue/build/TsanOptions.cpp b/mozglue/build/TsanOptions.cpp
new file mode 100644
index 0000000000..160cfc12d1
--- /dev/null
+++ b/mozglue/build/TsanOptions.cpp
@@ -0,0 +1,305 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Attributes.h"
+#include "mozilla/TsanOptions.h"
+
+#ifndef _MSC_VER  // Not supported by clang-cl yet
+
+//
+// When running with ThreadSanitizer, we sometimes need to suppress existing
+// races. However, in any case, it should be either because
+//
+//    1) a bug is on file. In this case, the bug number should always be
+//       included with the suppression.
+//
+// or 2) this is an intentional race. Please be very careful with judging
+//       races as intentional and benign. Races in C++ are undefined behavior
+//       and compilers increasingly rely on exploiting this for optimizations.
+//       Hence, many seemingly benign races cause harmful or unexpected
+//       side-effects.
+//
+//       See also:
+//       https://software.intel.com/en-us/blogs/2013/01/06/benign-data-races-what-could-possibly-go-wrong
+//
+//
+// Also, when adding any race suppressions here, make sure to always add
+// a signature for each of the two race stacks. Sometimes, TSan fails to
+// symbolize one of the two traces and this can cause suppressed races to
+// show up intermittently.
+//
+// clang-format off
+extern "C" const char* __tsan_default_suppressions() {
+  return "# Add your suppressions below\n"
+
+         // External uninstrumented libraries
+         MOZ_TSAN_DEFAULT_EXTLIB_SUPPRESSIONS
+
+         // TSan internals
+         "race:__tsan::ProcessPendingSignals\n"
+         "race:__tsan::CallUserSignalHandler\n"
+
+
+
+
+
+         // Uninstrumented code causing false positives
+
+         // These libraries are uninstrumented and cause mutex false positives.
+         // However, they can be unloaded by GTK early which we cannot avoid.
+         "mutex:libGL.so\n"
+         "mutex:libGLdispatch\n"
+         "mutex:libGLX\n"
+         // Bug 1637707 - permanent
+         "mutex:libEGL_mesa.so\n"
+         // ~GLContextGLX unlocks a libGL mutex.
+         "mutex:GLContextGLX::~GLContextGLX\n"
+         // Bug 1651446 - permanent (ffmpeg)
+         "race:libavcodec.so*\n"
+         "race:libavutil.so*\n"
+         // For some reason, the suppressions on libpulse.so
+         // through `called_from_lib` only work partially.
+         "race:libpulse.so\n"
+         "race:pa_context_suspend_source_by_index\n"
+         "race:pa_context_unref\n"
+         "race:pa_format_info_set_prop_string_array\n"
+         "race:pa_stream_get_index\n"
+         "race:pa_stream_update_timing_info\n"
+         // This is a callback from libglib-2 that is apparently
+         // not fully suppressed through `called_from_lib`.
+         "race:g_main_context_dispatch\n"
+         // This is likely a false positive involving a mutex from GTK.
+         // See also bug 1642653 - permanent.
+         "mutex:GetMaiAtkType\n"
+
+
+
+
+
+         // Deadlock reports on single-threaded runtime.
+         //
+         // This is a known false positive from TSan where it reports
+         // a potential deadlock even though all mutexes are only
+         // taken by a single thread. For applications/tasks where we
+         // are absolutely sure that no second thread will be involved
+         // we should suppress these issues.
+         //
+         // See also https://github.com/google/sanitizers/issues/488
+
+         // Bug 1614605 - permanent
+         "deadlock:SanctionsTestServer\n"
+         "deadlock:OCSPStaplingServer\n"
+         // Bug 1643087 - permanent
+         "deadlock:BadCertAndPinningServer\n"
+         // Bug 1606804 - permanent
+         "deadlock:cert_storage::SecurityState::open_db\n"
+         "deadlock:cert_storage::SecurityState::add_certs\n"
+         // Bug 1651770 - permanent
+         "deadlock:mozilla::camera::LockAndDispatch\n"
+         // Bug 1606804 - permanent
+         "deadlock:third_party/rust/rkv/src/env.rs\n"
+         // Bug 1680655 - permanent
+         "deadlock:EncryptedClientHelloServer\n"
+         // Bug 1682861 - permanent
+         "deadlock:nsDOMWindowUtils::CompareCanvases\n"
+
+
+
+
+
+         // Benign races in third-party code (should be fixed upstream)
+
+         // No Bug - permanent
+         // No Upstream Bug Filed!
+         //
+         // SIMD Initialization in libjpeg, potentially runs
+         // initialization twice, but otherwise benign. Init
+         // routine itself is in native assembler.
+         "race:init_simd\n"
+         "race:simd_support\n"
+         "race:jsimd_can_ycc_rgb\n"
+         // Bug 1615228 - permanent
+         // No Upstream Bug Filed!
+         //
+         // Likely benign race in ipc/chromium/ where we set
+         // `message_loop_` to `NULL` on two threads when stopping
+         // a thread at the same time it is already finishing.
+         "race:base::Thread::Stop\n"
+         // Bug 1615569 - permanent
+         // No Upstream Bug Filed!
+         //
+         // NSS is using freebl from two different threads but freebl isn't
+         // that threadsafe.
+         "race:mp_exptmod.max_window_bits\n"
+         // Bug 1652499 - permanent
+         // No Upstream Bug Filed!
+         //
+         // Likely benign race in webrtc.org code - race while updating the
+         // minimum log severity.
+         "race:Loggable\n"
+         "race:UpdateMinLogSeverity\n"
+         // Bug 1652174 - permanent
+         // Upstream Bug: https://github.com/libevent/libevent/issues/777
+         //
+         // Likely benign write-write race in libevent to set a sticky boolean
+         // flag to true.
+         "race:event_debug_mode_too_late\n"
+         // Bug 1648606 - permanent
+         // No Upstream Bug Filed!
+         //
+         // Race on some flag being checked in libusrsctp.
+         "race:sctp_close\n"
+         "race:sctp_iterator_work\n"
+         // Bug 1653618 - permanent
+         // Upstream Bug: https://github.com/sctplab/usrsctp/issues/507
+         //
+         // Might lead to scheduled timers in libusrsctp getting dropped?
+         "race:sctp_handle_tick\n"
+         "race:sctp_handle_sack\n"
+         // Bug 1648604 - permanent
+         // Upstream Bug: https://github.com/sctplab/usrsctp/issues/482
+         //
+         // Likely benign race in libusrsctp allocator during a free.
+         "race:system_base_info\n"
+         // Bug 1153409 - permanent
+         // No Upstream Bug Filed!
+         //
+         // Probably benign - sqlite has a few optimizations where it does
+         // racy reads and then does properly synchronized integrity checks
+         // afterwards. Some concern of compiler optimizations messing this
+         // up due to "volatile" being too weak for this.
+         "race:third_party/sqlite3/*\n"
+         "deadlock:third_party/sqlite3/*\n"
+         // Bug 1674770 - permanent
+         // Upstream Bug: https://github.com/Amanieu/parking_lot/issues/257
+         //
+         // parking_lot using incorrect atomic orderings in RwLock, upstream
+         // fix already up for review.
+         "race:StrongRuleNode::ensure_child\n"
+         // No Bug - permanent
+         // Upstream Bug: https://github.com/rayon-rs/rayon/issues/812
+         //
+         // Probably a false-positive from crossbeam's deque not being
+         // understood by tsan.
+         "race:crossbeam_deque::Worker*::resize\n"
+         "race:crossbeam_deque::Worker*::push\n"
+         "race:crossbeam_deque::Buffer*::write\n"
+         "race:crossbeam_deque::Buffer*::read\n"
+
+
+
+
+
+         // The rest of these suppressions are miscellaneous issues in gecko
+         // that should be investigated and ideally fixed.
+
+         // Bug 1601600
+         "race:SkARGB32_Blitter\n"
+         "race:SkARGB32_Shader_Blitter\n"
+         "race:SkARGB32_Opaque_Blitter\n"
+         "race:SkRasterPipelineBlitter\n"
+         "race:Clamp_S32_D32_nofilter_trans_shaderproc\n"
+         "race:SkSpriteBlitter_Memcpy\n"
+
+         // Bug 1606651
+         "race:nsPluginTag::nsPluginTag\n"
+         "race:nsFakePluginTag\n"
+
+         // Bug 1606800
+         "race:CallInitFunc\n"
+
+         // Bug 1606803
+         "race:ipv6_is_present\n"
+
+         // Bug 1606864
+         "race:nsSocketTransport::Close\n"
+         "race:nsSocketTransport::OnSocketDetached\n"
+         "race:nsSocketTransport::OnMsgInputClosed\n"
+         "race:nsSocketTransport::OpenOutputStream\n"
+
+         // Bug 1615017
+         "race:CacheFileMetadata::SetHash\n"
+         "race:CacheFileMetadata::OnDataWritten\n"
+
+         // Bug 1615123
+         "race:_dl_deallocate_tls\n"
+         "race:__libc_memalign\n"
+
+         // Bug 1664535
+         "race:setNeedsIncrementalBarrier\n"
+         "race:needsIncrementalBarrier\n"
+
+         // Bug 1664803
+         "race:Sampler::sSigHandlerCoordinator\n"
+
+         // Bug 1656068
+         "race:WebRtcAec_Create\n"
+
+         // No Bug - Logging bug in Mochitests
+         "race:mochitest/ssltunnel/ssltunnel.cpp\n"
+
+         // This thread does not seem to be stopped/joined.
+         // ImageBridgeChild should be turned back into a background
+         // task queue in bug 1647628, in which case these suppressions
+         // can be removed.
+         "race:mozilla::layers::ImageBridgeChild::ShutDown\n"
+
+         // Bug 1652530
+         "mutex:XErrorTrap\n"
+
+         // Bug 1671572
+         "race:IdentifyTextureHost\n"
+         "race:GetCompositorBackendType\n"
+         "race:SupportsTextureDirectMapping\n"
+
+         // Bug 1671601
+         "race:CamerasParent::ActorDestroy\n"
+         "race:CamerasParent::DispatchToVideoCaptureThread\n"
+
+         // Bug 1623541
+         "race:VRShMem::PullSystemState\n"
+         "race:VRShMem::PushSystemState\n"
+         "race:VRShMem::PullBrowserState\n"
+         "race:VRShMem::PushBrowserState\n"
+
+         // Bug 1674776
+         "race:DocumentTimeline::GetCurrentTimeAsDuration\n"
+
+         // Bug 1674835
+         "race:nsHttpTransaction::ReadSegments\n"
+         "race:nsHttpTransaction::SecurityInfo\n"
+
+         // Bug 1680285
+         "race:style::traversal::note_children\n"
+         "race:style::matching::MatchMethods::apply_selector_flags\n"
+
+         // Bug 1607588
+         "race:nssToken_Destroy\n"
+         "race:nssSlot_GetToken\n"
+
+         // Bug 1683439
+         "race:AudioCallbackDriver::MixerCallback\n"
+         "race:AudioCallbackDriver::Init\n"
+
+         // Bug 1683417
+         "race:DataChannelConnection::SetSignals\n"
+         "race:DataChannelConnection::SetReady\n"
+
+         // Bug 1683404
+         "race:nsTimerImpl::Shutdown\n"
+         "race:nsTimerImpl::CancelImpl\n"
+
+         // Bug 1682951
+         "race:storage::Connection::Release\n"
+
+         // Bug 1683357
+         "race:image::ImageSurfaceCache::SuggestedSizeInternal\n"
+         "race:image::RasterImage::SetMetadata\n"
+
+      // End of suppressions.
+      ;  // Please keep this semicolon.
+}
+// clang-format on
+#endif  // _MSC_VER
diff --git a/mozglue/build/UbsanOptions.cpp b/mozglue/build/UbsanOptions.cpp
new file mode 100644
index 0000000000..547baa790c
--- /dev/null
+++ b/mozglue/build/UbsanOptions.cpp
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Attributes.h"
+
+#ifndef _MSC_VER  // Not supported by clang-cl yet
+
+extern "C" const char* __ubsan_default_options() {
+  return "print_stacktrace=1";
+}
+
+extern "C" const char* __ubsan_default_suppressions() { return ""; }
+
+#endif
diff --git a/mozglue/build/arm-eabi-filter b/mozglue/build/arm-eabi-filter
new file mode 100644
index 0000000000..401454ee88
--- /dev/null
+++ b/mozglue/build/arm-eabi-filter
@@ -0,0 +1,4 @@
+{
+  local:
+    __aeabi*;
+};
diff --git a/mozglue/build/arm.cpp b/mozglue/build/arm.cpp
new file mode 100644
index 0000000000..a74a41f6c0
--- /dev/null
+++ b/mozglue/build/arm.cpp
@@ -0,0 +1,131 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use various ARM extensions */
+
+#include "arm.h"
+
+#if defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+
+// arm.h has parallel #ifs which declare MOZILLA_ARM_HAVE_CPUID_DETECTION.
+// We don't check it here so that we get compile errors if it's defined, but
+// we don't compile one of these detection methods. The detection code here is
+// based on the CPU detection in libtheora.
+
+#  if defined(__linux__) || defined(ANDROID)
+#    include <stdio.h>
+#    include <stdlib.h>
+#    include <string.h>
+
+enum {
+  MOZILLA_HAS_EDSP_FLAG = 1,
+  MOZILLA_HAS_ARMV6_FLAG = 2,
+  MOZILLA_HAS_ARMV7_FLAG = 4,
+  MOZILLA_HAS_NEON_FLAG = 8
+};
+
+static unsigned get_arm_cpu_flags(void) {
+  /* We parse /proc/cpuinfo rather than /proc/self/auxv: the latter would be
+     easier to read, but it doesn't work reliably on Android. This also means
+     that detection will fail in Scratchbox, which is desirable, as NEON does
+     not work in the qemu shipped with the Maemo 5 SDK. I don't know if
+     /proc/self/auxv would do any better in that case, anyway, or if it would
+     return random flags from the host CPU. */
+  FILE* cpuinfo = fopen("/proc/cpuinfo", "r");
+  if (cpuinfo == nullptr) {
+    // Nothing could be read, so no optional features are reported.
+    return 0;
+  }
+
+  unsigned detected = 0;
+  bool reports_v6l = false;
+  /* 512 should be enough for anybody (it's even enough for all the flags that
+     x86 has accumulated... so far). */
+  char line[512];
+  while (fgets(line, 511, cpuinfo) != nullptr) {
+    if (memcmp(line, "Features", 8) == 0) {
+      // Only the first occurrence of each name is examined; it counts when
+      // it is preceded by a space and followed by a space or a newline.
+      const char* hit = strstr(line, " edsp");
+      if (hit != nullptr && (hit[5] == ' ' || hit[5] == '\n')) {
+        detected |= MOZILLA_HAS_EDSP_FLAG;
+      }
+      hit = strstr(line, " neon");
+      if (hit != nullptr && (hit[5] == ' ' || hit[5] == '\n')) {
+        detected |= MOZILLA_HAS_NEON_FLAG;
+      }
+    }
+    if (memcmp(line, "CPU architecture:", 17) == 0) {
+      const int version = atoi(line + 17);
+      if (version >= 6) detected |= MOZILLA_HAS_ARMV6_FLAG;
+      if (version >= 7) detected |= MOZILLA_HAS_ARMV7_FLAG;
+    }
+    /* media/webrtc/trunk/src/system_wrappers/source/cpu_features_arm.c
+     * Unfortunately, it seems that certain ARMv6-based CPUs
+     * report an incorrect architecture number of 7!
+     *
+     * We try to correct this by looking at the 'elf_format'
+     * field reported by the 'Processor' field, which is of the
+     * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
+     * an ARMv6-one.
+     */
+    if (memcmp(line, "Processor\t:", 11) == 0 &&
+        strstr(line, "(v6l)") != nullptr) {
+      reports_v6l = true;
+    }
+  }
+  fclose(cpuinfo);
+
+  // ARMv6 pretending to be ARMv7? Clearing an unset bit is harmless.
+  if (reports_v6l) detected &= ~MOZILLA_HAS_ARMV7_FLAG;
+  return detected;
+}
+
+// Cache a local copy so we only have to read /proc/cpuinfo once.
+static unsigned arm_cpu_flags = get_arm_cpu_flags();
+
+#    if !defined(MOZILLA_PRESUME_EDSP)
+static bool check_edsp(void) {
+  return (arm_cpu_flags & MOZILLA_HAS_EDSP_FLAG) != 0;
+}
+#    endif
+
+#    if !defined(MOZILLA_PRESUME_ARMV6)
+static bool check_armv6(void) {
+  return (arm_cpu_flags & MOZILLA_HAS_ARMV6_FLAG) != 0;
+}
+#    endif
+
+#    if !defined(MOZILLA_PRESUME_ARMV7)
+static bool check_armv7(void) {
+  return (arm_cpu_flags & MOZILLA_HAS_ARMV7_FLAG) != 0;
+}
+#    endif
+
+#    if !defined(MOZILLA_PRESUME_NEON)
+static bool check_neon(void) {
+  return (arm_cpu_flags & MOZILLA_HAS_NEON_FLAG) != 0;
+}
+#    endif
+
+#  endif  // defined(__linux__) || defined(ANDROID)
+
+namespace mozilla {
+namespace arm_private {
+#  if !defined(MOZILLA_PRESUME_EDSP)
+bool edsp_enabled = check_edsp();
+#  endif
+#  if !defined(MOZILLA_PRESUME_ARMV6)
+bool armv6_enabled = check_armv6();
+#  endif
+#  if !defined(MOZILLA_PRESUME_ARMV7)
+bool armv7_enabled = check_armv7();
+#  endif
+#  if !defined(MOZILLA_PRESUME_NEON)
+bool neon_enabled = check_neon();
+#  endif
+}  // namespace arm_private
+}  // namespace mozilla
+
+#endif  // MOZILLA_ARM_HAVE_CPUID_DETECTION
diff --git a/mozglue/build/arm.h b/mozglue/build/arm.h
new file mode 100644
index 0000000000..8600329931
--- /dev/null
+++ b/mozglue/build/arm.h
@@ -0,0 +1,145 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use various ARM extensions */
+
+#ifndef mozilla_arm_h_
+#define mozilla_arm_h_
+
+// for definition of MFBT_DATA
+#include "mozilla/Types.h"
+
+/* This is patterned after SSE.h, but provides ARMv5E, ARMv6, and NEON
+   detection. For reasons similar to the SSE code, code using NEON (even just
+   in inline asm) needs to be in a separate compilation unit from the regular
+   code, because it requires an ".fpu neon" directive which can't be undone.
+   ARMv5E and ARMv6 code may also require an .arch directive, since by default
+   the assembler refuses to generate code for opcodes outside of its current
+   .arch setting.
+
+   TODO: Add Thumb, Thumb2, VFP, iwMMX, etc. detection, if we need it. */
+
+#if defined(__GNUC__) && defined(__arm__)
+
+#  define MOZILLA_ARM_ARCH 3
+
+#  if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) || \
+      defined(_ARM_ARCH_4)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 4
+#  endif
+
+#  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) ||   \
+      defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
+      defined(__ARM_ARCH_5TEJ__) || defined(_ARM_ARCH_5)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 5
+#  endif
+
+#  if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||    \
+      defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) ||   \
+      defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
+      defined(__ARM_ARCH_6M__) || defined(_ARM_ARCH_6)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 6
+#  endif
+
+#  if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
+      defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
+      defined(__ARM_ARCH_7EM__) || defined(_ARM_ARCH_7)
+#    undef MOZILLA_ARM_ARCH
+#    define MOZILLA_ARM_ARCH 7
+#  endif
+
+#  ifdef __GNUC__
+#    define MOZILLA_MAY_SUPPORT_EDSP 1
+
+#    if defined(HAVE_ARM_SIMD)
+#      define MOZILLA_MAY_SUPPORT_ARMV6 1
+#    endif
+
+#    if defined(HAVE_ARM_NEON)
+#      define MOZILLA_MAY_SUPPORT_NEON 1
+#    endif
+
+#    if defined(HAVE_ARM_SIMD)
+#      define MOZILLA_MAY_SUPPORT_ARMV7 1
+#    endif
+#  endif
+
+// Currently we only have CPU detection for Linux via /proc/cpuinfo
+#  if defined(__linux__) || defined(ANDROID)
+#    define MOZILLA_ARM_HAVE_CPUID_DETECTION 1
+#  endif
+
+#endif
+
+// When using -mfpu=neon on arm gcc, or using default on aarch64,
+// the compiler generates neon instructions.
+#if defined(__ARM_NEON)
+#  define MOZILLA_PRESUME_NEON 1
+#endif
+
+namespace mozilla {
+
+namespace arm_private {
+#if defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+#  if !defined(MOZILLA_PRESUME_EDSP)
+extern bool MFBT_DATA edsp_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_ARMV6)
+extern bool MFBT_DATA armv6_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_ARMV7)
+extern bool MFBT_DATA armv7_enabled;
+#  endif
+#  if !defined(MOZILLA_PRESUME_NEON)
+extern bool MFBT_DATA neon_enabled;
+#  endif
+#endif
+}  // namespace arm_private
+
+#if defined(MOZILLA_PRESUME_EDSP)
+#  define MOZILLA_MAY_SUPPORT_EDSP 1
+inline bool supports_edsp() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_EDSP) && \
+    defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+inline bool supports_edsp() { return arm_private::edsp_enabled; }
+#else
+inline bool supports_edsp() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_ARMV6)
+#  define MOZILLA_MAY_SUPPORT_ARMV6 1
+inline bool supports_armv6() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_ARMV6) && \
+    defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+inline bool supports_armv6() { return arm_private::armv6_enabled; }
+#else
+inline bool supports_armv6() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_ARMV7)
+#  define MOZILLA_MAY_SUPPORT_ARMV7 1
+inline bool supports_armv7() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_ARMV7) && \
+    defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+inline bool supports_armv7() { return arm_private::armv7_enabled; }
+#else
+inline bool supports_armv7() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_NEON)
+#  define MOZILLA_MAY_SUPPORT_NEON 1
+inline bool supports_neon() { return true; }
+#elif defined(MOZILLA_MAY_SUPPORT_NEON) && \
+    defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
+inline bool supports_neon() { return arm_private::neon_enabled; }
+#else
+inline bool supports_neon() { return false; }
+#endif
+
+}  // namespace mozilla
+
+#endif /* !defined(mozilla_arm_h_) */
diff --git a/mozglue/build/dummy.cpp b/mozglue/build/dummy.cpp
new file mode 100644
index 0000000000..c6b1ccd808
--- /dev/null
+++ b/mozglue/build/dummy.cpp
@@ -0,0 +1,5 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+void _dummy(void) {}  // Intentionally empty; moz.build adds dummy.cpp as a temporary source.
diff --git a/mozglue/build/mips.cpp b/mozglue/build/mips.cpp
new file mode 100644
index 0000000000..7166080f5e
--- /dev/null
+++ b/mozglue/build/mips.cpp
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use MIPS-specific extensions */
+
+#include "mips.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {  // Bit flags returned by get_mips_cpu_flags().
+  MIPS_FLAG_LOONGSON3 = 1,  // set when /proc/cpuinfo contains "Loongson-3"
+};
+
+// Returns MIPS_FLAG_* bits found by scanning /proc/cpuinfo (0 if unreadable).
+static unsigned get_mips_cpu_flags(void) {
+  unsigned flags = 0;
+  FILE* fin = fopen("/proc/cpuinfo", "r");
+  if (fin != nullptr) {
+    // Scan every line instead of only the first 1023 bytes of the file.
+    char line[1024];
+    while (flags == 0 && fgets(line, sizeof(line), fin) != nullptr) {
+      if (strstr(line, "Loongson-3")) flags |= MIPS_FLAG_LOONGSON3;
+    }
+    fclose(fin);
+  }
+  return flags;
+}
+
+// Reports whether the Loongson-3 bit is set in the cached CPU flags.
+static bool check_loongson3(void) {
+  static const unsigned cached = get_mips_cpu_flags();  // evaluated once
+  return !!(cached & MIPS_FLAG_LOONGSON3);
+}
+
+namespace mozilla {
+namespace mips_private {
+bool isLoongson3 = check_loongson3();  // dynamically initialized from check_loongson3()
+}  // namespace mips_private
+}  // namespace mozilla
diff --git a/mozglue/build/mips.h b/mozglue/build/mips.h
new file mode 100644
index 0000000000..a7c3ed5416
--- /dev/null
+++ b/mozglue/build/mips.h
@@ -0,0 +1,29 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use MIPS-specific extensions */
+
+#ifndef mozilla_mips_h_
+#define mozilla_mips_h_
+
+// for definition of MFBT_DATA
+#include "mozilla/Types.h"
+
+namespace mozilla {
+
+namespace mips_private {
+extern bool MFBT_DATA isLoongson3;  // defined in mips.cpp
+}  // namespace mips_private
+
+inline bool supports_mmi() {  // true only on __mips__ builds where isLoongson3 is set
+#ifdef __mips__
+  return mips_private::isLoongson3;
+#else
+  return false;
+#endif
+}
+
+}  // namespace mozilla
+
+#endif /* !defined(mozilla_mips_h_) */
diff --git a/mozglue/build/moz.build b/mozglue/build/moz.build
new file mode 100644
index 0000000000..11c2e41e16
--- /dev/null
+++ b/mozglue/build/moz.build
@@ -0,0 +1,120 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Build mozglue as a shared lib on Windows, OSX and Android. But not for
+# embedders!
+# If this is ever changed, update MOZ_SHARED_MOZGLUE in browser/installer/Makefile.in
+if CONFIG["JS_STANDALONE"] and not CONFIG["MOZ_MEMORY"]:
+    Library("mozglue")  # presumably the embedder case mentioned in the comment above
+elif CONFIG["OS_TARGET"] in ("WINNT", "Darwin", "Android"):
+    SharedLibrary("mozglue")
+else:
+    Library("mozglue")
+
+if CONFIG["OS_TARGET"] == "Android":
+    LDFLAGS += ["-Wl,--version-script,%s/mozglue.ver" % SRCDIR]  # linker version script
+    SOURCES += [
+        "BionicGlue.cpp",
+    ]
+
+if CONFIG["MOZ_ASAN"]:  # source added only when MOZ_ASAN is set
+    SOURCES += [
+        "AsanOptions.cpp",
+    ]
+
+if CONFIG["MOZ_UBSAN"]:  # source added only when MOZ_UBSAN is set
+    SOURCES += [
+        "UbsanOptions.cpp",
+    ]
+
+if CONFIG["MOZ_TSAN"]:  # source added only when MOZ_TSAN is set
+    SOURCES += [
+        "TsanOptions.cpp",
+    ]
+
+if CONFIG["OS_TARGET"] == "WINNT":
+    if CONFIG["MOZ_MEMORY"]:
+        DEFFILE = "mozglue.def"  # export list that maps allocator names to je_*/wrap_* symbols
+    # We'll break the DLL blocklist if we immediately load user32.dll.
+    # For the same reason, we delayload winmm.dll which statically links
+    # user32.dll.
+    DELAYLOAD_DLLS += [
+        "user32.dll",
+        "winmm.dll",
+    ]
+
+if CONFIG["MOZ_WIDGET_TOOLKIT"]:
+
+    if CONFIG["MOZ_MEMORY"] and FORCE_SHARED_LIB:
+        pass
+        # TODO: SHARED_LIBRARY_LIBS go here
+    else:
+        # Temporary, until bug 662814 lands
+        NoVisibilityFlags()
+        SOURCES += [
+            "dummy.cpp",
+        ]
+
+    if CONFIG["OS_TARGET"] == "WINNT":
+        LOCAL_INCLUDES += [
+            "/memory/build",
+        ]
+
+    EXPORTS.mozilla += [  # all four headers are exported regardless of CPU_ARCH
+        "arm.h",
+        "mips.h",
+        "ppc.h",
+        "SSE.h",
+    ]
+
+    if CONFIG["CPU_ARCH"].startswith("x86"):  # only the matching detection source is compiled
+        SOURCES += [
+            "SSE.cpp",
+        ]
+
+    if CONFIG["CPU_ARCH"] == "arm":
+        SOURCES += [
+            "arm.cpp",
+        ]
+
+    if CONFIG["CPU_ARCH"].startswith("mips"):
+        SOURCES += [
+            "mips.cpp",
+        ]
+
+    if CONFIG["CPU_ARCH"].startswith("ppc"):
+        SOURCES += [
+            "ppc.cpp",
+        ]
+
+    if CONFIG["MOZ_LINKER"]:
+        USE_LIBS += [
+            "zlib",
+        ]
+
+USE_LIBS += [
+    "mfbt",
+]
+
+LIBRARY_DEFINES["IMPL_MFBT"] = True
+LIBRARY_DEFINES["MOZ_HAS_MOZGLUE"] = True
+
+if CONFIG["OS_TARGET"] == "Darwin":
+    # On OSX 10.10.3, a deadlock happens in some cases involving dynamic
+    # symbol resolution for symbols that jemalloc itself uses. While it
+    # might be possible to find a way to avoid all such symbol resolutions,
+    # it's currently not possible because at the very least there's a call
+    # to pthread_self from tsd_init_check_recursion, which is necessary
+    # because somehow clang doesn't want to accept the __thread keyword
+    # for TLS.
+    LDFLAGS += ["-Wl,-bind_at_load"]
+
+if CONFIG["MOZ_LINKER"] and CONFIG["CPU_ARCH"] == "arm":
+    LDFLAGS += ["-Wl,-version-script,%s/arm-eabi-filter" % SRCDIR]  # ARM-only version script
+
+DIST_INSTALL = True
+
+include("replace_malloc.mozbuild")  # adds the Darwin MOZ_REPLACE_MALLOC linker flag
diff --git a/mozglue/build/mozglue.def b/mozglue/build/mozglue.def
new file mode 100644
index 0000000000..188037b9f1
--- /dev/null
+++ b/mozglue/build/mozglue.def
@@ -0,0 +1,22 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+LIBRARY mozglue.dll
+
+EXPORTS
+  ; symbols that are actually useful
+  malloc=je_malloc
+  calloc=je_calloc
+  realloc=je_realloc
+  free=je_free
+  posix_memalign=je_posix_memalign
+  malloc_usable_size=je_malloc_usable_size
+  malloc_good_size=je_malloc_good_size
+  _aligned_free=je_free
+  _aligned_malloc=wrap__aligned_malloc
+  strndup=wrap_strndup
+  strdup=wrap_strdup
+  _strdup=wrap_strdup
+  wcsdup=wrap_wcsdup
+  _wcsdup=wrap_wcsdup
diff --git a/mozglue/build/mozglue.dll.manifest b/mozglue/build/mozglue.dll.manifest
new file mode 100644
index 0000000000..037eae4f77
--- /dev/null
+++ b/mozglue/build/mozglue.dll.manifest
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+<assemblyIdentity
+    version="1.0.0.0"
+    name="mozglue"
+    type="win32"
+/>
+<file name="mozglue.dll"/>
+</assembly>
diff --git a/mozglue/build/mozglue.ver b/mozglue/build/mozglue.ver
new file mode 100644
index 0000000000..433d820b36
--- /dev/null
+++ b/mozglue/build/mozglue.ver
@@ -0,0 +1,4 @@
+libmozglue.so {
+global:
+  *;
+};
diff --git a/mozglue/build/ppc.cpp b/mozglue/build/ppc.cpp
new file mode 100644
index 0000000000..20ef121386
--- /dev/null
+++ b/mozglue/build/ppc.cpp
@@ -0,0 +1,64 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use Power ISA-specific
+ * extensions */
+
+#include "ppc.h"
+#include "mozilla/Unused.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(XP_LINUX)
+// Use the getauxval() function if available.
+// ARCH_3_00 wasn't defined until glibc 2.23, so include just in case.
+#  include <sys/auxv.h>
+#  ifndef PPC_FEATURE2_ARCH_3_00
+#    define PPC_FEATURE2_ARCH_3_00 0x00800000
+#  endif
+#endif
+
+const unsigned PPC_FLAG_VMX = 1;  // set when AT_HWCAP has PPC_FEATURE_HAS_ALTIVEC
+const unsigned PPC_FLAG_VSX = 2;  // set when AT_HWCAP has PPC_FEATURE_HAS_VSX
+const unsigned PPC_FLAG_VSX3 = 4;  // set when AT_HWCAP2 has PPC_FEATURE2_ARCH_3_00
+
+// Returns the PPC_FLAG_* bits for this CPU. The value is computed on the
+// first call and served from a function-local static afterwards.
+static signed get_ppc_cpu_flags(void) {
+  static signed cached = -1;  // -1 means "not probed yet"
+  if (cached >= 0) {
+    return cached;
+  }
+
+  signed detected = 0;
+#if defined(XP_LINUX)
+  // Query the hardware capability words via getauxval().
+  const unsigned long int hwcap = getauxval(AT_HWCAP);
+  const unsigned long int hwcap2 = getauxval(AT_HWCAP2);
+
+  if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
+    detected |= PPC_FLAG_VMX;
+  }
+  if (hwcap & PPC_FEATURE_HAS_VSX) {
+    detected |= PPC_FLAG_VSX;
+  }
+  if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
+    detected |= PPC_FLAG_VSX3;
+  }
+#else
+  // No detection is implemented off Linux, so no SIMD features are reported.
+#endif
+
+  cached = detected;
+  return cached;
+}
+
+namespace mozilla {
+namespace ppc_private {
+bool vmx_enabled = !!(get_ppc_cpu_flags() & PPC_FLAG_VMX);  // first call probes, later calls hit the cache
+bool vsx_enabled = !!(get_ppc_cpu_flags() & PPC_FLAG_VSX);
+bool vsx3_enabled = !!(get_ppc_cpu_flags() & PPC_FLAG_VSX3);
+}  // namespace ppc_private
+}  // namespace mozilla
diff --git a/mozglue/build/ppc.h b/mozglue/build/ppc.h
new file mode 100644
index 0000000000..9527d8dadd
--- /dev/null
+++ b/mozglue/build/ppc.h
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use Power ISA-specific
+   extensions */
+
+#ifndef mozilla_ppc_h_
+#define mozilla_ppc_h_
+
+// for definition of MFBT_DATA
+#include "mozilla/Types.h"
+
+namespace mozilla {
+namespace ppc_private {
+extern bool MFBT_DATA vmx_enabled;  // all three flags are defined in ppc.cpp
+extern bool MFBT_DATA vsx_enabled;
+extern bool MFBT_DATA vsx3_enabled;
+}  // namespace ppc_private
+
+inline bool supports_vmx() {  // false on non-__powerpc__ builds, else ppc_private::vmx_enabled
+#ifdef __powerpc__
+  return ppc_private::vmx_enabled;
+#else
+  return false;
+#endif
+}
+
+inline bool supports_vsx() {  // false on non-__powerpc__ builds, else ppc_private::vsx_enabled
+#ifdef __powerpc__
+  return ppc_private::vsx_enabled;
+#else
+  return false;
+#endif
+}
+
+inline bool supports_vsx3() {  // false on non-__powerpc__ builds, else ppc_private::vsx3_enabled
+#ifdef __powerpc__
+  return ppc_private::vsx3_enabled;
+#else
+  return false;
+#endif
+}
+
+}  // namespace mozilla
+
+#endif /* !defined(mozilla_ppc_h_) */
diff --git a/mozglue/build/replace_malloc.mozbuild b/mozglue/build/replace_malloc.mozbuild
new file mode 100644
index 0000000000..16c8b7271f
--- /dev/null
+++ b/mozglue/build/replace_malloc.mozbuild
@@ -0,0 +1,6 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG['OS_TARGET'] == 'Darwin' and CONFIG['MOZ_REPLACE_MALLOC']:
+    LDFLAGS += ['-Wl,-U,_replace_init']  # presumably lets _replace_init stay undefined at link time (ld -U)
diff --git a/mozglue/dllservices/Authenticode.cpp b/mozglue/dllservices/Authenticode.cpp
new file mode 100644
index 0000000000..55ce487f20
--- /dev/null
+++ b/mozglue/dllservices/Authenticode.cpp
@@ -0,0 +1,432 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// We need Windows 8 functions and structures to be able to verify SHA-256.
+#if defined(_WIN32_WINNT)
+#  undef _WIN32_WINNT
+#  define _WIN32_WINNT _WIN32_WINNT_WIN8
+#endif  // defined(_WIN32_WINNT)
+#if defined(NTDDI_VERSION)
+#  undef NTDDI_VERSION
+#  define NTDDI_VERSION NTDDI_WIN8
+#endif  // defined(NTDDI_VERSION)
+
+#include "Authenticode.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/WindowsVersion.h"
+#include "nsWindowsHelpers.h"
+
+#include <windows.h>
+#include <softpub.h>
+#include <wincrypt.h>
+#include <wintrust.h>
+#include <mscat.h>
+
+#include <string.h>
+
+namespace {
+
+struct CertStoreDeleter {  // UniquePtr deleter for HCERTSTORE handles
+  typedef HCERTSTORE pointer;
+  void operator()(pointer aStore) { ::CertCloseStore(aStore, 0); }
+};
+
+struct CryptMsgDeleter {  // UniquePtr deleter for HCRYPTMSG handles
+  typedef HCRYPTMSG pointer;
+  void operator()(pointer aMsg) { ::CryptMsgClose(aMsg); }
+};
+
+struct CertContextDeleter {  // UniquePtr deleter for certificate contexts
+  void operator()(PCCERT_CONTEXT aCertContext) {
+    ::CertFreeCertificateContext(aCertContext);
+  }
+};
+
+struct CATAdminContextDeleter {  // UniquePtr deleter for HCATADMIN handles
+  typedef HCATADMIN pointer;
+  void operator()(pointer aCtx) {
+    static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+        &::CryptCATAdminReleaseContext)>
+        pCryptCATAdminReleaseContext(L"wintrust.dll",
+                                     "CryptCATAdminReleaseContext");
+
+    MOZ_ASSERT(!!pCryptCATAdminReleaseContext);
+    if (!pCryptCATAdminReleaseContext) {
+      return;  // function pointer unavailable: aCtx is not released
+    }
+
+    pCryptCATAdminReleaseContext(aCtx, 0);
+  }
+};
+
+typedef mozilla::UniquePtr<HCERTSTORE, CertStoreDeleter> CertStoreUniquePtr;  // owning handle aliases
+typedef mozilla::UniquePtr<HCRYPTMSG, CryptMsgDeleter> CryptMsgUniquePtr;
+typedef mozilla::UniquePtr<const CERT_CONTEXT, CertContextDeleter>
+    CertContextUniquePtr;
+typedef mozilla::UniquePtr<HCATADMIN, CATAdminContextDeleter>
+    CATAdminContextUniquePtr;
+
+static const DWORD kEncodingTypes = X509_ASN_ENCODING | PKCS_7_ASN_ENCODING;  // used for certificate lookup
+
+class SignedBinary final {  // Loads signature data for one file; all work happens in the constructor.
+ public:
+  SignedBinary(const wchar_t* aFilePath, mozilla::AuthenticodeFlags aFlags);
+
+  explicit operator bool() const { return mCertStore && mCryptMsg && mCertCtx; }  // all three must be set
+
+  mozilla::UniquePtr<wchar_t[]> GetOrgName();
+
+  SignedBinary(const SignedBinary&) = delete;
+  SignedBinary(SignedBinary&&) = delete;
+  SignedBinary& operator=(const SignedBinary&) = delete;
+  SignedBinary& operator=(SignedBinary&&) = delete;
+
+ private:
+  bool VerifySignature(const wchar_t* aFilePath);
+  bool QueryObject(const wchar_t* aFilePath);
+  static bool VerifySignatureInternal(WINTRUST_DATA& aTrustData);
+
+ private:
+  enum class TrustSource { eNone, eEmbedded, eCatalog };  // where QueryObject found the signature
+
+ private:
+  const mozilla::AuthenticodeFlags mFlags;
+  TrustSource mTrustSource;
+  CertStoreUniquePtr mCertStore;  // set by QueryObject
+  CryptMsgUniquePtr mCryptMsg;  // set by QueryObject
+  CertContextUniquePtr mCertCtx;  // set at the end of the constructor
+};
+
+SignedBinary::SignedBinary(const wchar_t* aFilePath,
+                           mozilla::AuthenticodeFlags aFlags)
+    : mFlags(aFlags), mTrustSource(TrustSource::eNone) {
+  if (!VerifySignature(aFilePath)) {
+    return;  // mCertCtx stays null, so operator bool() reports failure
+  }
+
+  DWORD certInfoLen = 0;
+  BOOL ok = CryptMsgGetParam(mCryptMsg.get(), CMSG_SIGNER_CERT_INFO_PARAM, 0,
+                             nullptr, &certInfoLen);  // null buffer: only query the size
+  if (!ok) {
+    return;
+  }
+
+  auto certInfoBuf = mozilla::MakeUnique<char[]>(certInfoLen);
+
+  ok = CryptMsgGetParam(mCryptMsg.get(), CMSG_SIGNER_CERT_INFO_PARAM, 0,
+                        certInfoBuf.get(), &certInfoLen);  // second call fills the buffer
+  if (!ok) {
+    return;
+  }
+
+  auto certInfo = reinterpret_cast<CERT_INFO*>(certInfoBuf.get());
+
+  PCCERT_CONTEXT certCtx =
+      CertFindCertificateInStore(mCertStore.get(), kEncodingTypes, 0,
+                                 CERT_FIND_SUBJECT_CERT, certInfo, nullptr);  // look up the signer's certificate
+  if (!certCtx) {
+    return;
+  }
+
+  mCertCtx.reset(certCtx);
+}
+
+bool SignedBinary::QueryObject(const wchar_t* aFilePath) {  // fills mCertStore/mCryptMsg from the file
+  DWORD encodingType, contentType, formatType;
+  HCERTSTORE rawCertStore;
+  HCRYPTMSG rawCryptMsg;
+  BOOL result = ::CryptQueryObject(CERT_QUERY_OBJECT_FILE, aFilePath,
+                                   CERT_QUERY_CONTENT_FLAG_PKCS7_SIGNED_EMBED,
+                                   CERT_QUERY_FORMAT_FLAG_BINARY, 0,
+                                   &encodingType, &contentType, &formatType,
+                                   &rawCertStore, &rawCryptMsg, nullptr);
+  if (!result) {
+    return false;  // members are left untouched on failure
+  }
+
+  mCertStore.reset(rawCertStore);  // take ownership of both handles
+  mCryptMsg.reset(rawCryptMsg);
+
+  return true;
+}
+
+/**
+ * @param aTrustData must be a WINTRUST_DATA structure that has been zeroed out
+ *                   and then populated at least with its |cbStruct|,
+ *                   |dwUnionChoice|, and appropriate union field. This function
+ *                   will then populate the remaining fields as appropriate.
+ */
+/* static */
+bool SignedBinary::VerifySignatureInternal(WINTRUST_DATA& aTrustData) {
+  aTrustData.dwUIChoice = WTD_UI_NONE;
+  aTrustData.fdwRevocationChecks = WTD_REVOKE_NONE;
+  aTrustData.dwStateAction = WTD_STATEACTION_VERIFY;
+  aTrustData.dwProvFlags = WTD_CACHE_ONLY_URL_RETRIEVAL;
+
+  const HWND hwnd = (HWND)INVALID_HANDLE_VALUE;
+  GUID policyGUID = WINTRUST_ACTION_GENERIC_VERIFY_V2;
+  LONG result = ::WinVerifyTrust(hwnd, &policyGUID, &aTrustData);
+
+  aTrustData.dwStateAction = WTD_STATEACTION_CLOSE;  // follow-up call closes the verify state
+  ::WinVerifyTrust(hwnd, &policyGUID, &aTrustData);  // its return value is deliberately ignored
+
+  return result == ERROR_SUCCESS;  // only the VERIFY call decides the outcome
+}
+
+bool SignedBinary::VerifySignature(const wchar_t* aFilePath) {  // embedded signature first, then catalog
+  // First, try the binary itself
+  if (QueryObject(aFilePath)) {
+    mTrustSource = TrustSource::eEmbedded;
+    if (mFlags & mozilla::AuthenticodeFlags::SkipTrustVerification) {
+      return true;  // caller asked to skip WinVerifyTrust
+    }
+
+    WINTRUST_FILE_INFO fileInfo = {sizeof(fileInfo)};
+    fileInfo.pcwszFilePath = aFilePath;
+
+    WINTRUST_DATA trustData = {sizeof(trustData)};
+    trustData.dwUnionChoice = WTD_CHOICE_FILE;
+    trustData.pFile = &fileInfo;
+
+    return VerifySignatureInternal(trustData);
+  }
+
+  // We didn't find anything in the binary, so now try a catalog file.
+
+  // First, we open a catalog admin context.
+  HCATADMIN rawCatAdmin;
+
+  // Windows 7 also exports the CryptCATAdminAcquireContext2 API, but it does
+  // *not* sign its binaries with SHA-256, so we use the old API in that case.
+  if (mozilla::IsWin8OrLater()) {
+    static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+        &::CryptCATAdminAcquireContext2)>
+        pCryptCATAdminAcquireContext2(L"wintrust.dll",
+                                      "CryptCATAdminAcquireContext2");
+    if (!pCryptCATAdminAcquireContext2) {
+      return false;
+    }
+
+    CERT_STRONG_SIGN_PARA policy = {sizeof(policy)};
+    policy.dwInfoChoice = CERT_STRONG_SIGN_OID_INFO_CHOICE;
+    policy.pszOID = const_cast<char*>(
+        szOID_CERT_STRONG_SIGN_OS_CURRENT);  // -Wwritable-strings
+
+    if (!pCryptCATAdminAcquireContext2(&rawCatAdmin, nullptr,
+                                       BCRYPT_SHA256_ALGORITHM, &policy, 0)) {
+      return false;
+    }
+  } else {
+    static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+        &::CryptCATAdminAcquireContext)>
+        pCryptCATAdminAcquireContext(L"wintrust.dll",
+                                     "CryptCATAdminAcquireContext");
+
+    if (!pCryptCATAdminAcquireContext ||
+        !pCryptCATAdminAcquireContext(&rawCatAdmin, nullptr, 0)) {
+      return false;
+    }
+  }
+
+  CATAdminContextUniquePtr catAdmin(rawCatAdmin);  // releases the context on every later return
+
+  // Now we need to hash the file at aFilePath.
+  // Since we're hashing this file, let's open it with a sequential scan hint.
+  HANDLE rawFile =
+      ::CreateFileW(aFilePath, GENERIC_READ,
+                    FILE_SHARE_READ | FILE_SHARE_DELETE | FILE_SHARE_WRITE,
+                    nullptr, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, nullptr);
+  if (rawFile == INVALID_HANDLE_VALUE) {
+    return false;
+  }
+
+  nsAutoHandle file(rawFile);  // presumably closes rawFile at scope exit; rawFile is still used below
+  DWORD hashLen = 0;
+  mozilla::UniquePtr<BYTE[]> hashBuf;
+
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::CryptCATAdminCalcHashFromFileHandle2)>
+      pCryptCATAdminCalcHashFromFileHandle2(
+          L"wintrust.dll", "CryptCATAdminCalcHashFromFileHandle2");
+  if (pCryptCATAdminCalcHashFromFileHandle2) {
+    if (!pCryptCATAdminCalcHashFromFileHandle2(rawCatAdmin, rawFile, &hashLen,
+                                               nullptr, 0) &&
+        ::GetLastError() != ERROR_INSUFFICIENT_BUFFER) {  // size query: this error is tolerated
+      return false;
+    }
+
+    hashBuf = mozilla::MakeUnique<BYTE[]>(hashLen);
+
+    if (!pCryptCATAdminCalcHashFromFileHandle2(rawCatAdmin, rawFile, &hashLen,
+                                               hashBuf.get(), 0)) {
+      return false;
+    }
+  } else {
+    static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+        &::CryptCATAdminCalcHashFromFileHandle)>
+        pCryptCATAdminCalcHashFromFileHandle(
+            L"wintrust.dll", "CryptCATAdminCalcHashFromFileHandle");
+
+    if (!pCryptCATAdminCalcHashFromFileHandle) {
+      return false;
+    }
+
+    if (!pCryptCATAdminCalcHashFromFileHandle(rawFile, &hashLen, nullptr, 0) &&
+        ::GetLastError() != ERROR_INSUFFICIENT_BUFFER) {  // size query: this error is tolerated
+      return false;
+    }
+
+    hashBuf = mozilla::MakeUnique<BYTE[]>(hashLen);
+
+    if (!pCryptCATAdminCalcHashFromFileHandle(rawFile, &hashLen, hashBuf.get(),
+                                              0)) {
+      return false;
+    }
+  }
+
+  // Now that we've hashed the file, query the catalog system to see if any
+  // catalogs reference a binary with our hash.
+
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::CryptCATAdminEnumCatalogFromHash)>
+      pCryptCATAdminEnumCatalogFromHash(L"wintrust.dll",
+                                        "CryptCATAdminEnumCatalogFromHash");
+  if (!pCryptCATAdminEnumCatalogFromHash) {
+    return false;
+  }
+
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::CryptCATAdminReleaseCatalogContext)>
+      pCryptCATAdminReleaseCatalogContext(L"wintrust.dll",
+                                          "CryptCATAdminReleaseCatalogContext");
+  if (!pCryptCATAdminReleaseCatalogContext) {
+    return false;
+  }
+
+  HCATINFO catInfoHdl = pCryptCATAdminEnumCatalogFromHash(
+      rawCatAdmin, hashBuf.get(), hashLen, 0, nullptr);
+  if (!catInfoHdl) {
+    return false;  // no catalog was returned for this hash
+  }
+
+  // We can't use UniquePtr for this because the deleter function requires two
+  // parameters.
+  auto cleanCatInfoHdl =
+      mozilla::MakeScopeExit([rawCatAdmin, catInfoHdl]() -> void {
+        pCryptCATAdminReleaseCatalogContext(rawCatAdmin, catInfoHdl, 0);
+      });
+
+  // We found a catalog! Now query for the path to the catalog file.
+
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::CryptCATCatalogInfoFromContext)>
+      pCryptCATCatalogInfoFromContext(L"wintrust.dll",
+                                      "CryptCATCatalogInfoFromContext");
+  if (!pCryptCATCatalogInfoFromContext) {
+    return false;
+  }
+
+  CATALOG_INFO_ catInfo = {sizeof(catInfo)};
+  if (!pCryptCATCatalogInfoFromContext(catInfoHdl, &catInfo, 0)) {
+    return false;
+  }
+
+  if (!QueryObject(catInfo.wszCatalogFile)) {
+    return false;
+  }
+
+  mTrustSource = TrustSource::eCatalog;
+
+  if (mFlags & mozilla::AuthenticodeFlags::SkipTrustVerification) {
+    return true;  // caller asked to skip WinVerifyTrust
+  }
+
+  // WINTRUST_CATALOG_INFO::pcwszMemberTag is commonly set to the string
+  // representation of the file hash, so we build that here.
+
+  DWORD strHashBufLen = (hashLen * 2) + 1;  // two hex digits per byte plus the terminator
+  auto strHashBuf = mozilla::MakeUnique<wchar_t[]>(strHashBufLen);
+  if (!::CryptBinaryToStringW(hashBuf.get(), hashLen,
+                              CRYPT_STRING_HEXRAW | CRYPT_STRING_NOCRLF,
+                              strHashBuf.get(), &strHashBufLen)) {
+    return false;
+  }
+
+  // Ensure that the tag is uppercase for WinVerifyTrust
+  // NB: CryptBinaryToStringW overwrites strHashBufLen with the length excluding
+  //     the null terminator, so we need to add it back for this call.
+  if (_wcsupr_s(strHashBuf.get(), strHashBufLen + 1)) {
+    return false;
+  }
+
+  // Now, given the path to the catalog, and the path to the member (ie, the
+  // binary whose hash we are validating), we may now validate. If the
+  // validation is successful, we then QueryObject on the *catalog file*
+  // instead of the binary.
+
+  WINTRUST_CATALOG_INFO wtCatInfo = {sizeof(wtCatInfo)};
+  wtCatInfo.pcwszCatalogFilePath = catInfo.wszCatalogFile;
+  wtCatInfo.pcwszMemberTag = strHashBuf.get();
+  wtCatInfo.pcwszMemberFilePath = aFilePath;
+  wtCatInfo.hMemberFile = rawFile;
+  if (mozilla::IsWin8OrLater()) {
+    wtCatInfo.hCatAdmin = rawCatAdmin;  // only passed along on Windows 8 and later
+  }
+
+  WINTRUST_DATA trustData = {sizeof(trustData)};
+  trustData.dwUnionChoice = WTD_CHOICE_CATALOG;
+  trustData.pCatalog = &wtCatInfo;
+
+  return VerifySignatureInternal(trustData);
+}
+
+mozilla::UniquePtr<wchar_t[]> SignedBinary::GetOrgName() {  // simple display name of mCertCtx, or nullptr
+  DWORD charCount = CertGetNameStringW(
+      mCertCtx.get(), CERT_NAME_SIMPLE_DISPLAY_TYPE, 0, nullptr, nullptr, 0);  // null buffer: length only
+  if (charCount <= 1) {
+    // Not found
+    return nullptr;
+  }
+
+  auto result = mozilla::MakeUnique<wchar_t[]>(charCount);
+  charCount = CertGetNameStringW(mCertCtx.get(), CERT_NAME_SIMPLE_DISPLAY_TYPE,
+                                 0, nullptr, result.get(), charCount);  // second call fills |result|
+  MOZ_ASSERT(charCount > 1);
+
+  return result;
+}
+
+}  // anonymous namespace
+
+namespace mozilla {
+
+class AuthenticodeImpl : public Authenticode {  // implementation backed by SignedBinary
+ public:
+  virtual UniquePtr<wchar_t[]> GetBinaryOrgName(
+      const wchar_t* aFilePath,
+      AuthenticodeFlags aFlags = AuthenticodeFlags::Default) override;
+};
+
+// Returns SignedBinary::GetOrgName() for |aFilePath|, or nullptr when invalid.
+UniquePtr<wchar_t[]> AuthenticodeImpl::GetBinaryOrgName(
+    const wchar_t* aFilePath, AuthenticodeFlags aFlags) {
+  SignedBinary signedBinary(aFilePath, aFlags);
+  if (signedBinary) {
+    return signedBinary.GetOrgName();
+  }
+  return nullptr;
+}
+
+static AuthenticodeImpl sAuthenticodeImpl;  // single file-local instance
+
+Authenticode* GetAuthenticode() { return &sAuthenticodeImpl; }  // never null; caller does not own it
+
+}  // namespace mozilla
diff --git a/mozglue/dllservices/Authenticode.h b/mozglue/dllservices/Authenticode.h
new file mode 100644
index 0000000000..182512da2c
--- /dev/null
+++ b/mozglue/dllservices/Authenticode.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_Authenticode_h
+#define mozilla_Authenticode_h
+
+#include "mozilla/Maybe.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+
+enum class AuthenticodeFlags : uint32_t {  // bit flags; combined via the operators declared below
+  Default = 0,
+  SkipTrustVerification = 1,  // Authenticode.cpp then returns before calling WinVerifyTrust
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(AuthenticodeFlags)
+
+class Authenticode {  // abstract interface; the instance is obtained via GetAuthenticode() in Authenticode.cpp
+ public:
+  virtual UniquePtr<wchar_t[]> GetBinaryOrgName(  // AuthenticodeImpl returns nullptr when no name is found
+      const wchar_t* aFilePath,
+      AuthenticodeFlags aFlags = AuthenticodeFlags::Default) = 0;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_Authenticode_h
diff --git a/mozglue/dllservices/LoaderAPIInterfaces.h b/mozglue/dllservices/LoaderAPIInterfaces.h
new file mode 100644
index 0000000000..4546cf79bd
--- /dev/null
+++ b/mozglue/dllservices/LoaderAPIInterfaces.h
@@ -0,0 +1,120 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_LoaderAPIInterfaces_h
+#define mozilla_LoaderAPIInterfaces_h
+
+#include "nscore.h"
+#include "mozilla/ModuleLoadInfo.h"
+
+namespace mozilla {
+namespace nt {
+
+class NS_NO_VTABLE LoaderObserver {  // pure interface: every member is pure virtual
+ public:
+  /**
+   * Notification that a DLL load has begun.
+   *
+   * @param aContext Outparam that allows this observer to store any context
+   *                 information pertaining to the current load.
+   * @param aRequestedDllName The DLL name requested by whatever invoked the
+   *                          loader. This name may not match the effective
+   *                          name of the DLL once the loader has completed
+   *                          its path search.
+   */
+  virtual void OnBeginDllLoad(void** aContext,
+                              PCUNICODE_STRING aRequestedDllName) = 0;
+
+  /**
+   * Query the observer to determine whether the DLL named |aLSPLeafName| needs
+   * to be substituted with another module, and substitute the module handle
+   * when necessary.
+   *
+   * @return true when substitution occurs, otherwise false
+   */
+  virtual bool SubstituteForLSP(PCUNICODE_STRING aLSPLeafName,
+                                PHANDLE aOutHandle) = 0;  // aOutHandle: presumably receives the substitute handle
+
+  /**
+   * Notification that a DLL load has ended.
+   *
+   * @param aContext The context that was set by the corresponding call to
+   *                 OnBeginDllLoad
+   * @param aNtStatus The NTSTATUS returned by LdrLoadDll
+   * @param aModuleLoadInfo Telemetry information that was gathered about the
+   *                        load.
+   */
+  virtual void OnEndDllLoad(void* aContext, NTSTATUS aNtStatus,
+                            ModuleLoadInfo&& aModuleLoadInfo) = 0;
+
+  /**
+   * Called to inform the observer that it is no longer active and, if
+   * necessary, call aNext->OnForward() with any accumulated telemetry
+   * information.
+   */
+  virtual void Forward(LoaderObserver* aNext) = 0;  // aNext receives the telemetry via OnForward()
+
+  /**
+   * Receives a vector of module load telemetry from a previous LoaderObserver.
+   */
+  virtual void OnForward(ModuleLoadInfoVec&& aInfo) = 0;
+};
+
+class NS_NO_VTABLE LoaderAPI {  // pure interface: every member function is pure virtual
+ public:
+  /**
+   * Construct a new ModuleLoadInfo structure and notify the LoaderObserver
+   * that a library load is beginning.
+   */
+  virtual ModuleLoadInfo ConstructAndNotifyBeginDllLoad(
+      void** aContext, PCUNICODE_STRING aRequestedDllName) = 0;
+
+  /**
+   * Query to determine whether the DLL named |aLSPLeafName| needs to be
+   * substituted with another module, and substitute the module handle when
+   * necessary.
+   *
+   * @return true when substitution occurs, otherwise false
+   */
+  virtual bool SubstituteForLSP(PCUNICODE_STRING aLSPLeafName,
+                                PHANDLE aOutHandle) = 0;
+
+  /**
+   * Notification that a DLL load has ended.
+   */
+  virtual void NotifyEndDllLoad(void* aContext, NTSTATUS aLoadNtStatus,
+                                ModuleLoadInfo&& aModuleLoadInfo) = 0;
+
+  /**
+   * Given the address of a mapped section, obtain the name of the file that is
+   * backing it.
+   */
+  virtual AllocatedUnicodeString GetSectionName(void* aSectionAddr) = 0;
+
+  using InitDllBlocklistOOPFnPtr = LauncherVoidResultWithLineInfo (*)(
+      const wchar_t*, HANDLE, const IMAGE_THUNK_DATA*);
+  using HandleLauncherErrorFnPtr = void (*)(const LauncherError&, const char*);
+
+  /**
+   * Return pointers to winlauncher's functions (of the types aliased above).
+   * Used by sandboxBroker::LaunchApp.
+   */
+  virtual InitDllBlocklistOOPFnPtr GetDllBlocklistInitFn() = 0;
+  virtual HandleLauncherErrorFnPtr GetHandleLauncherErrorFn() = 0;
+};
+
+struct WinLauncherFunctions final {  // holds the two LoaderAPI function pointers
+  nt::LoaderAPI::InitDllBlocklistOOPFnPtr mInitDllBlocklistOOP;
+  nt::LoaderAPI::HandleLauncherErrorFnPtr mHandleLauncherError;
+
+  WinLauncherFunctions()  // both pointers start out null
+      : mInitDllBlocklistOOP(nullptr), mHandleLauncherError(nullptr) {}
+};
+
+}  // namespace nt
+}  // namespace mozilla
+
+#endif  // mozilla_LoaderAPIInterfaces_h
diff --git a/mozglue/dllservices/LoaderObserver.cpp b/mozglue/dllservices/LoaderObserver.cpp
new file mode 100644
index 0000000000..9c4fa53d3b
--- /dev/null
+++ b/mozglue/dllservices/LoaderObserver.cpp
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LoaderObserver.h"
+
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "mozilla/glue/WindowsUnicode.h"
+#include "mozilla/StackWalk_windows.h"
+
+namespace {
+
+struct LoadContext {  // Profiler bookkeeping carried across one DLL load.
+  mozilla::ProfilerLabel mProfilerLabel;
+  mozilla::UniquePtr<char[]> mDynamicStringStorage;
+  mozilla::TimeStamp mStartTime;
+  LoadContext(mozilla::ProfilerLabel&& aProfilerLabel,
+              mozilla::UniquePtr<char[]>&& aNameStorage)
+      : mProfilerLabel(std::move(aProfilerLabel)),
+        mDynamicStringStorage(std::move(aNameStorage)),
+        mStartTime(mozilla::TimeStamp::Now()) {}
+};
+
+}  // anonymous namespace
+
+namespace mozilla {
+
+extern glue::Win32SRWLock gDllServicesLock;
+extern glue::detail::DllServicesBase* gDllServices;
+
+namespace glue {
+
+void LoaderObserver::OnBeginDllLoad(void** aContext,
+                                    PCUNICODE_STRING aRequestedDllName) {
+  MOZ_ASSERT(aContext);
+  if (IsProfilerPresent()) {  // *aContext is only written on this path
+    UniquePtr<char[]> utf8RequestedDllName(WideToUTF8(aRequestedDllName));
+    const char* dynamicString = utf8RequestedDllName.get();
+    *aContext = new LoadContext(  // released by OnEndDllLoad
+        ProfilerLabelBegin("mozilla::glue::LoaderObserver::OnBeginDllLoad",
+                           dynamicString, &aContext),
+        std::move(utf8RequestedDllName));
+  }
+
+#ifdef _M_AMD64
+  // Prevent the stack walker from suspending this thread when LdrLoadDll
+  // holds the RtlLookupFunctionEntry lock.
+  SuppressStackWalking();  // undone in OnEndDllLoad
+#endif
+}
+
+bool LoaderObserver::SubstituteForLSP(PCUNICODE_STRING aLSPLeafName,
+                                      PHANDLE aOutHandle) {
+  // Currently unsupported: never substitutes and leaves |aOutHandle| as is.
+  return false;
+}
+
+void LoaderObserver::OnEndDllLoad(void* aContext, NTSTATUS aNtStatus,
+                                  ModuleLoadInfo&& aModuleLoadInfo) {
+#ifdef _M_AMD64
+  DesuppressStackWalking();
+#endif
+
+  // Take ownership of whatever OnBeginDllLoad stored; may legitimately be null.
+  UniquePtr<LoadContext> ctx(static_cast<LoadContext*>(aContext));
+  if (ctx && IsValidProfilerLabel(ctx->mProfilerLabel)) {
+    ProfilerLabelEnd(ctx->mProfilerLabel);
+    BASE_PROFILER_MARKER_TEXT(
+        "DllLoad", OTHER, MarkerTiming::IntervalUntilNowFrom(ctx->mStartTime),
+        mozilla::ProfilerString8View::WrapNullTerminatedString(
+            ctx->mDynamicStringStorage.get()));
+  }
+
+  // A blocked load reports access-denied in |aNtStatus|, yet it must still be
+  // recorded; anything that was never mapped is dropped.
+  const bool reportable = NT_SUCCESS(aNtStatus) || aModuleLoadInfo.WasDenied();
+  if (!reportable || !aModuleLoadInfo.WasMapped()) {
+    return;
+  }
+
+  {  // Hold the shared lock only while handing off to the DLL services
+    AutoSharedLock servicesLock(gDllServicesLock);
+    if (gDllServices) {
+      gDllServices->DispatchDllLoadNotification(std::move(aModuleLoadInfo));
+      return;
+    }
+  }
+
+  // DLL services are not available yet: queue the event unless disabled.
+  AutoExclusiveLock queueLock(mLock);
+  if (!mEnabled) {
+    return;
+  }
+
+  if (!mModuleLoads) {
+    mModuleLoads = new ModuleLoadInfoVec();
+  }
+
+  // emplaceBack's failure result is deliberately discarded.
+  Unused << mModuleLoads->emplaceBack(std::move(aModuleLoadInfo));
+}
+
+void LoaderObserver::Forward(nt::LoaderObserver* aNext) {  // |aNext| is unused
+  MOZ_ASSERT_UNREACHABLE(
+      "This implementation does not forward to any more "
+      "nt::LoaderObserver objects");
+}
+
+void LoaderObserver::Forward(detail::DllServicesBase* aNext) {
+  MOZ_ASSERT(aNext);
+  if (!aNext) {
+    return;
+  }
+
+  // Detach the backlog under the lock so dispatch happens outside of it.
+  UniquePtr<ModuleLoadInfoVec> backlog;
+  {
+    AutoExclusiveLock guard(mLock);
+    backlog.reset(mModuleLoads);
+    mModuleLoads = nullptr;
+  }
+
+  if (!backlog) {
+    return;
+  }
+
+  // |backlog| frees the (moved-from) vector when it goes out of scope.
+  aNext->DispatchModuleLoadBacklogNotification(std::move(*backlog));
+}
+
+void LoaderObserver::Disable() {
+  // Stop queueing and drop anything already queued; the vector itself is
+  // destroyed only after the lock has been released.
+  UniquePtr<ModuleLoadInfoVec> discarded;
+
+  {
+    AutoExclusiveLock guard(mLock);
+    mEnabled = false;
+    discarded.reset(mModuleLoads);
+    mModuleLoads = nullptr;
+  }
+}
+
+void LoaderObserver::OnForward(ModuleLoadInfoVec&& aInfo) {
+  AutoExclusiveLock guard(mLock);
+  if (!mModuleLoads) {
+    mModuleLoads = new ModuleLoadInfoVec();
+  }
+
+  MOZ_ASSERT(mModuleLoads->empty());
+  if (mModuleLoads->empty()) {  // Expected case: adopt the whole vector.
+    *mModuleLoads = std::move(aInfo);
+    return;
+  }
+  // Unexpected, but tolerated: append onto what is already queued.
+  for (auto&& entry : aInfo) {
+    Unused << mModuleLoads->append(std::move(entry));
+  }
+}
+
+}  // namespace glue
+}  // namespace mozilla
diff --git a/mozglue/dllservices/LoaderObserver.h b/mozglue/dllservices/LoaderObserver.h
new file mode 100644
index 0000000000..39b13035a3
--- /dev/null
+++ b/mozglue/dllservices/LoaderObserver.h
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_LoaderObserver_h
+#define mozilla_glue_LoaderObserver_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/LoaderAPIInterfaces.h"
+#include "mozilla/glue/WindowsDllServices.h"
+#include "mozilla/glue/WinUtils.h"
+#include "mozilla/UniquePtr.h"
+
+namespace mozilla {
+namespace glue {
+
+class MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS LoaderObserver final
+    : public nt::LoaderObserver {
+ public:
+  constexpr LoaderObserver() : mModuleLoads(nullptr), mEnabled(true) {}
+
+  void OnBeginDllLoad(void** aContext,
+                      PCUNICODE_STRING aPreliminaryDllName) final;
+  bool SubstituteForLSP(PCUNICODE_STRING aLspLeafName,
+                        PHANDLE aOutHandle) final;
+  void OnEndDllLoad(void* aContext, NTSTATUS aNtStatus,
+                    ModuleLoadInfo&& aModuleLoadInfo) final;
+  void Forward(nt::LoaderObserver* aNext) final;
+  void OnForward(ModuleLoadInfoVec&& aInfo) final;
+
+  void Forward(mozilla::glue::detail::DllServicesBase* aSvc);
+  void Disable();
+
+ private:
+  Win32SRWLock mLock;               // guards the two members below
+  ModuleLoadInfoVec* mModuleLoads;  // queued loads; allocated on first use
+  bool mEnabled;                    // cleared by Disable()
+};
+
+}  // namespace glue
+}  // namespace mozilla
+
+#endif  // mozilla_glue_LoaderObserver_h
diff --git a/mozglue/dllservices/ModuleLoadFrame.cpp b/mozglue/dllservices/ModuleLoadFrame.cpp
new file mode 100644
index 0000000000..ff972836c3
--- /dev/null
+++ b/mozglue/dllservices/ModuleLoadFrame.cpp
@@ -0,0 +1,105 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ModuleLoadFrame.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/UniquePtr.h"
+#include "NtLoaderAPI.h"
+
+#include <string.h>
+
+#include "WindowsFallbackLoaderAPI.h"
+
+static bool IsNullTerminated(PCUNICODE_STRING aStr) {
+  if (!aStr || aStr->MaximumLength < aStr->Length + sizeof(WCHAR)) return false;
+  return aStr->Buffer && aStr->Buffer[aStr->Length / sizeof(WCHAR)] == 0;
+}
+
+static mozilla::FallbackLoaderAPI gFallbackLoaderAPI;
+
+namespace mozilla {
+namespace glue {
+
+nt::LoaderAPI* ModuleLoadFrame::sLoaderAPI;
+
+using GetNtLoaderAPIFn = decltype(&mozilla::GetNtLoaderAPI);
+
+/* static */
+void ModuleLoadFrame::StaticInit(
+    nt::LoaderObserver* aNewObserver,
+    nt::WinLauncherFunctions* aOutWinLauncherFunctions) {
+  // The launcher-aware loader API, when present, is exported by the main .exe.
+  HMODULE exeModule = ::GetModuleHandleW(nullptr);
+  const auto getLoaderAPI = reinterpret_cast<GetNtLoaderAPIFn>(
+      ::GetProcAddress(exeModule, "GetNtLoaderAPI"));
+  if (getLoaderAPI) {
+    sLoaderAPI = getLoaderAPI(aNewObserver);
+    MOZ_ASSERT(sLoaderAPI);
+    if (aOutWinLauncherFunctions) {
+      aOutWinLauncherFunctions->mInitDllBlocklistOOP =
+          sLoaderAPI->GetDllBlocklistInitFn();
+      aOutWinLauncherFunctions->mHandleLauncherError =
+          sLoaderAPI->GetHandleLauncherErrorFn();
+    }
+    return;
+  }
+
+  // Processes other than firefox.exe do not contain the launcher process
+  // blocklist, so route everything through the in-process fallback instead.
+  gFallbackLoaderAPI.SetObserver(aNewObserver);
+  sLoaderAPI = &gFallbackLoaderAPI;
+  if (aOutWinLauncherFunctions) {
+    // mInitDllBlocklistOOP intentionally stays null so that calling it in
+    // non-Firefox hits MOZ_RELEASE_ASSERT.
+    aOutWinLauncherFunctions->mHandleLauncherError =
+        [](const mozilla::LauncherError&, const char*) {};
+  }
+}
+
+ModuleLoadFrame::ModuleLoadFrame(PCUNICODE_STRING aRequestedDllName)
+    : mAlreadyLoaded(false),
+      mContext(nullptr),
+      mDllLoadStatus(STATUS_UNSUCCESSFUL),
+      mLoadInfo(sLoaderAPI->ConstructAndNotifyBeginDllLoad(&mContext,
+                                                           aRequestedDllName)) {
+  if (!aRequestedDllName) {
+    return;
+  }
+
+  // GetModuleHandleW needs a null-terminated name; use the caller's buffer
+  // when it already is one, otherwise make a terminated private copy.
+  UniquePtr<WCHAR[]> copy;
+  const WCHAR* lookupName = aRequestedDllName->Buffer;
+
+  if (!IsNullTerminated(aRequestedDllName)) {
+    const USHORT numChars = aRequestedDllName->Length / sizeof(WCHAR);
+    const USHORT numCharsWithNul = numChars + 1;
+    copy = MakeUnique<WCHAR[]>(numCharsWithNul);
+    lookupName = wcsncpy_s(copy.get(), numCharsWithNul,
+                           aRequestedDllName->Buffer, numChars)
+                     ? nullptr
+                     : copy.get();
+  }
+
+  mAlreadyLoaded = lookupName && !!::GetModuleHandleW(lookupName);
+}
+
+ModuleLoadFrame::~ModuleLoadFrame() {  // pairs with the ctor's begin-notify
+  sLoaderAPI->NotifyEndDllLoad(mContext, mDllLoadStatus, std::move(mLoadInfo));
+}
+
+void ModuleLoadFrame::SetLoadStatus(NTSTATUS aNtStatus, HANDLE aHandle) {
+  mDllLoadStatus = aNtStatus;
+  void* const moduleBase = mozilla::nt::PEHeaders::HModuleToBaseAddr<void*>(
+      reinterpret_cast<HMODULE>(aHandle));
+  mLoadInfo.mBaseAddr = moduleBase;
+  // The section name is only queried when the ctor did not find the module.
+  if (mAlreadyLoaded) return;
+  mLoadInfo.mSectionName = sLoaderAPI->GetSectionName(moduleBase);
+}
+
+}  // namespace glue
+}  // namespace mozilla
diff --git a/mozglue/dllservices/ModuleLoadFrame.h b/mozglue/dllservices/ModuleLoadFrame.h
new file mode 100644
index 0000000000..6f234f8788
--- /dev/null
+++ b/mozglue/dllservices/ModuleLoadFrame.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_ModuleLoadFrame_h
+#define mozilla_glue_ModuleLoadFrame_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/LoaderAPIInterfaces.h"
+
+namespace mozilla {
+namespace glue {
+
+class MOZ_RAII ModuleLoadFrame final {
+ public:
+  explicit ModuleLoadFrame(PCUNICODE_STRING aRequestedDllName);
+  ~ModuleLoadFrame();
+
+  void SetLoadStatus(NTSTATUS aNtStatus, HANDLE aHandle);
+
+  ModuleLoadFrame(const ModuleLoadFrame&) = delete;
+  ModuleLoadFrame(ModuleLoadFrame&&) = delete;
+  ModuleLoadFrame& operator=(const ModuleLoadFrame&) = delete;
+  ModuleLoadFrame& operator=(ModuleLoadFrame&&) = delete;
+
+  static void StaticInit(nt::LoaderObserver* aNewObserver,
+                         nt::WinLauncherFunctions* aOutWinLauncherFunctions);
+
+ private:
+  bool mAlreadyLoaded;      // module was already resident at construction
+  void* mContext;           // passed to the begin/end load notifications
+  NTSTATUS mDllLoadStatus;  // STATUS_UNSUCCESSFUL until SetLoadStatus runs
+  ModuleLoadInfo mLoadInfo;
+
+ private:
+  static nt::LoaderAPI* sLoaderAPI;  // assigned by StaticInit()
+};
+
+}  // namespace glue
+}  // namespace mozilla
+
+#endif  // mozilla_glue_ModuleLoadFrame_h
diff --git a/mozglue/dllservices/ModuleLoadInfo.h b/mozglue/dllservices/ModuleLoadInfo.h
new file mode 100644
index 0000000000..2d9bdc0cc7
--- /dev/null
+++ b/mozglue/dllservices/ModuleLoadInfo.h
@@ -0,0 +1,173 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_ModuleLoadInfo_h
+#define mozilla_ModuleLoadInfo_h
+
+#include "mozilla/NativeNt.h"
+#include "mozilla/Vector.h"
+#include "mozilla/Unused.h"
+
+namespace mozilla {
+
+struct ModuleLoadInfo final {
+  // If you add a new value or change the meaning of the values, please
+  // update createLoadStatusElement in aboutSupport.js accordingly, which
+  // defines text labels of these enum values displayed on about:support.
+  enum class Status : uint32_t {
+    Loaded = 0,
+    Blocked,
+    Redirected,
+  };
+
+  // We do not provide these methods inside Gecko proper.
+#if !defined(MOZILLA_INTERNAL_API)
+
+  /**
+   * This constructor is for use by the LdrLoadDll hook.
+   */
+  explicit ModuleLoadInfo(PCUNICODE_STRING aRequestedDllName)
+      : mLoadTimeInfo(),
+        mThreadId(nt::RtlGetCurrentThreadId()),
+        mRequestedDllName(aRequestedDllName),
+        mBaseAddr(nullptr),
+        mStatus(Status::Loaded) {
+#  if defined(IMPL_MFBT)
+    ::QueryPerformanceCounter(&mBeginTimestamp);
+#  else
+    ::RtlQueryPerformanceCounter(&mBeginTimestamp);
+#  endif  // defined(IMPL_MFBT)
+  }
+
+  /**
+   * This constructor is used by the NtMapViewOfSection hook IF AND ONLY IF
+   * the LdrLoadDll hook did not already construct a ModuleLoadInfo for the
+   * current DLL load. This may occur while the loader is loading dependencies
+   * of another library.
+   */
+  ModuleLoadInfo(nt::AllocatedUnicodeString&& aSectionName,
+                 const void* aBaseAddr, Status aLoadStatus)
+      : mLoadTimeInfo(),
+        mThreadId(nt::RtlGetCurrentThreadId()),
+        mSectionName(std::move(aSectionName)),
+        mBaseAddr(aBaseAddr),
+        mStatus(aLoadStatus) {
+#  if defined(IMPL_MFBT)
+    ::QueryPerformanceCounter(&mBeginTimestamp);
+#  else
+    ::RtlQueryPerformanceCounter(&mBeginTimestamp);
+#  endif  // defined(IMPL_MFBT)
+  }
+
+  /**
+   * Marks the time that LdrLoadDll began loading this library.
+   */
+  void SetBeginLoadTimeStamp() {
+#  if defined(IMPL_MFBT)
+    ::QueryPerformanceCounter(&mLoadTimeInfo);
+#  else
+    ::RtlQueryPerformanceCounter(&mLoadTimeInfo);
+#  endif  // defined(IMPL_MFBT)
+  }
+
+  /**
+   * Marks the time that LdrLoadDll finished loading this library.
+   */
+  void SetEndLoadTimeStamp() {
+    LARGE_INTEGER endTimeStamp;
+#  if defined(IMPL_MFBT)
+    ::QueryPerformanceCounter(&endTimeStamp);
+#  else
+    ::RtlQueryPerformanceCounter(&endTimeStamp);
+#  endif  // defined(IMPL_MFBT)
+
+    LONGLONG& timeInfo = mLoadTimeInfo.QuadPart;
+    if (!timeInfo) {
+      return;
+    }
+
+    timeInfo = endTimeStamp.QuadPart - timeInfo;
+  }
+
+  /**
+   * Saves the current thread's call stack.
+   */
+  void CaptureBacktrace() {
+    const DWORD kMaxBacktraceSize = 512;
+
+    if (!mBacktrace.resize(kMaxBacktraceSize)) {
+      return;
+    }
+
+    // We don't use a Win32 variant here because Win32's CaptureStackBackTrace
+    // is just a macro that resolve to this function anyway.
+    WORD numCaptured = ::RtlCaptureStackBackTrace(2, kMaxBacktraceSize,
+                                                  mBacktrace.begin(), nullptr);
+    Unused << mBacktrace.resize(numCaptured);
+    // These backtraces might stick around for a while, so let's trim any
+    // excess memory.
+    mBacktrace.shrinkStorageToFit();
+  }
+
+#endif  // !defined(MOZILLA_INTERNAL_API)
+
+  ModuleLoadInfo(ModuleLoadInfo&&) = default;
+  ModuleLoadInfo& operator=(ModuleLoadInfo&&) = default;
+
+  ModuleLoadInfo() = delete;
+  ModuleLoadInfo(const ModuleLoadInfo&) = delete;
+  ModuleLoadInfo& operator=(const ModuleLoadInfo&) = delete;
+
+  /**
+   * A "bare" module load is one that was mapped without the code passing
+   * through a call to ntdll!LdrLoadDll.
+   */
+  bool IsBare() const {
+    // SetBeginLoadTimeStamp() and SetEndLoadTimeStamp() are only called by the
+    // LdrLoadDll hook, so when mLoadTimeInfo == 0, we know that we are bare.
+    return !mLoadTimeInfo.QuadPart;
+  }
+
+  /**
+   * Returns true when a section name was recorded, i.e. the DLL was mapped.
+   * Returns false when LdrLoadDll was called but NtMapViewOfSection was not,
+   * which happens when the DLL was already mapped by a previous request.
+   */
+  bool WasMapped() const { return !mSectionName.IsEmpty(); }
+
+  /**
+   * Returns true for DLL load which was denied by our blocklist.
+   */
+  bool WasDenied() const {
+    return mStatus == ModuleLoadInfo::Status::Blocked ||
+           mStatus == ModuleLoadInfo::Status::Redirected;
+  }
+
+  // Timestamp for the creation of this event
+  LARGE_INTEGER mBeginTimestamp;
+  // Duration of the LdrLoadDll call
+  LARGE_INTEGER mLoadTimeInfo;
+  // Thread ID of this DLL load
+  DWORD mThreadId;
+  // The name requested of LdrLoadDll by its caller
+  nt::AllocatedUnicodeString mRequestedDllName;
+  // The name of the DLL that backs section that was mapped by the loader. This
+  // string is the effective name of the DLL that was resolved by the loader's
+  // path search algorithm.
+  nt::AllocatedUnicodeString mSectionName;
+  // The base address of the module's mapped section
+  const void* mBaseAddr;
+  // If the module was successfully loaded, stack trace of the DLL load request
+  Vector<PVOID, 0, nt::RtlAllocPolicy> mBacktrace;
+  // The status of DLL load
+  Status mStatus;
+};
+
+using ModuleLoadInfoVec = Vector<ModuleLoadInfo, 0, nt::RtlAllocPolicy>;
+
+}  // namespace mozilla
+
+#endif  // mozilla_ModuleLoadInfo_h
diff --git a/mozglue/dllservices/NtLoaderAPI.h b/mozglue/dllservices/NtLoaderAPI.h
new file mode 100644
index 0000000000..628609092b
--- /dev/null
+++ b/mozglue/dllservices/NtLoaderAPI.h
@@ -0,0 +1,23 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_NtLoaderAPI_h
+#define mozilla_NtLoaderAPI_h
+
+#include "mozilla/LoaderAPIInterfaces.h"
+
+#if !defined(IMPL_MFBT)
+#  error "This should only be included from mozglue!"
+#endif  // !defined(IMPL_MFBT)
+
+namespace mozilla {
+
+extern "C" MOZ_IMPORT_API nt::LoaderAPI* GetNtLoaderAPI(
+    nt::LoaderObserver* aNewObserver);
+
+}  // namespace mozilla
+
+#endif  // mozilla_NtLoaderAPI_h
diff --git a/mozglue/dllservices/WindowsDllBlocklist.cpp b/mozglue/dllservices/WindowsDllBlocklist.cpp
new file mode 100644
index 0000000000..bacd6ad799
--- /dev/null
+++ b/mozglue/dllservices/WindowsDllBlocklist.cpp
@@ -0,0 +1,782 @@
+/* -*- Mode: C++; tab-width: 40; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+#include <winternl.h>
+
+#pragma warning(push)
+#pragma warning(disable : 4275 4530)  // See msvc-stl-wrapper.template.h
+#include <map>
+#pragma warning(pop)
+
+#include "Authenticode.h"
+#include "BaseProfiler.h"
+#include "nsWindowsDllInterceptor.h"
+#include "mozilla/CmdLineAndEnvUtils.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/StackWalk_windows.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "mozilla/WindowsVersion.h"
+#include "mozilla/WinHeaderOnlyUtils.h"
+#include "nsWindowsHelpers.h"
+#include "WindowsDllBlocklist.h"
+#include "mozilla/AutoProfilerLabel.h"
+#include "mozilla/glue/Debug.h"
+#include "mozilla/glue/WindowsDllServices.h"
+#include "mozilla/glue/WinUtils.h"
+
+// Start new implementation
+#include "LoaderObserver.h"
+#include "ModuleLoadFrame.h"
+#include "mozilla/glue/WindowsUnicode.h"
+
+namespace mozilla {
+
+glue::Win32SRWLock gDllServicesLock;
+glue::detail::DllServicesBase* gDllServices;
+
+}  // namespace mozilla
+
+using namespace mozilla;
+
+using CrashReporter::Annotation;
+using CrashReporter::AnnotationWriter;
+
+#define DLL_BLOCKLIST_ENTRY(name, ...) {name, __VA_ARGS__},
+#define DLL_BLOCKLIST_STRING_TYPE const char*
+#include "mozilla/WindowsDllBlocklistLegacyDefs.h"
+
+// define this for very verbose dll load debug spew
+#undef DEBUG_very_verbose
+
+static uint32_t sInitFlags;
+static bool sBlocklistInitAttempted;
+static bool sBlocklistInitFailed;
+static bool sUser32BeforeBlocklist;
+
+typedef MOZ_NORETURN_PTR void(__fastcall* BaseThreadInitThunk_func)(
+    BOOL aIsInitialThread, void* aStartAddress, void* aThreadParam);
+static WindowsDllInterceptor::FuncHookType<BaseThreadInitThunk_func>
+    stub_BaseThreadInitThunk;
+
+typedef NTSTATUS(NTAPI* LdrLoadDll_func)(PWCHAR filePath, PULONG flags,
+                                         PUNICODE_STRING moduleFileName,
+                                         PHANDLE handle);
+static WindowsDllInterceptor::FuncHookType<LdrLoadDll_func> stub_LdrLoadDll;
+
+#ifdef _M_AMD64
+typedef decltype(
+    RtlInstallFunctionTableCallback)* RtlInstallFunctionTableCallback_func;
+static WindowsDllInterceptor::FuncHookType<RtlInstallFunctionTableCallback_func>
+    stub_RtlInstallFunctionTableCallback;
+
+extern uint8_t* sMsMpegJitCodeRegionStart;
+extern size_t sMsMpegJitCodeRegionSize;
+
+BOOLEAN WINAPI patched_RtlInstallFunctionTableCallback(
+    DWORD64 TableIdentifier, DWORD64 BaseAddress, DWORD Length,
+    PGET_RUNTIME_FUNCTION_CALLBACK Callback, PVOID Context,
+    PCWSTR OutOfProcessCallbackDll) {
+  // msmpeg2vdec.dll registers a function table callback for its JIT code
+  // that just terminates the process, because that JIT has no unwind info.
+  // When that registration is seen here, remember the region so that
+  // StackWalk.cpp can avoid unwinding addresses inside it.
+  //
+  // Unloads of msmpeg2vdec.dll are deliberately not tracked; at worst the
+  // stack walker needlessly avoids a few pages of memory.
+
+  // Note: unlike GetModuleHandleW, GetModuleHandleExW adds a ref by default.
+  const DWORD flags = GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+                      GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT;
+  HMODULE mod = nullptr;
+
+  // These GetModuleHandle calls enter a critical section on Win7.
+  AutoSuppressStackWalking suppress;
+
+  if (GetModuleHandleExW(flags, reinterpret_cast<LPWSTR>(Callback), &mod) &&
+      GetModuleHandleW(L"msmpeg2vdec.dll") == mod) {
+    sMsMpegJitCodeRegionStart = reinterpret_cast<uint8_t*>(BaseAddress);
+    sMsMpegJitCodeRegionSize = Length;
+  }
+
+  return stub_RtlInstallFunctionTableCallback(TableIdentifier, BaseAddress,
+                                              Length, Callback, Context,
+                                              OutOfProcessCallbackDll);
+}
+#endif
+
+template <class T>
+struct RVAMap {
+  // Maps a read-only view of one T located |offset| bytes into |map|. Views
+  // must start on an allocation-granularity boundary, so the view begins at
+  // the preceding boundary and mMappedView is bumped forward by the slack.
+  RVAMap(HANDLE map, DWORD offset) {
+    SYSTEM_INFO info;
+    GetSystemInfo(&info);
+    const DWORD slack = offset % info.dwAllocationGranularity;
+    mRealView = ::MapViewOfFile(map, FILE_MAP_READ, 0, offset - slack,
+                                sizeof(T) + slack);
+    mMappedView = mRealView ? reinterpret_cast<const T*>(
+                                  static_cast<char*>(mRealView) + slack)
+                            : nullptr;
+  }
+  ~RVAMap() {
+    if (mRealView) {
+      ::UnmapViewOfFile(mRealView);
+    }
+  }
+  // The view is unmapped exactly once by the destructor, so copying (which
+  // would unmap the same view twice) is disallowed.
+  RVAMap(const RVAMap&) = delete;
+  RVAMap& operator=(const RVAMap&) = delete;
+  operator const T*() const { return mMappedView; }
+  const T* operator->() const { return mMappedView; }
+
+ private:
+  const T* mMappedView;
+  void* mRealView;
+};
+
+static DWORD GetTimestamp(const wchar_t* path) {
+  // Reads TimeDateStamp from the PE file header of |path|. Returns 0 when
+  // the file cannot be opened or mapped.
+  HANDLE file = ::CreateFileW(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
+                              OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+  if (file == INVALID_HANDLE_VALUE) {
+    return 0;
+  }
+
+  DWORD stamp = 0;
+  HANDLE mapping =
+      ::CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
+  if (mapping) {
+    RVAMap<IMAGE_DOS_HEADER> dosHeader(mapping, 0);
+    if (dosHeader) {
+      RVAMap<IMAGE_NT_HEADERS> ntHeaders(mapping, dosHeader->e_lfanew);
+      if (ntHeaders) stamp = ntHeaders->FileHeader.TimeDateStamp;
+    }
+    ::CloseHandle(mapping);
+  }
+  ::CloseHandle(file);
+  return stamp;
+}
+
+// This lock protects both the reentrancy sentinel and the crash reporter
+// data structures.
+static CRITICAL_SECTION sLock;
+
+/**
+ * Some versions of Windows call LoadLibraryEx to get the version information
+ * for a DLL, which causes our patched LdrLoadDll implementation to re-enter
+ * itself and cause infinite recursion and a stack-exhaustion crash. We protect
+ * against reentrancy by allowing recursive loads of the same DLL.
+ *
+ * Note that we don't use __declspec(thread) because that doesn't work in DLLs
+ * loaded via LoadLibrary and there can be a limited number of TLS slots, so
+ * we roll our own.
+ */
+class ReentrancySentinel {
+ public:
+  explicit ReentrancySentinel(const char* dllName) {
+    DWORD currentThreadId = GetCurrentThreadId();
+    AutoCriticalSection lock(&sLock);
+    mPreviousDllName = (*sThreadMap)[currentThreadId];  // nullptr if absent
+
+    // If there is a DLL currently being loaded and it has the same name
+    // as the current attempt, we're re-entering.
+    mReentered = mPreviousDllName && !stricmp(mPreviousDllName, dllName);
+    (*sThreadMap)[currentThreadId] = dllName;
+  }
+
+  ~ReentrancySentinel() {
+    DWORD currentThreadId = GetCurrentThreadId();
+    AutoCriticalSection lock(&sLock);
+    (*sThreadMap)[currentThreadId] = mPreviousDllName;  // restore outer name
+  }
+
+  bool BailOut() const { return mReentered; };
+
+  static void InitializeStatics() {  // must run before any sentinel is created
+    InitializeCriticalSection(&sLock);
+    sThreadMap = new std::map<DWORD, const char*>;
+  }
+
+ private:
+  static std::map<DWORD, const char*>* sThreadMap;  // thread id -> DLL name
+
+  const char* mPreviousDllName;  // name this thread was loading before us
+  bool mReentered;
+};
+
+std::map<DWORD, const char*>* ReentrancySentinel::sThreadMap;
+
+class WritableBuffer {
+ public:
+  WritableBuffer() : mBuffer{0}, mLen(0) {}
+  // Appends up to |aLen| bytes; whatever does not fit is silently dropped.
+  void Write(const char* aData, size_t aLen) {
+    const size_t writableLen = std::min(aLen, Available());
+    memcpy(mBuffer + mLen, aData, writableLen);
+    mLen += writableLen;
+  }
+
+  size_t Length() const { return mLen; }
+  const char* Data() const { return mBuffer; }
+
+ private:
+  size_t Available() const { return sizeof(mBuffer) - mLen; }
+
+  char mBuffer[1024];
+  size_t mLen;
+};
+
+/**
+ * This is a linked list of DLLs that have been blocked. It doesn't use
+ * mozilla::LinkedList because this is an append-only list and doesn't need
+ * to be doubly linked.
+ */
+class DllBlockSet {
+ public:
+  static void Add(const char* name, unsigned long long version);
+
+  // Write the list of blocked DLLs to a WritableBuffer object. This method is
+  // run after a crash occurs and must therefore not use the heap, etc.
+  static void Write(WritableBuffer& buffer);
+
+ private:
+  DllBlockSet(const char* name, unsigned long long version)
+      : mName(name), mVersion(version), mNext(nullptr) {}
+
+  const char* mName;  // points into the gWindowsDllBlocklist string
+  unsigned long long mVersion;
+  DllBlockSet* mNext;  // next-older entry, or nullptr at the tail
+
+  static DllBlockSet* gFirst;  // most recently added entry; guarded by sLock
+};
+
+DllBlockSet* DllBlockSet::gFirst;
+
+void DllBlockSet::Add(const char* name, unsigned long long version) {
+  AutoCriticalSection lock(&sLock);
+  for (const DllBlockSet* seen = gFirst; seen; seen = seen->mNext) {
+    if (seen->mVersion == version && strcmp(seen->mName, name) == 0) {
+      return;
+    }
+  }
+  // Pair not recorded yet: push a new node onto the front of the list.
+  DllBlockSet* node = new DllBlockSet(name, version);
+  node->mNext = gFirst;
+  gFirst = node;
+}
+
+void DllBlockSet::Write(WritableBuffer& buffer) {
+  // It would be nicer to use AutoCriticalSection here. However, its destructor
+  // might not run if an exception occurs, in which case we would never leave
+  // the critical section. (MSVC warns about this possibility.) So we
+  // enter and leave manually.
+  ::EnterCriticalSection(&sLock);
+
+  // Because this method is called after a crash occurs, and uses heap memory,
+  // protect this entire block with a structured exception handler.
+  MOZ_SEH_TRY {
+    for (DllBlockSet* b = gFirst; b; b = b->mNext) {
+      // Each entry is emitted as: name[,v.v.v.v];
+      buffer.Write(b->mName, strlen(b->mName));
+      if (b->mVersion != DllBlockInfo::ALL_VERSIONS) {
+        buffer.Write(",", 1);
+        uint16_t parts[4];  // the four 16-bit fields, most significant first
+        parts[0] = b->mVersion >> 48;
+        parts[1] = (b->mVersion >> 32) & 0xFFFF;
+        parts[2] = (b->mVersion >> 16) & 0xFFFF;
+        parts[3] = b->mVersion & 0xFFFF;
+        for (int p = 0; p < 4; ++p) {
+          char buf[32];
+          _ltoa_s(parts[p], buf, sizeof(buf), 10);
+          buffer.Write(buf, strlen(buf));
+          if (p != 3) {  // dots go between fields only
+            buffer.Write(".", 1);
+          }
+        }
+      }
+      buffer.Write(";", 1);
+    }
+  }
+  MOZ_SEH_EXCEPT(EXCEPTION_EXECUTE_HANDLER) {}  // faults are swallowed
+
+  ::LeaveCriticalSection(&sLock);
+}
+
+static UniquePtr<wchar_t[]> getFullPath(PWCHAR filePath, wchar_t* fname) {
+  // In Windows 8, the first parameter seems to be used for more than just the
+  // path name.  For example, its numerical value can be 1.  Passing a non-valid
+  // pointer to SearchPathW will cause a crash, so we need to check to see if we
+  // are handed a valid pointer, and otherwise just pass nullptr to SearchPathW.
+  PWCHAR sanitizedFilePath = nullptr;
+  if ((uintptr_t(filePath) >= 65536) && ((uintptr_t(filePath) & 1) == 0)) {
+    sanitizedFilePath = filePath;
+  }
+
+  // probe with an empty buffer to learn the required length (0 on failure)
+  DWORD pathlen =
+      SearchPathW(sanitizedFilePath, fname, L".dll", 0, nullptr, nullptr);
+  if (pathlen == 0) {
+    return nullptr;
+  }
+
+  auto full_fname = MakeUnique<wchar_t[]>(pathlen + 1);
+  if (!full_fname) {
+    // couldn't allocate memory?
+    return nullptr;
+  }
+
+  // now actually grab it; this second call's return value is not checked
+  SearchPathW(sanitizedFilePath, fname, L".dll", pathlen + 1, full_fname.get(),
+              nullptr);
+  return full_fname;
+}
+
+// No builtin function to find the last character matching a set
+static wchar_t* lastslash(wchar_t* s, int len) {
+  for (int i = len - 1; i >= 0; --i) {  // index loop: never points before |s|
+    if (s[i] == L'\\' || s[i] == L'/') {
+      return s + i;
+    }
+  }
+  return nullptr;  // no '\\' or '/' within the first |len| characters
+}
+
+// Detour for LdrLoadDll (installed by DllBlocklist_Initialize through
+// stub_LdrLoadDll.SetDetour). It extracts the leaf file name from
+// |moduleFileName|, lowercases it into an ASCII buffer and checks it against
+// a couple of name heuristics and the static blocklist.
+//
+// Returns STATUS_DLL_NOT_FOUND when the load is blocked; otherwise forwards
+// the call to the original function via stub_LdrLoadDll and returns its
+// status. Any situation the checks cannot handle (unterminated, over-long or
+// non-ASCII names) falls through to the original loader.
+static NTSTATUS NTAPI patched_LdrLoadDll(PWCHAR filePath, PULONG flags,
+                                         PUNICODE_STRING moduleFileName,
+                                         PHANDLE handle) {
+  // We have UCS2 (UTF16?), we want ASCII, but we also just want the filename
+  // portion
+#define DLLNAME_MAX 128
+  char dllName[DLLNAME_MAX + 1];
+  wchar_t* dll_part;
+  char* dot;
+
+  // UNICODE_STRING::Length is in bytes; convert to a count of wchar_t.
+  int len = moduleFileName->Length / 2;
+  wchar_t* fname = moduleFileName->Buffer;
+  UniquePtr<wchar_t[]> full_fname;
+
+  // The filename isn't guaranteed to be null terminated, but in practice
+  // it always will be; ensure that this is so, and bail if not.
+  // This is done instead of the more robust approach because of bug 527122,
+  // where lots of weird things were happening when we tried to make a copy.
+  if (moduleFileName->MaximumLength < moduleFileName->Length + 2 ||
+      fname[len] != 0) {
+#ifdef DEBUG
+    printf_stderr("LdrLoadDll: non-null terminated string found!\n");
+#endif
+    goto continue_loading;
+  }
+
+  // Strip any directory portion; from here on |dll_part|/|len| describe only
+  // the leaf name.
+  dll_part = lastslash(fname, len);
+  if (dll_part) {
+    dll_part = dll_part + 1;
+    len -= dll_part - fname;
+  } else {
+    dll_part = fname;
+  }
+
+#ifdef DEBUG_very_verbose
+  printf_stderr("LdrLoadDll: dll_part '%S' %d\n", dll_part, len);
+#endif
+
+  // if it's too long, then, we assume we won't want to block it,
+  // since DLLNAME_MAX should be at least long enough to hold the longest
+  // entry in our blocklist.
+  if (len > DLLNAME_MAX) {
+#ifdef DEBUG
+    printf_stderr("LdrLoadDll: len too long! %d\n", len);
+#endif
+    goto continue_loading;
+  }
+
+  // copy over to our char byte buffer, lowercasing ASCII as we go
+  for (int i = 0; i < len; i++) {
+    wchar_t c = dll_part[i];
+
+    if (c > 0x7f) {
+      // welp, it's not ascii; if we need to add non-ascii things to
+      // our blocklist, we'll have to remove this limitation.
+      goto continue_loading;
+    }
+
+    // ensure that dll name is all lowercase
+    if (c >= 'A' && c <= 'Z') c += 'a' - 'A';
+
+    dllName[i] = (char)c;
+  }
+
+  dllName[len] = 0;
+
+#ifdef DEBUG_very_verbose
+  printf_stderr("LdrLoadDll: dll name '%s'\n", dllName);
+#endif
+
+  // The name-based checks below run only when the WasBootstrapped init flag
+  // is not set.
+  if (!(sInitFlags & eDllBlocklistInitFlagWasBootstrapped)) {
+    // Block a suspicious binary that uses various 12-digit hex strings
+    // e.g. MovieMode.48CA2AEFA22D.dll (bug 973138)
+    dot = strchr(dllName, '.');
+    // The second '.' must be exactly 13 characters after the first, i.e. the
+    // component between the two dots is 12 characters long...
+    if (dot && (strchr(dot + 1, '.') == dot + 13)) {
+      char* end = nullptr;
+      // ...and all 12 of those characters must parse as hexadecimal digits.
+      _strtoui64(dot + 1, &end, 16);
+      if (end == dot + 13) {
+        return STATUS_DLL_NOT_FOUND;
+      }
+    }
+    // Block binaries where the filename is at least 16 hex digits
+    if (dot && ((dot - dllName) >= 16)) {
+      char* current = dllName;
+      while (current < dot && isxdigit(*current)) {
+        current++;
+      }
+      if (current == dot) {
+        return STATUS_DLL_NOT_FOUND;
+      }
+    }
+
+    // then compare to everything on the blocklist
+    // (linear scan; the list is terminated by an entry with a null mName)
+    DECLARE_POINTER_TO_FIRST_DLL_BLOCKLIST_ENTRY(info);
+    while (info->mName) {
+      if (strcmp(info->mName, dllName) == 0) break;
+
+      info++;
+    }
+
+    if (info->mName) {
+      // Stays false (=> block) unless a version/timestamp check below proves
+      // that this particular file is newer than the blocked range.
+      bool load_ok = false;
+
+#ifdef DEBUG_very_verbose
+      printf_stderr("LdrLoadDll: info->mName: '%s'\n", info->mName);
+#endif
+
+      if (info->mFlags & DllBlockInfo::REDIRECT_TO_NOOP_ENTRYPOINT) {
+        printf_stderr(
+            "LdrLoadDll: "
+            "Ignoring the REDIRECT_TO_NOOP_ENTRYPOINT flag\n");
+      }
+
+      // Each of the following flag checks lets the load proceed when the
+      // entry does not apply to the current OS version or process type.
+      if ((info->mFlags & DllBlockInfo::BLOCK_WIN8_AND_OLDER) &&
+          IsWin8Point1OrLater()) {
+        goto continue_loading;
+      }
+
+      if ((info->mFlags & DllBlockInfo::BLOCK_WIN7_AND_OLDER) &&
+          IsWin8OrLater()) {
+        goto continue_loading;
+      }
+
+      if ((info->mFlags & DllBlockInfo::CHILD_PROCESSES_ONLY) &&
+          !(sInitFlags & eDllBlocklistInitFlagIsChildProcess)) {
+        goto continue_loading;
+      }
+
+      if ((info->mFlags & DllBlockInfo::BROWSER_PROCESS_ONLY) &&
+          (sInitFlags & eDllBlocklistInitFlagIsChildProcess)) {
+        goto continue_loading;
+      }
+
+      // Version recorded with the blocked DLL via DllBlockSet::Add; only
+      // overwritten on the USE_TIMESTAMP path below.
+      unsigned long long fVersion = DllBlockInfo::ALL_VERSIONS;
+
+      if (info->mMaxVersion != DllBlockInfo::ALL_VERSIONS) {
+        // Guard for the version lookup below; if the sentinel reports that we
+        // should bail out, let the load through.
+        ReentrancySentinel sentinel(dllName);
+        if (sentinel.BailOut()) {
+          goto continue_loading;
+        }
+
+        full_fname = getFullPath(filePath, fname);
+        if (!full_fname) {
+          // uh, we couldn't find the DLL at all, so...
+          printf_stderr(
+              "LdrLoadDll: Blocking load of '%s' (SearchPathW didn't find "
+              "it?)\n",
+              dllName);
+          return STATUS_DLL_NOT_FOUND;
+        }
+
+        if (info->mFlags & DllBlockInfo::USE_TIMESTAMP) {
+          fVersion = GetTimestamp(full_fname.get());
+          if (fVersion > info->mMaxVersion) {
+            load_ok = true;
+          }
+        } else {
+          LauncherResult<ModuleVersion> version =
+              GetModuleVersion(full_fname.get());
+          // If we failed to get the version information, we block.
+          if (version.isOk()) {
+            load_ok = !info->IsVersionBlocked(version.unwrap());
+          }
+        }
+      }
+
+      if (!load_ok) {
+        printf_stderr(
+            "LdrLoadDll: Blocking load of '%s' -- see "
+            "http://www.mozilla.com/en-US/blocklist/\n",
+            dllName);
+        DllBlockSet::Add(info->mName, fVersion);
+        return STATUS_DLL_NOT_FOUND;
+      }
+    }
+  }
+
+continue_loading:
+#ifdef DEBUG_very_verbose
+  printf_stderr("LdrLoadDll: continuing load... ('%S')\n",
+                moduleFileName->Buffer);
+#endif
+
+  glue::ModuleLoadFrame loadFrame(moduleFileName);
+
+  NTSTATUS ret;
+  // NOTE(review): myHandle is not initialized. If stub_LdrLoadDll can fail
+  // without writing to it, the value copied into *handle and passed to
+  // SetLoadStatus below is indeterminate -- confirm the callee always sets it.
+  HANDLE myHandle;
+
+  ret = stub_LdrLoadDll(filePath, flags, moduleFileName, &myHandle);
+
+  // A local handle is always passed to the original function so that the
+  // result can be given to loadFrame even when the caller passed no |handle|.
+  if (handle) {
+    *handle = myHandle;
+  }
+
+  loadFrame.SetLoadStatus(ret, myHandle);
+
+  return ret;
+}
+
+#if defined(NIGHTLY_BUILD)
+// Thread start addresses that ShouldBlockThread() rejects outright. The four
+// slots are filled in by DllBlocklist_Initialize() with the kernel32.dll
+// addresses of LoadLibraryA, LoadLibraryW, LoadLibraryExA and LoadLibraryExW;
+// a thread whose start routine is one of those indicates DLL injection.
+static void* gStartAddressesToBlock[4];
+#endif  // defined(NIGHTLY_BUILD)
+
+// Decides whether a new thread's start routine should be replaced with a
+// no-op. Returns true when |aStartAddress| should be blocked.
+static bool ShouldBlockThread(void* aStartAddress) {
+  // A null thread proc means the crash is intentional (crashfirefox.exe
+  // depends on this), so it is never blocked.
+  if (!aStartAddress) {
+    return false;
+  }
+
+#if defined(NIGHTLY_BUILD)
+  // Reject threads that start directly at one of the listed addresses.
+  for (void* blockedAddress : gStartAddressesToBlock) {
+    if (blockedAddress == aStartAddress) {
+      return true;
+    }
+  }
+#endif
+
+  MEMORY_BASIC_INFORMATION regionInfo = {0};
+  if (!VirtualQuery(aStartAddress, &regionInfo, sizeof(regionInfo))) {
+    // The query failed, so there is nothing to judge the address by.
+    return false;
+  }
+
+  // Block unless the start address lies in committed memory whose protection
+  // is exactly PAGE_EXECUTE_READ.
+  return regionInfo.State != MEM_COMMIT ||
+         regionInfo.Protect != PAGE_EXECUTE_READ;
+}
+
+// Replacement thread procedure substituted by patched_BaseThreadInitThunk for
+// a blocked start address. It ignores its parameter and returns 0, which
+// allows blocked threads to still run normally through BaseThreadInitThunk, in
+// case there's any magic there that we shouldn't skip.
+static DWORD WINAPI NopThreadProc(void* /* aThreadParam */) { return 0; }
+
+// Detour for BaseThreadInitThunk. Forwards to the original thunk, swapping in
+// NopThreadProc as the start routine when ShouldBlockThread() rejects the
+// requested start address.
+static MOZ_NORETURN void __fastcall patched_BaseThreadInitThunk(
+    BOOL aIsInitialThread, void* aStartAddress, void* aThreadParam) {
+  void* const startAddress = ShouldBlockThread(aStartAddress)
+                                 ? (void*)NopThreadProc
+                                 : aStartAddress;
+
+  stub_BaseThreadInitThunk(aIsInitialThread, startAddress, aThreadParam);
+}
+
+// Interceptors used by DllBlocklist_Initialize() to hook functions in
+// ntdll.dll (LdrLoadDll) and kernel32.dll (BaseThreadInitThunk and, on
+// pre-Win8 AMD64, RtlInstallFunctionTableCallback).
+static WindowsDllInterceptor NtDllIntercept;
+static WindowsDllInterceptor Kernel32Intercept;
+
+// Defined further down; declared here because DllBlocklist_Initialize() calls
+// it.
+static void GetNativeNtBlockSetWriter();
+
+// Handed to glue::ModuleLoadFrame::StaticInit() during initialization and
+// later to the DllServicesBase instance in DllBlocklist_SetFullDllServices().
+static glue::LoaderObserver gMozglueLoaderObserver;
+static nt::WinLauncherFunctions gWinLauncherFunctions;
+
+// Sets up DLL blocking for this process. Only the first call does anything;
+// later calls return immediately.
+//
+// aInitFlags is a bitwise OR of DllBlocklistInitFlags values. When
+// eDllBlocklistInitFlagWasBootstrapped is set, the LdrLoadDll and
+// BaseThreadInitThunk hooks are NOT installed here; only the crash annotation
+// writer is looked up. Otherwise the function installs the hooks and records
+// failures in sBlocklistInitFailed / sUser32BeforeBlocklist, which are
+// reported by DllBlocklist_CheckStatus() and InternalWriteNotes().
+MFBT_API void DllBlocklist_Initialize(uint32_t aInitFlags) {
+  if (sBlocklistInitAttempted) {
+    return;
+  }
+  sBlocklistInitAttempted = true;
+
+  sInitFlags = aInitFlags;
+
+  glue::ModuleLoadFrame::StaticInit(&gMozglueLoaderObserver,
+                                    &gWinLauncherFunctions);
+
+#ifdef _M_AMD64
+  if (!IsWin8OrLater()) {
+    // NOTE(review): on this path Kernel32Intercept.Init("kernel32.dll") is
+    // called again further down -- presumably a repeated Init is harmless;
+    // confirm against WindowsDllInterceptor.
+    Kernel32Intercept.Init("kernel32.dll");
+
+    // The crash that this hook works around is only seen on Win7.
+    stub_RtlInstallFunctionTableCallback.Set(
+        Kernel32Intercept, "RtlInstallFunctionTableCallback",
+        &patched_RtlInstallFunctionTableCallback);
+  }
+#endif
+
+  if (aInitFlags & eDllBlocklistInitFlagWasBootstrapped) {
+    GetNativeNtBlockSetWriter();
+    return;
+  }
+
+  // There are a couple of exceptional cases where we skip user32.dll check.
+  // - If the process was bootstrapped by the launcher process, AppInit
+  //   DLLs will be intercepted by the new DllBlockList.  No need to check
+  //   here.
+  // - The code to initialize the base profiler loads winmm.dll which
+  //   statically links user32.dll on an older Windows.  This means if the base
+  //   profiler is active before coming here, we cannot fully intercept AppInit
+  //   DLLs.  Given that the base profiler is used outside the typical use
+  //   cases, it's ok not to check user32.dll in this scenario.
+  // NOTE(review): the WasBootstrapped term below can never be true here,
+  // because that case already returned above.
+  const bool skipUser32Check =
+      (sInitFlags & eDllBlocklistInitFlagWasBootstrapped)
+#ifdef MOZ_GECKO_PROFILER
+      ||
+      (!IsWin10AnniversaryUpdateOrLater() && baseprofiler::profiler_is_active())
+#endif
+      ;
+
+  // In order to be effective against AppInit DLLs, the blocklist must be
+  // initialized before user32.dll is loaded into the process (bug 932100).
+  if (!skipUser32Check && GetModuleHandleW(L"user32.dll")) {
+    sUser32BeforeBlocklist = true;
+#ifdef DEBUG
+    printf_stderr("DLL blocklist was unable to intercept AppInit DLLs.\n");
+#endif
+  }
+
+  NtDllIntercept.Init("ntdll.dll");
+
+  ReentrancySentinel::InitializeStatics();
+
+  // We specifically use a detour, because there are cases where external
+  // code also tries to hook LdrLoadDll, and doesn't know how to relocate our
+  // nop space patches. (Bug 951827)
+  bool ok = stub_LdrLoadDll.SetDetour(NtDllIntercept, "LdrLoadDll",
+                                      &patched_LdrLoadDll);
+
+  if (!ok) {
+    sBlocklistInitFailed = true;
+#ifdef DEBUG
+    printf_stderr("LdrLoadDll hook failed, no dll blocklisting active\n");
+#endif
+  }
+
+  // If someone injects a thread early that causes user32.dll to load off the
+  // main thread this causes issues, so load it as soon as we've initialized
+  // the block-list. (See bug 1400637)
+  if (!sUser32BeforeBlocklist) {
+    ::LoadLibraryW(L"user32.dll");
+  }
+
+  Kernel32Intercept.Init("kernel32.dll");
+
+  // Bug 1361410: WRusr.dll will overwrite our hook and cause a crash.
+  // Workaround: If we detect WRusr.dll, don't hook.
+  if (!GetModuleHandleW(L"WRusr.dll")) {
+    if (!stub_BaseThreadInitThunk.SetDetour(Kernel32Intercept,
+                                            "BaseThreadInitThunk",
+                                            &patched_BaseThreadInitThunk)) {
+#ifdef DEBUG
+      printf_stderr("BaseThreadInitThunk hook failed\n");
+#endif
+    }
+  }
+
+#if defined(NIGHTLY_BUILD)
+  // Populate a list of thread start addresses to block.
+  // (A GetProcAddress failure stores nullptr, which ShouldBlockThread() never
+  // matches because it returns early for a null start address.)
+  HMODULE hKernel = GetModuleHandleW(L"kernel32.dll");
+  if (hKernel) {
+    void* pProc;
+
+    pProc = (void*)GetProcAddress(hKernel, "LoadLibraryA");
+    gStartAddressesToBlock[0] = pProc;
+
+    pProc = (void*)GetProcAddress(hKernel, "LoadLibraryW");
+    gStartAddressesToBlock[1] = pProc;
+
+    pProc = (void*)GetProcAddress(hKernel, "LoadLibraryExA");
+    gStartAddressesToBlock[2] = pProc;
+
+    pProc = (void*)GetProcAddress(hKernel, "LoadLibraryExW");
+    gStartAddressesToBlock[3] = pProc;
+  }
+#endif
+}
+
+#ifdef DEBUG
+// Debug-only export. The body is empty, so calling it currently has no
+// effect.
+MFBT_API void DllBlocklist_Shutdown() {}
+#endif  // DEBUG
+
+// Default crash-annotation writer. Emits the set of DLLs blocked so far, and
+// a "1" marker for each of the two initialization problems (hook failure,
+// user32.dll loaded before the blocklist) that was recorded.
+static void InternalWriteNotes(AnnotationWriter& aWriter) {
+  WritableBuffer blockedDlls;
+  DllBlockSet::Write(blockedDlls);
+  aWriter.Write(Annotation::BlockedDllList, blockedDlls.Data(),
+                blockedDlls.Length());
+
+  if (sBlocklistInitFailed) {
+    aWriter.Write(Annotation::BlocklistInitFailed, "1");
+  }
+  if (sUser32BeforeBlocklist) {
+    aWriter.Write(Annotation::User32BeforeBlocklist, "1");
+  }
+}
+
+// Signature of a function that writes blocklist crash annotations.
+using WriterFn = void (*)(AnnotationWriter&);
+// Writer invoked by DllBlocklist_WriteNotes(). Defaults to the local
+// implementation; GetNativeNtBlockSetWriter() may replace it.
+static WriterFn gWriterFn = &InternalWriteNotes;
+
+// Looks for an export named "NativeNtBlockSet_Write" in the process's main
+// executable module and, if it exists, makes it the active annotation writer.
+// gWriterFn is left untouched when the export is missing.
+static void GetNativeNtBlockSetWriter() {
+  HMODULE exeModule = ::GetModuleHandleW(nullptr);
+  FARPROC exported = ::GetProcAddress(exeModule, "NativeNtBlockSet_Write");
+  if (!exported) {
+    return;
+  }
+
+  gWriterFn = reinterpret_cast<WriterFn>(exported);
+}
+
+// Writes the blocklist crash annotations through |aWriter| by delegating to
+// whichever writer function is currently stored in gWriterFn.
+MFBT_API void DllBlocklist_WriteNotes(AnnotationWriter& aWriter) {
+  MOZ_ASSERT(gWriterFn);
+  gWriterFn(aWriter);
+}
+
+// Reports whether blocklist initialization went cleanly: false if hooking
+// LdrLoadDll failed or user32.dll was already loaded when the blocklist was
+// initialized, true otherwise.
+MFBT_API bool DllBlocklist_CheckStatus() {
+  return !(sBlocklistInitFailed || sUser32BeforeBlocklist);
+}
+
+// ============================================================================
+// This section is for DLL Services
+// ============================================================================
+
+namespace mozilla {
+// Forward declaration; the definition is not in this part of the file. The
+// returned pointer is passed to DllServicesBase::SetAuthenticodeImpl() below.
+Authenticode* GetAuthenticode();
+}  // namespace mozilla
+
+// Registers |aSvc| as the process-wide DLL services object (gDllServices),
+// under gDllServicesLock. A non-null |aSvc| first receives the Authenticode
+// implementation and the launcher function table, and the loader observer is
+// told to forward to it. Passing nullptr just clears gDllServices.
+MFBT_API void DllBlocklist_SetFullDllServices(
+    mozilla::glue::detail::DllServicesBase* aSvc) {
+  glue::AutoExclusiveLock lock(gDllServicesLock);
+  if (aSvc) {
+    aSvc->SetAuthenticodeImpl(GetAuthenticode());
+    aSvc->SetWinLauncherFunctions(gWinLauncherFunctions);
+    gMozglueLoaderObserver.Forward(aSvc);
+  }
+
+  gDllServices = aSvc;
+}
+
+// Reduced variant of DllBlocklist_SetFullDllServices(): gives |aSvc| the
+// Authenticode implementation and disables the loader observer. Nothing
+// happens when |aSvc| is null, and gDllServices is never modified.
+MFBT_API void DllBlocklist_SetBasicDllServices(
+    mozilla::glue::detail::DllServicesBase* aSvc) {
+  if (aSvc) {
+    aSvc->SetAuthenticodeImpl(GetAuthenticode());
+    gMozglueLoaderObserver.Disable();
+  }
+}
diff --git a/mozglue/dllservices/WindowsDllBlocklist.h b/mozglue/dllservices/WindowsDllBlocklist.h
new file mode 100644
index 0000000000..475c5a34a5
--- /dev/null
+++ b/mozglue/dllservices/WindowsDllBlocklist.h
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_windowsdllblocklist_h
+#define mozilla_windowsdllblocklist_h
+
+#if (defined(_MSC_VER) || defined(__MINGW32__)) && \
+    (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
+
+#  include <windows.h>
+#  include "CrashAnnotations.h"
+#  include "mozilla/Attributes.h"
+#  include "mozilla/Types.h"
+
+#  define HAS_DLL_BLOCKLIST
+
+// Bit flags accepted by DllBlocklist_Initialize(); values may be OR'd
+// together.
+enum DllBlocklistInitFlags {
+  // No special behaviour.
+  eDllBlocklistInitFlagDefault = 0,
+  // The current process is a child process; used to evaluate blocklist
+  // entries restricted to child or browser processes.
+  eDllBlocklistInitFlagIsChildProcess = 1,
+  // The process was bootstrapped by the launcher process.
+  eDllBlocklistInitFlagWasBootstrapped = 2
+};
+
+// Only available from within firefox.exe
+#  if !defined(IMPL_MFBT) && !defined(MOZILLA_INTERNAL_API)
+extern uint32_t gBlocklistInitFlags;
+#  endif  // !defined(IMPL_MFBT) && !defined(MOZILLA_INTERNAL_API)
+
+// Sets up DLL blocking for this process. aInitFlags is a bitwise OR of
+// DllBlocklistInitFlags values.
+MFBT_API void DllBlocklist_Initialize(
+    uint32_t aInitFlags = eDllBlocklistInitFlagDefault);
+// Writes the blocklist-related crash annotations through aWriter.
+MFBT_API void DllBlocklist_WriteNotes(CrashReporter::AnnotationWriter& aWriter);
+// Returns true if blocklist initialization recorded no problems.
+MFBT_API bool DllBlocklist_CheckStatus();
+
+// This export intends to clean up after DllBlocklist_Initialize().
+// It's disabled in release builds for performance and to limit callers' ability
+// to interfere with dll blocking.
+#  ifdef DEBUG
+MFBT_API void DllBlocklist_Shutdown();
+#  endif  // DEBUG
+
+// Forward declaration
+namespace mozilla {
+namespace glue {
+namespace detail {
+class DllServicesBase;
+}  // namespace detail
+}  // namespace glue
+}  // namespace mozilla
+
+// Hand a DllServicesBase instance to the blocklist code. NOTE(review): the
+// precise difference between the "full" and "basic" variants is defined by
+// their implementations, which are not part of this header.
+MFBT_API void DllBlocklist_SetFullDllServices(
+    mozilla::glue::detail::DllServicesBase* aSvc);
+MFBT_API void DllBlocklist_SetBasicDllServices(
+    mozilla::glue::detail::DllServicesBase* aSvc);
+
+#endif  // (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64))
+#endif  // mozilla_windowsdllblocklist_h
diff --git a/mozglue/dllservices/WindowsDllBlocklistCommon.h b/mozglue/dllservices/WindowsDllBlocklistCommon.h
new file mode 100644
index 0000000000..aa8d65e135
--- /dev/null
+++ b/mozglue/dllservices/WindowsDllBlocklistCommon.h
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_WindowsDllBlocklistCommon_h
+#define mozilla_WindowsDllBlocklistCommon_h
+
+#include <stdint.h>
+
+#include "mozilla/ArrayUtils.h"
+
+namespace mozilla {
+
+// One entry of a DLL blocklist. StrType is the string type used to store the
+// DLL name. Instances are aggregate-initialized in member order: name,
+// maximum blocked version, flags.
+template <typename StrType>
+struct DllBlockInfoT {
+  // DLL file name, which must be written in LOWERCASE: it is only ever
+  // compared against a lowercased copy of the name being loaded.
+  StrType mName;
+
+  // Highest version that is blocked. ALL_VERSIONS blocks every version;
+  // any other value blocks versions less than or equal to it, where the
+  // version is obtained via GetFileVersionInfo from VS_FIXEDFILEINFO's
+  // dwFileVersionMS and dwFileVersionLS fields.
+  //
+  // A version normally has four components A.B.C.D packed as
+  // 0x AAAA BBBB CCCC DDDD ULL (spaces added for clarity), although that
+  // layout is not mandatory.
+  uint64_t mMaxVersion;
+
+  // Modifiers for how an entry is applied.
+  //
+  // USE_TIMESTAMP: compare against the timestamp in the IMAGE_FILE_HEADER
+  // rather than a version number.
+  //
+  // CHILD_PROCESSES_ONLY: block the DLL only when running as a child
+  // process.
+  enum Flags {
+    FLAGS_DEFAULT = 0,
+    BLOCK_WIN7_AND_OLDER = 1 << 0,
+    BLOCK_WIN8_AND_OLDER = 1 << 1,
+    USE_TIMESTAMP = 1 << 2,
+    CHILD_PROCESSES_ONLY = 1 << 3,
+    BROWSER_PROCESS_ONLY = 1 << 4,
+    REDIRECT_TO_NOOP_ENTRYPOINT = 1 << 5,
+  } mFlags;
+
+  // True when a module whose version is aOther falls inside the blocked
+  // range of this entry.
+  bool IsVersionBlocked(const uint64_t aOther) const {
+    return mMaxVersion == ALL_VERSIONS || aOther <= mMaxVersion;
+  }
+
+  // Sentinel for mMaxVersion meaning "every version is blocked".
+  static const uint64_t ALL_VERSIONS = (uint64_t)-1LL;
+
+  // Some DLLs, early releases in particular, carry no version number.
+  // Blocking "version <= 0" blocks those unversioned DLLs (the version
+  // query fails for them) without blocking any versioned instance.
+  static const uint64_t UNVERSIONED = 0ULL;
+};
+
+}  // namespace mozilla
+
+// Packs the four (decimal) components a.b.c.d of a DLL version number into
+// the single 64-bit value used by the blocklist, 16 bits per component with
+// |a| in the most significant position.
+#if defined(_MSC_VER) && !defined(__clang__)
+
+// MSVC does not properly handle the constexpr MAKE_VERSION, so we use a macro
+// instead (ugh).
+#  define MAKE_VERSION(a, b, c, d) \
+    ((a##ULL << 48) + (b##ULL << 32) + (c##ULL << 16) + d##ULL)
+
+#else
+
+static inline constexpr uint64_t MAKE_VERSION(uint16_t a, uint16_t b,
+                                              uint16_t c, uint16_t d) {
+  return (uint64_t{a} << 48) | (uint64_t{b} << 32) | (uint64_t{c} << 16) |
+         uint64_t{d};
+}
+
+#endif
+
+#if !defined(DLL_BLOCKLIST_STRING_TYPE)
+#  error "You must define DLL_BLOCKLIST_STRING_TYPE"
+#endif  // !defined(DLL_BLOCKLIST_STRING_TYPE)
+
+// Opens the definition of a static blocklist array called |name|, and
+// introduces the DllBlockInfo alias for the configured string type. The
+// entries follow as brace initializers and the list must be closed with
+// DLL_BLOCKLIST_DEFINITIONS_END.
+#define DLL_BLOCKLIST_DEFINITIONS_BEGIN_NAMED(name)                       \
+  using DllBlockInfo = mozilla::DllBlockInfoT<DLL_BLOCKLIST_STRING_TYPE>; \
+  static const DllBlockInfo name[] = {
+// Same as above, using the default array name gWindowsDllBlocklist.
+#define DLL_BLOCKLIST_DEFINITIONS_BEGIN \
+  DLL_BLOCKLIST_DEFINITIONS_BEGIN_NAMED(gWindowsDllBlocklist)
+
+// Closes the array, appending an empty (value-initialized) terminator entry.
+#define DLL_BLOCKLIST_DEFINITIONS_END \
+  {}                                  \
+  }                                   \
+  ;
+
+// Declares |name| as a pointer to the first entry of |list|.
+#define DECLARE_POINTER_TO_FIRST_DLL_BLOCKLIST_ENTRY_FOR(name, list) \
+  const DllBlockInfo* name = &list[0]
+
+#define DECLARE_POINTER_TO_FIRST_DLL_BLOCKLIST_ENTRY(name) \
+  DECLARE_POINTER_TO_FIRST_DLL_BLOCKLIST_ENTRY_FOR(name, gWindowsDllBlocklist)
+
+// Declares |name| as a pointer to the final array element of |list|. For a
+// list closed with DLL_BLOCKLIST_DEFINITIONS_END that element is the empty
+// terminator entry, not the last real blocklist entry.
+#define DECLARE_POINTER_TO_LAST_DLL_BLOCKLIST_ENTRY_FOR(name, list) \
+  const DllBlockInfo* name = &list[mozilla::ArrayLength(list) - 1]
+
+#define DECLARE_POINTER_TO_LAST_DLL_BLOCKLIST_ENTRY(name) \
+  DECLARE_POINTER_TO_LAST_DLL_BLOCKLIST_ENTRY_FOR(name, gWindowsDllBlocklist)
+
+// Declares |name| as the number of entries in |list|, not counting the
+// terminator entry.
+#define DECLARE_DLL_BLOCKLIST_NUM_ENTRIES_FOR(name, list) \
+  const size_t name = mozilla::ArrayLength(list) - 1
+
+#define DECLARE_DLL_BLOCKLIST_NUM_ENTRIES(name) \
+  DECLARE_DLL_BLOCKLIST_NUM_ENTRIES_FOR(name, gWindowsDllBlocklist)
+
+#endif  // mozilla_WindowsDllBlocklistCommon_h
diff --git a/mozglue/dllservices/WindowsDllBlocklistDefs.in b/mozglue/dllservices/WindowsDllBlocklistDefs.in
new file mode 100644
index 0000000000..752a131f9f
--- /dev/null
+++ b/mozglue/dllservices/WindowsDllBlocklistDefs.in
@@ -0,0 +1,284 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file exposes three lists:
+# ALL_PROCESSES, BROWSER_PROCESS, and CHILD_PROCESSES
+#
+# In addition, each of those lists supports a special variant for test-only
+# entries:
+# ALL_PROCESSES_TESTS, BROWSER_PROCESS_TESTS, and CHILD_PROCESSES_TESTS
+#
+# Choose the list that matches the process type(s) in which your DLL block
+# should apply.
+#
+# The currently supported blocklist entry types are:
+# DllBlocklistEntry, A11yBlocklistEntry, LspBlocklistEntry,
+# RedirectToNoOpEntryPoint
+# (See gen_dll_blocklist_defs.py for their documentation.)
+#
+# Example:
+# ALL_PROCESSES += [
+#     DllBlocklistEntry("foo.dll", (1, 2, 3, 4)),
+#     DllBlocklistEntry("foo.dll", ALL_VERSIONS),
+#     DllBlocklistEntry("foo.dll", UNVERSIONED),
+#     DllBlocklistEntry("foo.dll", 0x0000123400000000),
+#     DllBlocklistEntry("foo.dll", PETimeStamp(0x12345678)),
+# ]
+#
+# The version parameter is the "last bad" version; that is, we block anything
+# that is less than or equal to that version.
+
+ALL_PROCESSES += [
+    # NPFFAddon - Known malware
+    DllBlocklistEntry("npffaddon.dll", ALL_VERSIONS),
+
+    # AVG 8 - Antivirus vendor AVG, old version, plugin already blocklisted
+    DllBlocklistEntry("avgrsstx.dll", (8,5,0,401)),
+
+    # calc.dll - Suspected malware
+    DllBlocklistEntry("calc.dll", (1,0,0,1)),
+
+    # hook.dll - Suspected malware
+    DllBlocklistEntry("hook.dll", ALL_VERSIONS),
+
+    # GoogleDesktopNetwork3.dll - Extremely old, unversioned instances
+    # of this DLL cause crashes
+    DllBlocklistEntry("googledesktopnetwork3.dll", UNVERSIONED),
+
+    # rdolib.dll - Suspected malware
+    DllBlocklistEntry("rdolib.dll", (6,0,88,4)),
+
+    # fgjk4wvb.dll - Suspected malware
+    DllBlocklistEntry("fgjk4wvb.dll", (8,8,8,8)),
+
+    # radhslib.dll - Naomi internet filter - unmaintained since 2006
+    DllBlocklistEntry("radhslib.dll", UNVERSIONED),
+
+    # Music download filter for vkontakte.ru - old instances
+    # of this DLL cause crashes
+    DllBlocklistEntry("vksaver.dll", (2,2,2,0)),
+
+    # Topcrash in Firefox 4.0b1
+    DllBlocklistEntry("rlxf.dll", (1,2,323,1)),
+
+    # psicon.dll - Topcrashes in Thunderbird, and some crashes in Firefox
+    # Adobe photoshop library, now redundant in later installations
+    DllBlocklistEntry("psicon.dll", ALL_VERSIONS),
+
+    # Topcrash in Firefox 4 betas (bug 618899),
+    DllBlocklistEntry("accelerator.dll", (3,2,1,6)),
+
+    # Topcrash with Roboform in Firefox 8 (bug 699134),
+    DllBlocklistEntry("rf-firefox.dll", (7,6,1,0)),
+    DllBlocklistEntry("roboform.dll", (7,6,1,0)),
+
+    # Topcrash with Babylon Toolbar on FF16+ (bug 721264),
+    DllBlocklistEntry("babyfox.dll", ALL_VERSIONS),
+
+    # sprotector.dll crashes, bug 957258
+    DllBlocklistEntry("sprotector.dll", ALL_VERSIONS),
+
+    # Windows Media Foundation FLAC decoder and type sniffer (bug 839031).
+    DllBlocklistEntry("mfflac.dll", ALL_VERSIONS),
+
+    # Older Relevant Knowledge DLLs cause us to crash (bug 904001).
+    DllBlocklistEntry("rlnx.dll", (1, 3, 334, 9)),
+    DllBlocklistEntry("pmnx.dll", (1, 3, 334, 9)),
+    DllBlocklistEntry("opnx.dll", (1, 3, 334, 9)),
+    DllBlocklistEntry("prnx.dll", (1, 3, 334, 9)),
+
+    # Older Belgian ID card software causes Firefox to crash or hang on
+    # shutdown, bug 831285 and 918399.
+    DllBlocklistEntry("beid35cardlayer.dll", (3, 5, 6, 6968)),
+
+    # bug 925459, bitguard crashes
+    DllBlocklistEntry("bitguard.dll", ALL_VERSIONS),
+
+    # bug 812683 - crashes in Windows library when Asus Gamer OSD is installed
+    # Software is discontinued/unsupported
+    DllBlocklistEntry("atkdx11disp.dll", ALL_VERSIONS),
+
+    # Topcrash with Conduit SearchProtect, bug 944542
+    DllBlocklistEntry("spvc32.dll", ALL_VERSIONS),
+
+    # Topcrash with V-bates, bug 1002748 and bug 1023239
+    DllBlocklistEntry("libinject.dll", UNVERSIONED),
+    DllBlocklistEntry("libinject2.dll", PETimeStamp(0x537DDC93)),
+    DllBlocklistEntry("libredir2.dll", PETimeStamp(0x5385B7ED)),
+
+    # Crashes with RoboForm2Go written against old SDK, bug 988311/1196859
+    DllBlocklistEntry("rf-firefox-22.dll", ALL_VERSIONS),
+    DllBlocklistEntry("rf-firefox-40.dll", ALL_VERSIONS),
+
+    # Crashes with DesktopTemperature, bug 1046382
+    DllBlocklistEntry("dtwxsvc.dll", PETimeStamp(0x53153234)),
+
+    # Startup crashes with Lenovo Onekey Theater, bug 1123778
+    DllBlocklistEntry("activedetect32.dll", UNVERSIONED),
+    DllBlocklistEntry("activedetect64.dll", UNVERSIONED),
+    DllBlocklistEntry("windowsapihookdll32.dll", UNVERSIONED),
+    DllBlocklistEntry("windowsapihookdll64.dll", UNVERSIONED),
+
+    # Flash crashes with RealNetworks RealDownloader, bug 1132663
+    DllBlocklistEntry("rndlnpshimswf.dll", ALL_VERSIONS),
+    DllBlocklistEntry("rndlmainbrowserrecordplugin.dll", ALL_VERSIONS),
+
+    # Startup crashes with RealNetworks Browser Record Plugin, bug 1170141
+    DllBlocklistEntry("nprpffbrowserrecordext.dll", ALL_VERSIONS),
+    DllBlocklistEntry("nprndlffbrowserrecordext.dll", ALL_VERSIONS),
+
+    # Crashes with CyberLink YouCam, bug 1136968
+    DllBlocklistEntry("ycwebcamerasource.ax", (2, 0, 0, 1611)),
+
+    # Old version of WebcamMax crashes WebRTC, bug 1130061
+    DllBlocklistEntry("vwcsource.ax", (1, 5, 0, 0)),
+
+    # NetOp School, discontinued product, bug 763395
+    DllBlocklistEntry("nlsp.dll", (6, 23, 2012, 19)),
+
+    # Orbit Downloader, bug 1222819
+    DllBlocklistEntry("grabdll.dll", (2, 6, 1, 0)),
+    DllBlocklistEntry("grabkernel.dll", (1, 0, 0, 1)),
+
+    # ESET, bug 1229252
+    DllBlocklistEntry("eoppmonitor.dll", ALL_VERSIONS),
+
+    # SS2OSD, bug 1262348
+    DllBlocklistEntry("ss2osd.dll", ALL_VERSIONS),
+    DllBlocklistEntry("ss2devprops.dll", ALL_VERSIONS),
+
+    # NHASUSSTRIXOSD.DLL, bug 1269244
+    DllBlocklistEntry("nhasusstrixosd.dll", ALL_VERSIONS),
+    DllBlocklistEntry("nhasusstrixdevprops.dll", ALL_VERSIONS),
+
+    # Crashes with PremierOpinion/RelevantKnowledge, bug 1277846
+    DllBlocklistEntry("opls.dll", ALL_VERSIONS),
+    DllBlocklistEntry("opls64.dll", ALL_VERSIONS),
+    DllBlocklistEntry("pmls.dll", ALL_VERSIONS),
+    DllBlocklistEntry("pmls64.dll", ALL_VERSIONS),
+    DllBlocklistEntry("prls.dll", ALL_VERSIONS),
+    DllBlocklistEntry("prls64.dll", ALL_VERSIONS),
+    DllBlocklistEntry("rlls.dll", ALL_VERSIONS),
+    DllBlocklistEntry("rlls64.dll", ALL_VERSIONS),
+
+    # Vorbis DirectShow filters, bug 1239690.
+    DllBlocklistEntry("vorbis.acm", (0, 0, 3, 6)),
+
+    # AhnLab Internet Security, bug 1311969
+    DllBlocklistEntry("nzbrcom.dll", ALL_VERSIONS),
+
+    # K7TotalSecurity, bug 1339083.
+    DllBlocklistEntry("k7pswsen.dll", (15, 2, 2, 95)),
+
+    # smci*.dll - goobzo crashware (bug 1339908),
+    DllBlocklistEntry("smci32.dll", ALL_VERSIONS),
+    DllBlocklistEntry("smci64.dll", ALL_VERSIONS),
+
+    # Crashes with Internet Download Manager, bug 1333486
+    DllBlocklistEntry("idmcchandler7.dll", ALL_VERSIONS),
+    DllBlocklistEntry("idmcchandler7_64.dll", ALL_VERSIONS),
+    DllBlocklistEntry("idmcchandler5.dll", ALL_VERSIONS),
+    DllBlocklistEntry("idmcchandler5_64.dll", ALL_VERSIONS),
+
+    # Nahimic 2 breaks application update (bug 1356637),
+    DllBlocklistEntry("nahimic2devprops.dll", (2, 5, 19, 0xffff)),
+    # Nahimic is causing crashes, bug 1233556
+    DllBlocklistEntry("nahimicmsiosd.dll", UNVERSIONED),
+    # Nahimic is causing crashes, bug 1360029
+    DllBlocklistEntry("nahimicvrdevprops.dll", UNVERSIONED),
+    DllBlocklistEntry("nahimic2osd.dll", (2, 5, 19, 0xffff)),
+    DllBlocklistEntry("nahimicmsidevprops.dll", UNVERSIONED),
+
+    # Bug 1268470 - crashes with Kaspersky Lab on Windows 8
+    DllBlocklistEntry("klsihk64.dll", (14, 0, 456, 0xffff),
+                      BLOCK_WIN8_AND_OLDER),
+
+    # Bug 1579758, crashes with OpenSC nightly version 0.19.0.448 and lower
+    DllBlocklistEntry("onepin-opensc-pkcs11.dll", (0, 19, 0, 448)),
+
+    # Avecto Privilege Guard causes crashes, bug 1385542
+    DllBlocklistEntry("pghook.dll", ALL_VERSIONS),
+
+    # Old versions of G DATA BankGuard, bug 1421991
+    DllBlocklistEntry("banksafe64.dll", (1, 2, 15299, 65535)),
+
+    # Old versions of G DATA, bug 1043775
+    DllBlocklistEntry("gdkbfltdll64.dll", (1, 0, 14141, 240)),
+
+    # Dell Backup and Recovery tool causes crashes, bug 1433408
+    DllBlocklistEntry("dbroverlayiconnotbackuped.dll", (1, 8, 0, 9)),
+    DllBlocklistEntry("dbroverlayiconbackuped.dll", (1, 8, 0, 9)),
+
+    # NVIDIA nView Desktop Management causes crashes, bug 1465787
+    DllBlocklistEntry("nviewh64.dll", (6, 14, 10, 14847)),
+
+    # Ivanti Endpoint Security, bug 1553776
+    DllBlocklistEntry("sxwmon.dll", ALL_VERSIONS),
+    DllBlocklistEntry("sxwmon64.dll", ALL_VERSIONS),
+
+    # 360 Safeguard/360 Total Security causes a11y crashes, bug 1536227.
+    DllBlocklistEntry("safemon64.dll", ALL_VERSIONS),
+
+    # Old versions of Digital Guardian, bug 1318858, bug 1603974,
+    # and bug 1672367
+    RedirectToNoOpEntryPoint("dgapi.dll", (7, 5, 0xffff, 0xffff)),
+    RedirectToNoOpEntryPoint("dgapi64.dll", (7, 5, 0xffff, 0xffff)),
+
+    # Old versions of COMODO Internet Security, bug 1608048
+    DllBlocklistEntry("IseGuard32.dll", (1, 6, 13835, 184)),
+    DllBlocklistEntry("IseGuard64.dll", (1, 6, 13835, 184)),
+
+    # Old version of COMODO Firewall, bug 1407712 and bug 1624336
+    DllBlocklistEntry("guard64.dll", (8, 4, 0, 65535)),
+
+    # Old version of Panda Security, bug 1637984
+    DllBlocklistEntry("PavLspHook64.dll", (9, 2, 2, 1), BLOCK_WIN7_AND_OLDER),
+]
+
+ALL_PROCESSES_TESTS += [
+    # DLLs used by TestDllBlocklist* gTests
+    DllBlocklistEntry("testdllblocklist_matchbyname.dll", ALL_VERSIONS),
+    DllBlocklistEntry("testdllblocklist_matchbyversion.dll", (5, 5, 5, 5)),
+    DllBlocklistEntry("testdllblocklist_allowbyversion.dll", (5, 5, 5, 5)),
+    RedirectToNoOpEntryPoint("testdllblocklist_noopentrypoint.dll",
+                             (5, 5, 5, 5)),
+]
+
+BROWSER_PROCESS += [
+    # RealPlayer, bug 1418535, bug 1437417
+    # Versions before 18.1.11.0 cause severe performance problems.
+    A11yBlocklistEntry("dtvhooks.dll", (18, 1, 10, 0xffff)),
+    A11yBlocklistEntry("dtvhooks64.dll", (18, 1, 10, 0xffff)),
+
+    # SolidWorks Windows Explorer integration causes crashes, bug 1566109
+    # and bug 1468250
+    DllBlocklistEntry("Database.dll", ALL_VERSIONS),
+
+    # Hancom Office shell extension causes crashes when the file picker is
+    # opened. See bug 1581092.
+    DllBlocklistEntry("hncshellext64.dll", (1, 0, 0 ,3)),
+
+    # Cambridge Silicon Radio, bug 1634538
+    DllBlocklistEntry("BLEtokenCredentialProvider.dll", (2, 1, 63, 0)),
+
+    # FYunZip and PuddingZip, loaded as shell extension, cause crashes
+    # bug 1576728
+    DllBlocklistEntry("oly64.dll", (1, 1, 3, 19920)),
+    DllBlocklistEntry("oly.dll", (1, 1, 3, 19920)),
+    DllBlocklistEntry("pdzipmenu64.dll", (1, 4, 4, 20103)),
+    DllBlocklistEntry("pdzipmenu32.dll", (1, 4, 4, 20103)),
+
+    # McAfee Data Loss Prevention causes crashes with multiple signatures,
+    # bug 1634090
+    DllBlocklistEntry("fcagff.dll", (11, 6, 0xffff, 0xffff)),
+    DllBlocklistEntry("fcagff64.dll", (11, 6, 0xffff, 0xffff)),
+]
+
+CHILD_PROCESSES += [
+    # Causes crashes in the GPU process with WebRender enabled, bug 1544435
+    DllBlocklistEntry("wbload.dll", ALL_VERSIONS),
+]
+
diff --git a/mozglue/dllservices/WindowsDllServices.h b/mozglue/dllservices/WindowsDllServices.h
new file mode 100644
index 0000000000..e065c8ff6c
--- /dev/null
+++ b/mozglue/dllservices/WindowsDllServices.h
@@ -0,0 +1,211 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_WindowsDllServices_h
+#define mozilla_glue_WindowsDllServices_h
+
+#include <utility>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Authenticode.h"
+#include "mozilla/LoaderAPIInterfaces.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "mozilla/WinHeaderOnlyUtils.h"
+#include "mozilla/WindowsDllBlocklist.h"
+#include "mozilla/mozalloc.h"
+
+#if defined(MOZILLA_INTERNAL_API)
+#  include "MainThreadUtils.h"
+#  include "nsISupportsImpl.h"
+#  include "nsString.h"
+#  include "nsThreadUtils.h"
+#  include "prthread.h"
+#  include "mozilla/SchedulerGroup.h"
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+// For PCUNICODE_STRING
+#include <winternl.h>
+
+namespace mozilla {
+namespace glue {
+namespace detail {
+
+class DllServicesBase : public Authenticode {
+ public:
+  /**
+   * WARNING: This method is called from within an unsafe context that holds
+   *          multiple locks inside the Windows loader. The only thing that
+   *          this function should be used for is dispatching the event to our
+   *          event loop so that it may be handled in a safe context.
+   */
+  virtual void DispatchDllLoadNotification(ModuleLoadInfo&& aModLoadInfo) = 0;
+
+  /**
+   * This function accepts module load events to be processed later for
+   * the untrusted modules telemetry ping.
+   *
+   * WARNING: This method is run from within the Windows loader and should
+   *          only perform trivial, loader-friendly operations.
+   */
+  virtual void DispatchModuleLoadBacklogNotification(
+      ModuleLoadInfoVec&& aEvents) = 0;
+
+  void SetAuthenticodeImpl(Authenticode* aAuthenticode) {
+    mAuthenticode = aAuthenticode;
+  }
+
+  void SetWinLauncherFunctions(const nt::WinLauncherFunctions& aFunctions) {
+    mWinLauncherFunctions = aFunctions;
+  }
+
+  template <typename... Args>
+  LauncherVoidResultWithLineInfo InitDllBlocklistOOP(Args&&... aArgs) {
+    MOZ_RELEASE_ASSERT(mWinLauncherFunctions.mInitDllBlocklistOOP);
+    return mWinLauncherFunctions.mInitDllBlocklistOOP(
+        std::forward<Args>(aArgs)...);
+  }
+
+  template <typename... Args>
+  void HandleLauncherError(Args&&... aArgs) {
+    MOZ_RELEASE_ASSERT(mWinLauncherFunctions.mHandleLauncherError);
+    mWinLauncherFunctions.mHandleLauncherError(std::forward<Args>(aArgs)...);
+  }
+
+  // In debug builds we override GetBinaryOrgName to add a Gecko-specific
+  // assertion. OTOH, we normally do not want people overriding this function,
+  // so we'll make it final in the release case, thus covering all bases.
+#if defined(DEBUG)
+  UniquePtr<wchar_t[]> GetBinaryOrgName(
+      const wchar_t* aFilePath,
+      AuthenticodeFlags aFlags = AuthenticodeFlags::Default) override
+#else
+  UniquePtr<wchar_t[]> GetBinaryOrgName(
+      const wchar_t* aFilePath,
+      AuthenticodeFlags aFlags = AuthenticodeFlags::Default) final
+#endif  // defined(DEBUG)
+  {
+    if (!mAuthenticode) {
+      return nullptr;
+    }
+
+    return mAuthenticode->GetBinaryOrgName(aFilePath, aFlags);
+  }
+
+  virtual void DisableFull() { DllBlocklist_SetFullDllServices(nullptr); }
+
+  DllServicesBase(const DllServicesBase&) = delete;
+  DllServicesBase(DllServicesBase&&) = delete;
+  DllServicesBase& operator=(const DllServicesBase&) = delete;
+  DllServicesBase& operator=(DllServicesBase&&) = delete;
+
+ protected:
+  DllServicesBase() : mAuthenticode(nullptr) {}
+
+  virtual ~DllServicesBase() = default;
+
+  void EnableFull() { DllBlocklist_SetFullDllServices(this); }
+  void EnableBasic() { DllBlocklist_SetBasicDllServices(this); }
+
+ private:
+  Authenticode* mAuthenticode;
+  nt::WinLauncherFunctions mWinLauncherFunctions;
+};
+
+}  // namespace detail
+
+#if defined(MOZILLA_INTERNAL_API)
+
+struct EnhancedModuleLoadInfo final {
+  explicit EnhancedModuleLoadInfo(ModuleLoadInfo&& aModLoadInfo)
+      : mNtLoadInfo(std::move(aModLoadInfo)) {
+    // Only populate mThreadName when we're on the same thread as the event
+    if (mNtLoadInfo.mThreadId == ::GetCurrentThreadId()) {
+      mThreadName = PR_GetThreadName(PR_GetCurrentThread());
+    }
+    MOZ_ASSERT(!mNtLoadInfo.mSectionName.IsEmpty());
+  }
+
+  EnhancedModuleLoadInfo(EnhancedModuleLoadInfo&&) = default;
+  EnhancedModuleLoadInfo& operator=(EnhancedModuleLoadInfo&&) = default;
+
+  EnhancedModuleLoadInfo(const EnhancedModuleLoadInfo&) = delete;
+  EnhancedModuleLoadInfo& operator=(const EnhancedModuleLoadInfo&) = delete;
+
+  nsDependentString GetSectionName() const {
+    return mNtLoadInfo.mSectionName.AsString();
+  }
+
+  using BacktraceType = decltype(ModuleLoadInfo::mBacktrace);
+
+  ModuleLoadInfo mNtLoadInfo;
+  nsCString mThreadName;
+};
+
+class DllServices : public detail::DllServicesBase {
+ public:
+  void DispatchDllLoadNotification(ModuleLoadInfo&& aModLoadInfo) final {
+    nsCOMPtr<nsIRunnable> runnable(
+        NewRunnableMethod<StoreCopyPassByRRef<EnhancedModuleLoadInfo>>(
+            "DllServices::NotifyDllLoad", this, &DllServices::NotifyDllLoad,
+            std::move(aModLoadInfo)));
+
+    SchedulerGroup::Dispatch(TaskCategory::Other, runnable.forget());
+  }
+
+  void DispatchModuleLoadBacklogNotification(
+      ModuleLoadInfoVec&& aEvents) final {
+    nsCOMPtr<nsIRunnable> runnable(
+        NewRunnableMethod<StoreCopyPassByRRef<ModuleLoadInfoVec>>(
+            "DllServices::NotifyModuleLoadBacklog", this,
+            &DllServices::NotifyModuleLoadBacklog, std::move(aEvents)));
+
+    SchedulerGroup::Dispatch(TaskCategory::Other, runnable.forget());
+  }
+
+#  if defined(DEBUG)
+  UniquePtr<wchar_t[]> GetBinaryOrgName(
+      const wchar_t* aFilePath,
+      AuthenticodeFlags aFlags = AuthenticodeFlags::Default) final {
+    // This function may perform disk I/O, so we should never call it on the
+    // main thread.
+    MOZ_ASSERT(!NS_IsMainThread());
+    return detail::DllServicesBase::GetBinaryOrgName(aFilePath, aFlags);
+  }
+#  endif  // defined(DEBUG)
+
+  NS_INLINE_DECL_THREADSAFE_VIRTUAL_REFCOUNTING(DllServices)
+
+ protected:
+  DllServices() = default;
+  ~DllServices() = default;
+
+  virtual void NotifyDllLoad(EnhancedModuleLoadInfo&& aModLoadInfo) = 0;
+  virtual void NotifyModuleLoadBacklog(ModuleLoadInfoVec&& aEvents) = 0;
+};
+
+#else
+
+class BasicDllServices final : public detail::DllServicesBase {
+ public:
+  // Registers this instance through EnableBasic() as soon as it is built.
+  BasicDllServices() { EnableBasic(); }
+  ~BasicDllServices() = default;
+
+  // Load notifications are not useful in this class, so both dispatch hooks
+  // are intentionally no-ops.
+  void DispatchDllLoadNotification(ModuleLoadInfo&& aModLoadInfo) override {}
+
+  void DispatchModuleLoadBacklogNotification(
+      ModuleLoadInfoVec&& aEvents) override {}
+};
+
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+}  // namespace glue
+}  // namespace mozilla
+
+#endif  // mozilla_glue_WindowsDllServices_h
diff --git a/mozglue/dllservices/WindowsFallbackLoaderAPI.cpp b/mozglue/dllservices/WindowsFallbackLoaderAPI.cpp
new file mode 100644
index 0000000000..e80aa376a7
--- /dev/null
+++ b/mozglue/dllservices/WindowsFallbackLoaderAPI.cpp
@@ -0,0 +1,86 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "WindowsFallbackLoaderAPI.h"
+
+namespace mozilla {
+
+ModuleLoadInfo FallbackLoaderAPI::ConstructAndNotifyBeginDllLoad(
+    void** aContext, PCUNICODE_STRING aRequestedDllName) {
+  ModuleLoadInfo loadInfo(aRequestedDllName);
+
+  MOZ_ASSERT(mLoaderObserver);
+  if (mLoaderObserver) {
+    mLoaderObserver->OnBeginDllLoad(aContext, aRequestedDllName);
+  }
+
+  return loadInfo;
+}
+
+bool FallbackLoaderAPI::SubstituteForLSP(PCUNICODE_STRING aLSPLeafName,
+                                         PHANDLE aOutHandle) {
+  // Forwards the request to the registered observer. An observer is expected
+  // to be present (asserted); should it be missing anyway, return false
+  // rather than dereferencing a null pointer.
+  MOZ_ASSERT(mLoaderObserver);
+  return mLoaderObserver &&
+         mLoaderObserver->SubstituteForLSP(aLSPLeafName, aOutHandle);
+}
+
+void FallbackLoaderAPI::NotifyEndDllLoad(void* aContext, NTSTATUS aLoadNtStatus,
+                                         ModuleLoadInfo&& aModuleLoadInfo) {
+  aModuleLoadInfo.SetEndLoadTimeStamp();
+
+  if (NT_SUCCESS(aLoadNtStatus)) {
+    aModuleLoadInfo.CaptureBacktrace();
+  }
+
+  MOZ_ASSERT(mLoaderObserver);
+  if (mLoaderObserver) {
+    mLoaderObserver->OnEndDllLoad(aContext, aLoadNtStatus,
+                                  std::move(aModuleLoadInfo));
+  }
+}
+
+nt::AllocatedUnicodeString FallbackLoaderAPI::GetSectionName(
+    void* aSectionAddr) {
+  static const StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::NtQueryVirtualMemory)>
+      pNtQueryVirtualMemory(L"ntdll.dll", "NtQueryVirtualMemory");
+  MOZ_ASSERT(pNtQueryVirtualMemory);
+
+  if (!pNtQueryVirtualMemory) {
+    return nt::AllocatedUnicodeString();
+  }
+
+  nt::MemorySectionNameBuf buf;
+  NTSTATUS ntStatus =
+      pNtQueryVirtualMemory(::GetCurrentProcess(), aSectionAddr,
+                            MemorySectionName, &buf, sizeof(buf), nullptr);
+  if (!NT_SUCCESS(ntStatus)) {
+    return nt::AllocatedUnicodeString();
+  }
+
+  return nt::AllocatedUnicodeString(&buf.mSectionFileName);
+}
+
+nt::LoaderAPI::InitDllBlocklistOOPFnPtr
+FallbackLoaderAPI::GetDllBlocklistInitFn() {
+  MOZ_ASSERT_UNREACHABLE("This should not be called so soon!");
+  return nullptr;
+}
+
+nt::LoaderAPI::HandleLauncherErrorFnPtr
+FallbackLoaderAPI::GetHandleLauncherErrorFn() {
+  MOZ_ASSERT_UNREACHABLE("This should not be called so soon!");
+  return nullptr;
+}
+
+void FallbackLoaderAPI::SetObserver(nt::LoaderObserver* aLoaderObserver) {
+  mLoaderObserver = aLoaderObserver;
+}
+
+}  // namespace mozilla
diff --git a/mozglue/dllservices/WindowsFallbackLoaderAPI.h b/mozglue/dllservices/WindowsFallbackLoaderAPI.h
new file mode 100644
index 0000000000..e0c4236b62
--- /dev/null
+++ b/mozglue/dllservices/WindowsFallbackLoaderAPI.h
@@ -0,0 +1,38 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_WindowsFallbackLoaderAPI_h
+#define mozilla_WindowsFallbackLoaderAPI_h
+
+#include "mozilla/Attributes.h"
+#include "NtLoaderAPI.h"
+
+namespace mozilla {
+
+class MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS FallbackLoaderAPI final
+    : public nt::LoaderAPI {
+ public:
+  constexpr FallbackLoaderAPI() : mLoaderObserver(nullptr) {}
+
+  ModuleLoadInfo ConstructAndNotifyBeginDllLoad(
+      void** aContext, PCUNICODE_STRING aRequestedDllName) final;
+  bool SubstituteForLSP(PCUNICODE_STRING aLSPLeafName,
+                        PHANDLE aOutHandle) final;
+  void NotifyEndDllLoad(void* aContext, NTSTATUS aLoadNtStatus,
+                        ModuleLoadInfo&& aModuleLoadInfo) final;
+  nt::AllocatedUnicodeString GetSectionName(void* aSectionAddr) final;
+  nt::LoaderAPI::InitDllBlocklistOOPFnPtr GetDllBlocklistInitFn() final;
+  nt::LoaderAPI::HandleLauncherErrorFnPtr GetHandleLauncherErrorFn() final;
+
+  void SetObserver(nt::LoaderObserver* aLoaderObserver);
+
+ private:
+  nt::LoaderObserver* mLoaderObserver;
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_WindowsFallbackLoaderAPI_h
diff --git a/mozglue/dllservices/gen_dll_blocklist_defs.py b/mozglue/dllservices/gen_dll_blocklist_defs.py
new file mode 100644
index 0000000000..cc71c6b49f
--- /dev/null
+++ b/mozglue/dllservices/gen_dll_blocklist_defs.py
@@ -0,0 +1,744 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import print_function
+
+from copy import deepcopy
+from six import iteritems, PY2
+from struct import unpack
+import os
+from uuid import UUID
+
+H_HEADER = """/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This file was auto-generated from {0} by gen_dll_blocklist_defs.py.  */
+
+#ifndef mozilla_{1}_h
+#define mozilla_{1}_h
+
+"""
+
+H_FOOTER = """#endif  // mozilla_{1}_h
+
+"""
+
+H_DEFS_BEGIN_DEFAULT = """#include "mozilla/WindowsDllBlocklistCommon.h"
+
+DLL_BLOCKLIST_DEFINITIONS_BEGIN
+
+"""
+
+H_DEFS_END_DEFAULT = """
+DLL_BLOCKLIST_DEFINITIONS_END
+
+"""
+
+H_BEGIN_LSP = """#include <guiddef.h>
+
+static const GUID gLayerGuids[] = {
+
+"""
+
+H_END_LSP = """
+};
+
+"""
+
+H_BEGIN_A11Y = """#include "mozilla/WindowsDllBlocklistCommon.h"
+
+DLL_BLOCKLIST_DEFINITIONS_BEGIN_NAMED(gBlockedInprocDlls)
+
+"""
+
+# These flag names should match the ones defined in WindowsDllBlocklistCommon.h
+FLAGS_DEFAULT = "FLAGS_DEFAULT"
+BLOCK_WIN8_AND_OLDER = "BLOCK_WIN8_AND_OLDER"
+BLOCK_WIN7_AND_OLDER = "BLOCK_WIN7_AND_OLDER"
+USE_TIMESTAMP = "USE_TIMESTAMP"
+CHILD_PROCESSES_ONLY = "CHILD_PROCESSES_ONLY"
+BROWSER_PROCESS_ONLY = "BROWSER_PROCESS_ONLY"
+SUBSTITUTE_LSP_PASSTHROUGH = "SUBSTITUTE_LSP_PASSTHROUGH"
+REDIRECT_TO_NOOP_ENTRYPOINT = "REDIRECT_TO_NOOP_ENTRYPOINT"
+
+# Only these flags are available in the input script
+INPUT_ONLY_FLAGS = {
+    BLOCK_WIN8_AND_OLDER,
+    BLOCK_WIN7_AND_OLDER,
+}
+
+
+def FILTER_ALLOW_ALL(entry):
+    # A11y entries are special, so we always exclude those by default
+    # (so it's not really allowing 'all', but it is simpler to reason about by
+    #  pretending that it is allowing all.)
+    return not isinstance(entry, A11yBlocklistEntry)
+
+
+def FILTER_DENY_ALL(entry):
+    return False
+
+
+def FILTER_ALLOW_ONLY_A11Y(entry):
+    return isinstance(entry, A11yBlocklistEntry)
+
+
+def FILTER_ALLOW_ONLY_LSP(entry):
+    return isinstance(entry, LspBlocklistEntry)
+
+
+def FILTER_TESTS_ONLY(entry):
+    return not isinstance(entry, A11yBlocklistEntry) and entry.is_test()
+
+
+def derive_test_key(key):
+    return key + "_TESTS"
+
+
+ALL_DEFINITION_LISTS = ("ALL_PROCESSES", "BROWSER_PROCESS", "CHILD_PROCESSES")
+
+
+class BlocklistDescriptor(object):
+    """This class encapsulates every file that is output from this script.
+    Each instance has a name, an "input specification", and optional "flag
+    specification" and "output specification" entries.
+    """
+
+    DEFAULT_OUTSPEC = {
+        "mode": "",
+        "filter": FILTER_ALLOW_ALL,
+        "begin": H_DEFS_BEGIN_DEFAULT,
+        "end": H_DEFS_END_DEFAULT,
+    }
+
+    FILE_NAME_TPL = "WindowsDllBlocklist{0}Defs"
+
+    OutputDir = None
+    ExistingFd = None
+    ExistingFdLeafName = None
+
+    def __init__(self, name, inspec, **kwargs):
+        """Positional arguments:
+
+        name -- String containing the name of the output list.
+
+        inspec -- One or more members of ALL_DEFINITION_LISTS. The input used
+        for this blocklist file is the union of all lists specified by this
+        variable.
+
+        Keyword arguments:
+
+        outspec -- an optional list of dicts that specify how the lists output
+        will be written out to a file. Each dict may contain the following keys:
+
+            'mode' -- a string that specifies a mode that is used when writing
+            out list entries to this particular output. This is passed in as the
+            mode argument to DllBlocklistEntry's write method.
+
+            'filter' -- a function that, given a blocklist entry, decides
+            whether or not that entry shall be included in this output file.
+
+            'begin' -- a string that is written to the output file after writing
+            the file's header, but prior to writing out any blocklist entries.
+
+            'end' -- a string that is written to the output file after writing
+            out any blocklist entries but before the file's footer.
+
+        Any unspecified keys will be assigned default values.
+
+        flagspec -- an optional dict whose keys consist of one or more of the
+        list names from inspec. Each corresponding value is a set of flags that
+        should be applied to each entry from that list. For example, the
+        flagspec:
+
+            {'CHILD_PROCESSES': {CHILD_PROCESSES_ONLY}}
+
+        causes any blocklist entries from the CHILD_PROCESSES list to be output
+        with the CHILD_PROCESSES_ONLY flag set.
+
+        """
+
+        self._name = name
+
+        # inspec's elements must all come from ALL_DEFINITION_LISTS
+        assert not (set(inspec).difference(set(ALL_DEFINITION_LISTS)))
+
+        # Internally to the descriptor, we store input specifications as a dict
+        # that maps each input blocklist name to the set of flags to be applied
+        # to each entry in that blocklist.
+        self._inspec = {blocklist: set() for blocklist in inspec}
+
+        self._outspecs = kwargs.get("outspec", BlocklistDescriptor.DEFAULT_OUTSPEC)
+        if isinstance(self._outspecs, dict):
+            # _outspecs should always be stored as a list of dicts
+            self._outspecs = [self._outspecs]
+
+        flagspecs = kwargs.get("flagspec", dict())
+        # flagspec's keys must all come from the lists named in inspec
+        assert not (set(flagspecs.keys()).difference(set(self._inspec.keys())))
+
+        # Merge each flag set from flagspec into _inspec; values must be sets
+        for blocklist, flagspec in iteritems(flagspecs):
+            spec = self._inspec[blocklist]
+            if not isinstance(flagspec, (set, frozenset)):
+                raise TypeError("Flag spec for list %s must be a set!" % blocklist)
+            spec.update(flagspec)
+
+    @staticmethod
+    def set_output_fd(fd):
+        """The build system has already provided an open file descriptor for
+        one of our outputs. We save that here so that we may use that fd once
+        we're ready to process that fd's file. We also obtain the output dir for
+        use as the base directory for the other output files that we open.
+        """
+        BlocklistDescriptor.ExistingFd = fd
+        (
+            BlocklistDescriptor.OutputDir,
+            BlocklistDescriptor.ExistingFdLeafName,
+        ) = os.path.split(fd.name)
+
+    @staticmethod
+    def ensure_no_dupes(defs):
+        """Ensure that defs does not contain any dupes. We raise a ValueError
+        because this is a bug in the input and requires developer intervention.
+        """
+        seen = set()
+        for e in defs:
+            name = e.get_name()
+            if name not in seen:
+                seen.add(name)
+            else:
+                raise ValueError("Duplicate entry found: %s" % name)
+
+    @staticmethod
+    def get_test_entries(exec_env, blocklist):
+        """Obtains all test entries for the corresponding blocklist, and also
+        ensures that each entry has its test flag set.
+
+        Positional arguments:
+
+        exec_env -- dict containing the globals that were passed to exec
+        when the input script was run.
+
+        blocklist -- The name of the blocklist to obtain tests from. This
+        should be one of the members of ALL_DEFINITION_LISTS
+        """
+        test_key = derive_test_key(blocklist)
+
+        def set_is_test(elem):
+            elem.set_is_test()
+            return elem
+
+        return map(set_is_test, exec_env[test_key])
+
+    def gen_list(self, exec_env, filter_func):
+        """Generates a sorted list of blocklist entries from the input script,
+        filtered via filter_func.
+
+        Positional arguments:
+
+        exec_env -- dict containing the globals that were passed to exec
+        when the input script was run. This function expects exec_env to
+        contain lists of blocklist entries, keyed using one of the members of
+        ALL_DEFINITION_LISTS.
+
+        filter_func -- a filter function that evaluates each blocklist entry
+        to determine whether or not it should be included in the results.
+        """
+
+        # This list contains all the entries across all blocklists that we
+        # potentially want to process
+        unified_list = []
+
+        # For each blocklist specified in the _inspec, we query the globals
+        # for their entries, add any flags, and then add them to the
+        # unified_list.
+        for blocklist, listflags in iteritems(self._inspec):
+
+            def add_list_flags(elem):
+                # We deep copy so that flags set for an entry in one blocklist
+                # do not interfere with flags set for an entry in a different
+                # list.
+                result = deepcopy(elem)
+                result.add_flags(listflags)
+                return result
+
+            # We add list flags *before* filtering because the filters might
+            # want to access flags as part of their operation.
+            unified_list.extend(map(add_list_flags, exec_env[blocklist]))
+
+            # We also add any test entries for the lists specified by _inspec
+            unified_list.extend(
+                map(add_list_flags, self.get_test_entries(exec_env, blocklist))
+            )
+
+        # There should be no dupes in the input. If there are, raise an error.
+        self.ensure_no_dupes(unified_list)
+
+        # Now we filter out any unwanted list entries
+        filtered_list = filter(filter_func, unified_list)
+
+        # Sort the list on entry name so that the blocklist code may use
+        # binary search if it so chooses.
+        return sorted(filtered_list, key=lambda e: e.get_name())
+
+    @staticmethod
+    def get_fd(outspec_leaf_name):
+        """If BlocklistDescriptor.ExistingFd corresponds to outspec_leaf_name,
+        then we return that. Otherwise, we construct a new absolute path to
+        outspec_leaf_name and open a new file descriptor for writing.
+        """
+        if (
+            not BlocklistDescriptor.ExistingFd
+            or BlocklistDescriptor.ExistingFdLeafName != outspec_leaf_name
+        ):
+            new_name = os.path.join(BlocklistDescriptor.OutputDir, outspec_leaf_name)
+            return open(new_name, "w")
+
+        fd = BlocklistDescriptor.ExistingFd
+        BlocklistDescriptor.ExistingFd = None
+        return fd
+
+    def write(self, src_file, exec_env):
+        """Write out all output files that are specified by this descriptor.
+
+        Positional arguments:
+
+        src_file -- name of the input file from which the lists were generated.
+
+        exec_env -- dictionary containing the lists that were parsed from the
+        input file when it was executed.
+        """
+
+        for outspec in self._outspecs:
+            # Use DEFAULT_OUTSPEC to supply defaults for any unused outspec keys
+            effective_outspec = BlocklistDescriptor.DEFAULT_OUTSPEC.copy()
+            effective_outspec.update(outspec)
+
+            entries = self.gen_list(exec_env, effective_outspec["filter"])
+            if not entries:
+                continue
+
+            mode = effective_outspec["mode"]
+
+            # Since each output descriptor may generate output across multiple
+            # modes, each list is uniquified via the concatenation of our name
+            # and the mode.
+            listname = self._name + mode
+            leafname_no_ext = BlocklistDescriptor.FILE_NAME_TPL.format(listname)
+            leafname = leafname_no_ext + ".h"
+
+            with self.get_fd(leafname) as output:
+                print(H_HEADER.format(src_file, leafname_no_ext), file=output, end="")
+                print(effective_outspec["begin"], file=output, end="")
+
+                for e in entries:
+                    e.write(output, mode)
+
+                print(effective_outspec["end"], file=output, end="")
+                print(H_FOOTER.format(src_file, leafname_no_ext), file=output, end="")
+
+
+A11Y_OUTPUT_SPEC = {
+    "filter": FILTER_ALLOW_ONLY_A11Y,
+    "begin": H_BEGIN_A11Y,
+}
+
+LSP_MODE_GUID = "Guid"
+
+LSP_OUTPUT_SPEC = [
+    {
+        "mode": LSP_MODE_GUID,
+        "filter": FILTER_ALLOW_ONLY_LSP,
+        "begin": H_BEGIN_LSP,
+        "end": H_END_LSP,
+    },
+]
+
+GENERATED_BLOCKLIST_FILES = [
+    BlocklistDescriptor("A11y", ["BROWSER_PROCESS"], outspec=A11Y_OUTPUT_SPEC),
+    BlocklistDescriptor(
+        "Launcher",
+        ALL_DEFINITION_LISTS,
+        flagspec={
+            "BROWSER_PROCESS": {BROWSER_PROCESS_ONLY},
+            "CHILD_PROCESSES": {CHILD_PROCESSES_ONLY},
+        },
+    ),
+    BlocklistDescriptor(
+        "Legacy",
+        ALL_DEFINITION_LISTS,
+        flagspec={
+            "BROWSER_PROCESS": {BROWSER_PROCESS_ONLY},
+            "CHILD_PROCESSES": {CHILD_PROCESSES_ONLY},
+        },
+    ),
+    # Roughed-in for the moment; we'll enable this in bug 1238735
+    # BlocklistDescriptor('LSP', ALL_DEFINITION_LISTS, outspec=LSP_OUTPUT_SPEC),
+    BlocklistDescriptor(
+        "Test", ALL_DEFINITION_LISTS, outspec={"filter": FILTER_TESTS_ONLY}
+    ),
+]
+
+
+class PETimeStamp(object):
+    """Holds a 32-bit timestamp; str() renders it as a hex literal + 'U'."""
+    def __init__(self, ts):
+        if ts < 0 or ts > 0xFFFFFFFF:
+            raise ValueError("Invalid timestamp value")
+        self._value = ts
+
+    def __str__(self):
+        return "0x%08XU" % (self._value,)
+
+
+class Version(object):
+    """Encapsulates a DLL version."""
+
+    ALL_VERSIONS = 0xFFFFFFFFFFFFFFFF
+    UNVERSIONED = 0
+
+    def __init__(self, *args):
+        """There are multiple ways to construct a Version:
+
+        As a tuple containing four elements (recommended);
+        As four integral arguments;
+        As a PETimeStamp;
+        As a long integer.
+
+        The tuple and four-argument forms require the value of each element
+        to be between 0 and 0xFFFF, inclusive.
+        """
+
+        self._ver = Version.UNVERSIONED
+
+        if len(args) == 1:
+            if isinstance(args[0], tuple):
+                self.validate_iterable(args[0])
+
+                self._ver = "MAKE_VERSION%r" % (args[0],)
+            elif isinstance(args[0], PETimeStamp):
+                self._ver = args[0]
+            else:
+                self._ver = int(args[0])
+        elif len(args) == 4:
+            self.validate_iterable(args)
+
+            self._ver = "MAKE_VERSION%r" % (tuple(args),)
+        else:
+            raise ValueError("Bad arguments to Version constructor")
+
+    def validate_iterable(self, arg):
+        if len(arg) != 4:
+            raise ValueError("Versions must be a 4-tuple")
+
+        for component in arg:
+            if not isinstance(component, int) or component < 0 or component > 0xFFFF:
+                raise ValueError(
+                    "Each version component must be a 16-bit " "unsigned integer"
+                )
+
+    def build_long(self, args):
+        self.validate_iterable(args)
+        return (
+            (int(args[0]) << 48)
+            | (int(args[1]) << 32)
+            | (int(args[2]) << 16)
+            | int(args[3])
+        )
+
+    def is_timestamp(self):
+        return isinstance(self._ver, PETimeStamp)
+
+    def __str__(self):
+        if isinstance(self._ver, int):
+            if self._ver == Version.ALL_VERSIONS:
+                return "DllBlockInfo::ALL_VERSIONS"
+
+            if self._ver == Version.UNVERSIONED:
+                return "DllBlockInfo::UNVERSIONED"
+
+            return "0x%016XULL" % self._ver
+
+        return str(self._ver)
+
+
+class DllBlocklistEntry(object):
+    TEST_CONDITION = "defined(ENABLE_TESTS)"
+
+    def __init__(self, name, ver, flags=(), **kwargs):
+        """Positional arguments:
+
+        name -- The leaf name of the DLL.
+
+        ver -- The maximum version to be blocked. NB: The comparison used by the
+        blocklist is <=, so you should specify the last bad version, as opposed
+        to the first good version.
+
+        flags -- iterable containing the flags that should be applicable to
+        this blocklist entry.
+
+        Keyword arguments:
+
+        condition -- a string containing a C++ preprocessor expression. This
+        condition is written as a condition for an #if/#endif block that is
+        generated around the entry during output.
+        """
+
+        self.check_ascii(name)
+        self._name = name.lower()
+        self._ver = Version(ver)
+
+        self._flags = set()
+        self.add_flags(flags)
+        if self._ver.is_timestamp():
+            self._flags.add(USE_TIMESTAMP)
+
+        self._cond = kwargs.get("condition", set())
+        if isinstance(self._cond, str):
+            self._cond = {self._cond}
+
+    @staticmethod
+    def check_ascii(name):
+        if PY2:
+            if not all(ord(c) < 128 for c in name):
+                raise ValueError('DLL name "%s" must be ASCII!' % name)
+            return
+
+        try:
+            # Supported in Python 3.7
+            if not name.isascii():
+                raise ValueError('DLL name "%s" must be ASCII!' % name)
+            return
+        except AttributeError:
+            pass
+
+        try:
+            name.encode("ascii")
+        except UnicodeEncodeError:
+            raise ValueError('DLL name "%s" must be ASCII!' % name)
+
+    def get_name(self):
+        return self._name
+
+    def set_condition(self, cond):
+        self._cond.add(cond)
+
+    def get_condition(self):
+        """Return the conditions joined with &&, in sorted (stable) order."""
+        if len(self._cond) == 1:
+            fmt = "{0}"
+        else:
+            fmt = "({0})"
+        return " && ".join([fmt.format(c) for c in sorted(self._cond)])
+
+    def set_is_test(self):
+        self.set_condition(DllBlocklistEntry.TEST_CONDITION)
+
+    def is_test(self):
+        return self._cond and DllBlocklistEntry.TEST_CONDITION in self._cond
+
+    def add_flags(self, new_flags):
+        if isinstance(new_flags, str):
+            self._flags.add(new_flags)
+        else:
+            self._flags.update(new_flags)
+
+    @staticmethod
+    def get_flag_string(flag):
+        return "DllBlockInfo::" + flag
+
+    def get_flags_list(self):
+        return self._flags
+
+    def write(self, output, mode):
+        """Print this entry to output, wrapped in #if/#endif if conditional."""
+        if self._cond:
+            print("#if %s" % self.get_condition(), file=output)
+
+        flags_str = ""
+        # Sort the flag set so the generated header is stable across runs.
+        flags = self.get_flags_list()
+        if flags:
+            flags_str = ", " + " | ".join(map(self.get_flag_string, sorted(flags)))
+
+        entry_str = '  DLL_BLOCKLIST_ENTRY("%s", %s%s)' % (
+            self._name,
+            str(self._ver),
+            flags_str,
+        )
+        print(entry_str, file=output)
+        if self._cond:
+            print("#endif  // %s" % self.get_condition(), file=output)
+
+
+class A11yBlocklistEntry(DllBlocklistEntry):
+    """Represents a blocklist entry for injected a11y DLLs. This class does
+    not need to do anything special compared to a DllBlocklistEntry; we just
+    use this type to distinguish a11y entries from "regular" blocklist entries.
+    """
+
+    def __init__(self, name, ver, flags=(), **kwargs):
+        """These arguments are identical to DllBlocklistEntry.__init__"""
+
+        super(A11yBlocklistEntry, self).__init__(name, ver, flags, **kwargs)
+
+
+class RedirectToNoOpEntryPoint(DllBlocklistEntry):
+    """Represents a blocklist entry to hook the entrypoint into a function
+    just returning TRUE to keep a module alive and harmless.
+    This entry is intended to block a DLL which is injected by IAT patching
+    which is planted by a kernel callback routine for LoadImage because
+    blocking such a DLL makes a process fail to launch.
+    """
+
+    def __init__(self, name, ver, flags=(), **kwargs):
+        """These arguments are identical to DllBlocklistEntry.__init__"""
+
+        super(RedirectToNoOpEntryPoint, self).__init__(name, ver, flags, **kwargs)
+
+    def get_flags_list(self):
+        flags = super(RedirectToNoOpEntryPoint, self).get_flags_list()
+        # RedirectToNoOpEntryPoint items always include the following flag
+        flags.add(REDIRECT_TO_NOOP_ENTRYPOINT)
+        return flags
+
+
+class LspBlocklistEntry(DllBlocklistEntry):
+    """Represents a blocklist entry for a WinSock Layered Service Provider (LSP)."""
+
+    GUID_UNPACK_FMT_LE = "<IHHBBBBBBBB"
+    Guids = dict()
+
+    def __init__(self, name, ver, guids, flags=(), **kwargs):
+        """Positional arguments:
+
+        name -- The leaf name of the DLL.
+
+        ver -- The maximum version to be blocked. NB: The comparison used by the
+        blocklist is <=, so you should specify the last bad version, as opposed
+        to the first good version.
+
+        guids -- Either a string or list of strings containing the GUIDs that
+        uniquely identify the LSP. These GUIDs are generated by the developer of
+        the LSP and are registered with WinSock alongside the LSP. We record
+        this GUID as part of the "Winsock_LSP" annotation in our crash reports.
+
+        flags -- iterable containing the flags that should be applicable to
+        this blocklist entry.
+
+        Keyword arguments:
+
+        condition -- a string containing a C++ preprocessor expression. This
+        condition is written as a condition for an #if/#endif block that is
+        generated around the entry during output.
+        """
+
+        super(LspBlocklistEntry, self).__init__(name, ver, flags, **kwargs)
+        if not guids:
+            raise ValueError("Missing GUID(s)!")
+
+        if isinstance(guids, str):
+            self.insert(UUID(guids), name)
+        else:
+            for guid in guids:
+                self.insert(UUID(guid), name)
+
+    def insert(self, guid, name):
+        # Some explanation here: Multiple DLLs (and thus multiple instances of
+        # LspBlocklistEntry) may share the same GUIDs. To ensure that we do not
+        # have any duplicates, we store each GUID in a class variable, Guids.
+        # We include the original DLL name from the blocklist definitions so
+        # that we may output a comment above each GUID indicating which entries
+        # correspond to which GUID.
+        LspBlocklistEntry.Guids.setdefault(guid, []).append(name)
+
+    def get_flags_list(self):
+        flags = super(LspBlocklistEntry, self).get_flags_list()
+        # LSP entries always include the following flag
+        flags.add(SUBSTITUTE_LSP_PASSTHROUGH)
+        return flags
+
+    @staticmethod
+    def as_c_struct(guid, names):
+        parts = unpack(LspBlocklistEntry.GUID_UNPACK_FMT_LE, guid.bytes_le)
+        str_guid = (
+            "  // %r\n  // {%s}\n  { 0x%08x, 0x%04x, 0x%04x, "
+            "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x }"
+            " }"
+            % (
+                names,
+                str(guid),
+                parts[0],
+                parts[1],
+                parts[2],
+                parts[3],
+                parts[4],
+                parts[5],
+                parts[6],
+                parts[7],
+                parts[8],
+                parts[9],
+                parts[10],
+            )
+        )
+        return str_guid
+
+    def write(self, output, mode):
+        if mode != LSP_MODE_GUID:
+            super(LspBlocklistEntry, self).write(output, mode)
+            return
+
+        # We dump the entire contents of Guids on the first call, and then
+        # clear it. Remaining invocations of this method are no-ops.
+        if LspBlocklistEntry.Guids:
+            result = ",\n".join(
+                [
+                    self.as_c_struct(guid, names)
+                    for guid, names in iteritems(LspBlocklistEntry.Guids)
+                ]
+            )
+            print(result, file=output)
+            LspBlocklistEntry.Guids.clear()
+
+
+def exec_script_file(script_name, globals):
+    with open(script_name) as script:
+        exec(compile(script.read(), script_name, "exec"), globals)
+
+
+def gen_blocklists(first_fd, defs_filename):
+
+    BlocklistDescriptor.set_output_fd(first_fd)
+
+    # exec_env defines the global variables that will be present in the
+    # execution environment when defs_filename is run by exec.
+    exec_env = {
+        # Add the blocklist entry types
+        "A11yBlocklistEntry": A11yBlocklistEntry,
+        "DllBlocklistEntry": DllBlocklistEntry,
+        "LspBlocklistEntry": LspBlocklistEntry,
+        "RedirectToNoOpEntryPoint": RedirectToNoOpEntryPoint,
+        # Add the special version types
+        "ALL_VERSIONS": Version.ALL_VERSIONS,
+        "UNVERSIONED": Version.UNVERSIONED,
+        "PETimeStamp": PETimeStamp,
+    }
+
+    # Import definition lists into exec_env
+    for defname in ALL_DEFINITION_LISTS:
+        exec_env[defname] = []
+        # For each defname, add a special list for test-only entries
+        exec_env[derive_test_key(defname)] = []
+
+    # Import flags into exec_env
+    exec_env.update({flag: flag for flag in INPUT_ONLY_FLAGS})
+
+    # Now execute the input script with exec_env providing the globals
+    exec_script_file(defs_filename, exec_env)
+
+    # Tell the output descriptors to write out the output files.
+    for desc in GENERATED_BLOCKLIST_FILES:
+        desc.write(defs_filename, exec_env)
diff --git a/mozglue/dllservices/moz.build b/mozglue/dllservices/moz.build
new file mode 100644
index 0000000000..b46692fe84
--- /dev/null
+++ b/mozglue/dllservices/moz.build
@@ -0,0 +1,60 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG["MOZ_WIDGET_TOOLKIT"]:
+
+    SOURCES += [
+        # This file contains a |using namespace mozilla;| statement
+        "WindowsDllBlocklist.cpp",
+    ]
+
+    UNIFIED_SOURCES += [
+        "Authenticode.cpp",
+        "LoaderObserver.cpp",
+        "ModuleLoadFrame.cpp",
+        "WindowsFallbackLoaderAPI.cpp",
+    ]
+
+OS_LIBS += [
+    "crypt32",
+    "ntdll",
+    "version",
+    "wintrust",
+]
+
+DELAYLOAD_DLLS += [
+    "crypt32.dll",
+    "wintrust.dll",
+]
+
+EXPORTS.mozilla += [
+    "Authenticode.h",
+    "LoaderAPIInterfaces.h",
+    "ModuleLoadInfo.h",
+    "WindowsDllBlocklist.h",
+    "WindowsDllBlocklistCommon.h",
+]
+
+EXPORTS.mozilla.glue += [
+    "WindowsDllServices.h",
+]
+
+# Generate one WindowsDllBlocklist<Type>Defs.h header per type listed below
+blocklist_header_types = ["A11y", "Launcher", "Legacy", "Test"]
+blocklist_file_leaf_tpl = "WindowsDllBlocklist{0}Defs.h"
+blocklist_files = tuple(
+    [blocklist_file_leaf_tpl.format(type) for type in blocklist_header_types]
+)
+
+GeneratedFile(
+    *blocklist_files,
+    script="gen_dll_blocklist_defs.py",
+    entry_point="gen_blocklists",
+    inputs=["WindowsDllBlocklistDefs.in"]
+)
+EXPORTS.mozilla += ["!" + hdr for hdr in blocklist_files]  # generated headers
+
+FINAL_LIBRARY = "mozglue"
diff --git a/mozglue/linker/BaseElf.cpp b/mozglue/linker/BaseElf.cpp
new file mode 100644
index 0000000000..78542b3875
--- /dev/null
+++ b/mozglue/linker/BaseElf.cpp
@@ -0,0 +1,190 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseElf.h"
+#include "Elfxx.h"
+#include "Logging.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/RefPtr.h"
+
+using namespace Elf;
+
+unsigned long BaseElf::Hash(const char* symbol) {
+  /* Shift each byte in, then fold the top nibble of the low 32 bits back. */
+  unsigned long hash = 0;
+  for (auto* p = reinterpret_cast<const unsigned char*>(symbol); *p; ++p) {
+    hash = (hash << 4) + *p;
+    const unsigned long high = hash & 0xf0000000;
+    hash ^= high;
+    hash ^= high >> 24;
+  }
+  return hash;
+}
+
+void* BaseElf::GetSymbolPtr(const char* symbol) const {
+  return GetSymbolPtr(symbol, Hash(symbol));  // hash the name, then look up
+}
+
+void* BaseElf::GetSymbolPtr(const char* symbol, unsigned long hash) const {
+  const Sym* s = GetSymbol(symbol, hash);  // nullptr when the name is unknown
+  void* addr =
+      (s && s->st_shndx != SHN_UNDEF) ? GetPtr(s->st_value) : nullptr;
+  DEBUG_LOG("BaseElf::GetSymbolPtr(%p [\"%s\"], \"%s\") = %p",
+            reinterpret_cast<const void*>(this), GetPath(), symbol, addr);
+  return addr;
+}
+
+const Sym* BaseElf::GetSymbol(const char* symbol, unsigned long hash) const {
+  /* Look the symbol up through the buckets and chains tables.
+   * The hash of the symbol name, modulo the number of buckets, picks an
+   * entry of the buckets table. That entry is an index into both the
+   * symbols table and the chains table. If the symbol at that index has the
+   * requested name, we're done. Otherwise the chains table entry at that
+   * index is the next index to test, until STN_UNDEF ends the chain. */
+  const size_t slot = hash % buckets.numElements();
+  size_t idx = buckets[slot];
+  while (idx != STN_UNDEF) {
+    if (!strcmp(symbol, strtab.GetStringAt(symtab[idx].st_name)))
+      return &symtab[idx];
+    idx = chains[idx];
+  }
+  return nullptr;
+}
+/* Delegates to the Contains check of base, the library's mapping. */
+bool BaseElf::Contains(void* addr) const { return base.Contains(addr); }
+
+#ifdef __ARM_EABI__
+const void* BaseElf::FindExidx(int* pcount) const {
+  if (!arm_exidx) {  // no exidx table was recorded for this library
+    *pcount = 0;
+    return nullptr;
+  }
+  *pcount = arm_exidx.numElements();
+  return arm_exidx;
+}
+#endif
+
+already_AddRefed<LibHandle> LoadedElf::Create(const char* path,
+                                              void* base_addr) {
+  DEBUG_LOG("LoadedElf::Create(\"%s\", %p) = ...", path, base_addr);
+
+  uint8_t mapped;
+  /* If the page is not mapped, mincore returns an error. If base_addr is
+   * nullptr, as would happen if the corresponding binary is prelinked with
+   * the prelink tool (but not with the android apriori tool), no page being
+   * mapped there (right?), mincore returns an error, too, which makes
+   * prelinked libraries on glibc unsupported. This is not an interesting
+   * use case for now, so don't try supporting that case.
+   */
+  if (mincore(const_cast<void*>(base_addr), PageSize(), &mapped))
+    return nullptr;
+
+  RefPtr<LoadedElf> elf = new LoadedElf(path);
+
+  const Ehdr* ehdr = Ehdr::validate(base_addr);
+  if (!ehdr) return nullptr;  // header failed Ehdr::validate
+
+  Addr min_vaddr = (Addr)-1;  // We want to find the lowest and highest
+  Addr max_vaddr = 0;         // virtual address used by this Elf.
+  const Phdr* dyn = nullptr;
+#ifdef __ARM_EABI__
+  const Phdr* arm_exidx_phdr = nullptr;
+#endif
+
+  Array<Phdr> phdrs(reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff,
+                    ehdr->e_phnum);
+  for (auto phdr = phdrs.begin(); phdr < phdrs.end(); ++phdr) {
+    switch (phdr->p_type) {
+      case PT_LOAD:
+        if (phdr->p_vaddr < min_vaddr) min_vaddr = phdr->p_vaddr;
+        if (max_vaddr < phdr->p_vaddr + phdr->p_memsz)
+          max_vaddr = phdr->p_vaddr + phdr->p_memsz;
+        break;
+      case PT_DYNAMIC:
+        dyn = &*phdr;  // no duplicate check: the last PT_DYNAMIC wins
+        break;
+#ifdef __ARM_EABI__
+      case PT_ARM_EXIDX:
+        /* We cannot initialize arm_exidx here
+           because we don't have a base yet */
+        arm_exidx_phdr = &*phdr;
+        break;
+#endif
+    }
+  }
+
+  /* If the lowest PT_LOAD virtual address in headers is not 0, then the ELF
+   * is either prelinked or a non-PIE executable. The former case is not
+   * possible, because base_addr would be nullptr and the mincore test above
+   * would already have made us return.
+   * For a non-PIE executable, PT_LOADs contain absolute addresses, so in
+   * practice, this means min_vaddr should be equal to base_addr. max_vaddr
+   * can thus be adjusted accordingly.
+   */
+  if (min_vaddr != 0) {
+    void* min_vaddr_ptr =
+        reinterpret_cast<void*>(static_cast<uintptr_t>(min_vaddr));
+    if (min_vaddr_ptr != base_addr) {
+      LOG("%s: %p != %p", elf->GetPath(), min_vaddr_ptr, base_addr);
+      return nullptr;
+    }
+    max_vaddr -= min_vaddr;
+  }
+  if (!dyn) {
+    LOG("%s: No PT_DYNAMIC segment found", elf->GetPath());
+    return nullptr;
+  }
+
+  elf->base.Assign(base_addr, max_vaddr);
+
+  if (!elf->InitDyn(dyn)) return nullptr;
+
+#ifdef __ARM_EABI__
+  if (arm_exidx_phdr)
+    elf->arm_exidx.InitSize(elf->GetPtr(arm_exidx_phdr->p_vaddr),
+                            arm_exidx_phdr->p_memsz);
+#endif
+
+  DEBUG_LOG("LoadedElf::Create(\"%s\", %p) = %p", path, base_addr,
+            static_cast<void*>(elf));
+
+  ElfLoader::Singleton.Register(elf);
+  return elf.forget();
+}
+
+bool LoadedElf::InitDyn(const Phdr* pt_dyn) {
+  /* Walk the PT_DYNAMIC entries up to the first one with a null tag. */
+  Array<Dyn> entries;
+  entries.InitSize(GetPtr<Dyn>(pt_dyn->p_vaddr), pt_dyn->p_filesz);
+
+  size_t symnum = 0;
+  for (auto it = entries.begin(); it < entries.end() && it->d_tag; ++it) {
+    if (it->d_tag == DT_HASH) {
+      DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_HASH", uintptr_t(it->d_un.d_val));
+      const Elf::Word* header = GetPtr<Elf::Word>(it->d_un.d_ptr);
+      symnum = header[1];
+      buckets.Init(&header[2], header[0]);
+      chains.Init(&*buckets.end());
+    } else if (it->d_tag == DT_STRTAB) {
+      DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_STRTAB", uintptr_t(it->d_un.d_val));
+      strtab.Init(GetPtr(it->d_un.d_ptr));
+    } else if (it->d_tag == DT_SYMTAB) {
+      DEBUG_LOG("%s 0x%08" PRIxPTR, "DT_SYMTAB", uintptr_t(it->d_un.d_val));
+      symtab.Init(GetPtr(it->d_un.d_ptr));
+    }
+  }
+  if (!buckets || !symnum) {
+    ERROR("%s: Missing or broken DT_HASH", GetPath());
+    return false;
+  }
+  if (!strtab) {
+    ERROR("%s: Missing DT_STRTAB", GetPath());
+    return false;
+  }
+  if (!symtab) {
+    ERROR("%s: Missing DT_SYMTAB", GetPath());
+    return false;
+  }
+  return true;
+}
diff --git a/mozglue/linker/BaseElf.h b/mozglue/linker/BaseElf.h
new file mode 100644
index 0000000000..9569dbc579
--- /dev/null
+++ b/mozglue/linker/BaseElf.h
@@ -0,0 +1,128 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BaseElf_h
+#define BaseElf_h
+
+#include "ElfLoader.h"
+#include "Elfxx.h"
+
+/**
+ * Base class for ELF libraries. This class includes things that will be
+ * common between SystemElfs and CustomElfs.
+ */
+class BaseElf : public LibHandle {
+ public:
+  /** Hash function for symbol lookup, as defined by the System V ELF ABI. */
+  static unsigned long Hash(const char* symbol);
+
+  /**
+   * Returns the address corresponding to the given symbol name (with a
+   * pre-computed hash).
+   */
+  void* GetSymbolPtr(const char* symbol, unsigned long hash) const;
+
+  /**
+   * Returns a pointer to the Elf Symbol in the Dynamic Symbol table
+   * corresponding to the given symbol name (with a pre-computed hash).
+   */
+  const Elf::Sym* GetSymbol(const char* symbol, unsigned long hash) const;
+
+  /**
+   * Creates a handle for the library at the given path, holding a reference
+   * to the given Mappable, if any.
+   */
+  explicit BaseElf(const char* path, Mappable* mappable = nullptr)
+      : LibHandle(path), mappable(mappable) {}
+
+ protected:
+  /**
+   * Inherited from LibHandle. Those are temporary and are not supposed to
+   * be used.
+   */
+  virtual void* GetSymbolPtr(const char* symbol) const;
+  virtual bool Contains(void* addr) const;
+  virtual void* GetBase() const { return GetPtr(0); }
+
+#ifdef __ARM_EABI__
+  virtual const void* FindExidx(int* pcount) const;
+#endif
+
+  virtual Mappable* GetMappable() const { return nullptr; }
+
+ public:
+  /* private: */
+  /**
+   * Returns a pointer relative to the base address where the library is
+   * loaded. An offset that already compares above base is returned as is.
+   */
+  void* GetPtr(const Elf::Addr offset) const {
+    if (reinterpret_cast<void*>(offset) > base)
+      return reinterpret_cast<void*>(offset);
+    return base + offset;
+  }
+
+  /** Like the above, but returns a typed (const) pointer. */
+  template <typename T>
+  const T* GetPtr(const Elf::Addr offset) const {
+    if (reinterpret_cast<void*>(offset) > base)
+      return reinterpret_cast<const T*>(offset);
+    return reinterpret_cast<const T*>(base + offset);
+  }
+
+  /* Appropriated Mappable */
+  /* /!\ we rely on this being nullptr for BaseElf instances, but not
+   * CustomElf instances. */
+  RefPtr<Mappable> mappable;
+
+  /* Base address where the library is loaded */
+  MappedPtr base;
+
+  /* Buckets and chains for the System V symbol hash table */
+  Array<Elf::Word> buckets;
+  UnsizedArray<Elf::Word> chains;
+
+  /* protected: */
+  /* String table */
+  Elf::Strtab strtab;
+
+  /* Symbol table */
+  UnsizedArray<Elf::Sym> symtab;
+
+#ifdef __ARM_EABI__
+  /* ARM.exidx information used by FindExidx */
+  Array<uint32_t[2]> arm_exidx;
+#endif
+};
+
+/**
+ * Class for ELF libraries that are already loaded in memory.
+ */
+class LoadedElf : public BaseElf {
+ public:
+  /**
+   * Returns a LoadedElf corresponding to the already loaded ELF at the given
+   * base address, or nullptr if it cannot be used.
+   */
+  static already_AddRefed<LibHandle> Create(const char* path, void* base_addr);
+
+ private:
+  explicit LoadedElf(const char* path) : BaseElf(path) {}
+
+  ~LoadedElf() {
+    /* Avoid base's destructor unmapping something that doesn't actually
+     * belong to it. */
+    base.release();
+    ElfLoader::Singleton.Forget(this);
+  }
+
+  /**
+   * Initializes the library according to information found in the given
+   * PT_DYNAMIC header.
+   * Returns true on success and false on failure.
+   */
+  bool InitDyn(const Elf::Phdr* pt_dyn);
+};
+
+#endif /* BaseElf_h */
diff --git a/mozglue/linker/CustomElf.cpp b/mozglue/linker/CustomElf.cpp
new file mode 100644
index 0000000000..5d44b34d22
--- /dev/null
+++ b/mozglue/linker/CustomElf.cpp
@@ -0,0 +1,680 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstring>
+#include <sys/mman.h>
+#include <vector>
+#include <dlfcn.h>
+#include <signal.h>
+#include <string.h>
+#include "CustomElf.h"
+#include "BaseElf.h"
+#include "Mappable.h"
+#include "Logging.h"
+#include "mozilla/IntegerPrintfMacros.h"
+
+using namespace Elf;
+
+/* TODO: Fill ElfLoader::Singleton.lastError on errors. */
+
+const Ehdr* Ehdr::validate(const void* buf) {
+  if (!buf || buf == MAP_FAILED) return nullptr;
+
+  /* Only ELF executables or libraries for the host system are supported;
+   * the first header field that does not match makes us return nullptr. */
+  const Ehdr* hdr = reinterpret_cast<const Ehdr*>(buf);
+  const auto& ident = hdr->e_ident;
+  if (memcmp(ELFMAG, &ident, SELFMAG)) return nullptr;
+  if (ident[EI_CLASS] != ELFCLASS || ident[EI_DATA] != ELFDATA) return nullptr;
+  if (ident[EI_VERSION] != 1) return nullptr;
+  if (ident[EI_OSABI] != ELFOSABI && ident[EI_OSABI] != ELFOSABI_NONE)
+    return nullptr;
+#ifdef EI_ABIVERSION
+  if (ident[EI_ABIVERSION] != ELFABIVERSION) return nullptr;
+#endif
+  if (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN) return nullptr;
+  if (hdr->e_machine != ELFMACHINE || hdr->e_version != 1) return nullptr;
+  if (hdr->e_phentsize != sizeof(Phdr)) return nullptr;
+
+  return hdr;
+}
+
+namespace {
+
+/* Logs one program header under the given type label: virtual address,
+ * file size, memory size, file offset and r/w/x flags. */
+void debug_phdr(const char* type, const Phdr* phdr) {
+  DEBUG_LOG(
+      "%s @0x%08" PRIxPTR " (filesz: 0x%08" PRIxPTR ", memsz: 0x%08" PRIxPTR
+      ", offset: 0x%08" PRIxPTR ", flags: %c%c%c)",
+      type, uintptr_t(phdr->p_vaddr), uintptr_t(phdr->p_filesz),
+      uintptr_t(phdr->p_memsz), uintptr_t(phdr->p_offset),
+      (phdr->p_flags & PF_R) ? 'r' : '-', (phdr->p_flags & PF_W) ? 'w' : '-',
+      (phdr->p_flags & PF_X) ? 'x' : '-');
+}
+
+/* Translates ELF segment permission flags into the matching PROT_* bits. */
+static int p_flags_to_mprot(Word flags) {
+  int prot = 0;
+  if (flags & PF_X) prot |= PROT_EXEC;
+  if (flags & PF_W) prot |= PROT_WRITE;
+  if (flags & PF_R) prot |= PROT_READ;
+  return prot;
+}
+
+} /* anonymous namespace */
+
+/**
+ * RAII wrapper for a mapping of the first page of a Mappable object.
+ * This calls Mappable::munmap instead of system munmap.
+ */
+class Mappable1stPagePtr : public GenericMappedPtr<Mappable1stPagePtr> {
+ public:
+  explicit Mappable1stPagePtr(Mappable* mappable)
+      : GenericMappedPtr<Mappable1stPagePtr>(
+            mappable->mmap(nullptr, PageSize(), PROT_READ, MAP_PRIVATE, 0)),
+        mappable(mappable) {}
+
+ private:
+  friend class GenericMappedPtr<Mappable1stPagePtr>;
+  void munmap(void* buf, size_t length) { mappable->munmap(buf, length); }
+
+  RefPtr<Mappable> mappable;  // the Mappable whose munmap gets called
+};
+
+already_AddRefed<LibHandle> CustomElf::Load(Mappable* mappable,
+                                            const char* path, int flags) {
+  DEBUG_LOG("CustomElf::Load(\"%s\", 0x%x) = ...", path, flags);
+  if (!mappable) return nullptr;
+  /* Keeping a RefPtr of the CustomElf is going to free the appropriate
+   * resources when returning nullptr */
+  RefPtr<CustomElf> elf = new CustomElf(mappable, path);
+  /* Map the first page of the Elf object to access Elf and program headers */
+  Mappable1stPagePtr ehdr_raw(mappable);
+  if (ehdr_raw == MAP_FAILED) return nullptr;
+
+  const Ehdr* ehdr = Ehdr::validate(ehdr_raw);
+  if (!ehdr) return nullptr;
+
+  /* Scan Elf Program Headers and gather some information about them */
+  std::vector<const Phdr*> pt_loads;
+  Addr min_vaddr = (Addr)-1;  // We want to find the lowest and highest
+  Addr max_vaddr = 0;         // virtual address used by this Elf.
+  const Phdr* dyn = nullptr;
+
+  const Phdr* first_phdr = reinterpret_cast<const Phdr*>(
+      reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff);
+  const Phdr* end_phdr = &first_phdr[ehdr->e_phnum];
+#ifdef __ARM_EABI__
+  const Phdr* arm_exidx_phdr = nullptr;
+#endif
+
+  for (const Phdr* phdr = first_phdr; phdr < end_phdr; phdr++) {
+    switch (phdr->p_type) {
+      case PT_LOAD:
+        debug_phdr("PT_LOAD", phdr);
+        pt_loads.push_back(phdr);
+        if (phdr->p_vaddr < min_vaddr) min_vaddr = phdr->p_vaddr;
+        if (max_vaddr < phdr->p_vaddr + phdr->p_memsz)
+          max_vaddr = phdr->p_vaddr + phdr->p_memsz;
+        break;
+      case PT_DYNAMIC:
+        debug_phdr("PT_DYNAMIC", phdr);
+        if (!dyn) {
+          dyn = phdr;
+        } else {
+          ERROR("%s: Multiple PT_DYNAMIC segments detected", elf->GetPath());
+          return nullptr;
+        }
+        break;
+      case PT_TLS:
+        debug_phdr("PT_TLS", phdr);
+        if (phdr->p_memsz) {
+          ERROR("%s: TLS is not supported", elf->GetPath());
+          return nullptr;
+        }
+        break;
+      case PT_GNU_STACK:
+        debug_phdr("PT_GNU_STACK", phdr);
+// Skip on Android until bug 706116 is fixed
+#ifndef ANDROID
+        if (phdr->p_flags & PF_X) {
+          ERROR("%s: Executable stack is not supported", elf->GetPath());
+          return nullptr;
+        }
+#endif
+        break;
+#ifdef __ARM_EABI__
+      case PT_ARM_EXIDX:
+        /* We cannot initialize arm_exidx here
+           because we don't have a base yet */
+        arm_exidx_phdr = phdr;
+        break;
+#endif
+      default:
+        DEBUG_LOG("%s: Program header type #%d not handled", elf->GetPath(),
+                  phdr->p_type);
+    }
+  }
+
+  if (min_vaddr != 0) {
+    ERROR("%s: Unsupported minimal virtual address: 0x%08" PRIxPTR,
+          elf->GetPath(), uintptr_t(min_vaddr));
+    return nullptr;
+  }
+  if (!dyn) {
+    ERROR("%s: No PT_DYNAMIC segment found", elf->GetPath());
+    return nullptr;
+  }
+
+  /* Reserve enough memory to map the complete virtual address space for this
+   * library.
+   * As we are using the base address from here to mmap something else with
+   * MAP_FIXED | MAP_SHARED, we need to make sure these mmaps will work. For
+   * instance, on armv6, MAP_SHARED mappings require a 16k alignment, but mmap
+   * MAP_PRIVATE only returns a 4k aligned address. So we first get a base
+   * address with MAP_SHARED, which guarantees the kernel returns an address
+   * that we'll be able to use with MAP_FIXED, and then remap MAP_PRIVATE at
+   * the same address, because of some bad side effects of keeping it as
+   * MAP_SHARED. */
+  elf->base.Assign(MemoryRange::mmap(nullptr, max_vaddr, PROT_NONE,
+                                     MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+  if ((elf->base == MAP_FAILED) ||
+      (mmap(elf->base, max_vaddr, PROT_NONE,
+            MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != elf->base)) {
+    ERROR("%s: Failed to mmap", elf->GetPath());
+    return nullptr;
+  }
+
+  /* Load and initialize library */
+  for (std::vector<const Phdr*>::iterator it = pt_loads.begin();
+       it < pt_loads.end(); ++it)
+    if (!elf->LoadSegment(*it)) return nullptr;
+
+  /* We're not going to mmap anymore */
+  mappable->finalize();
+
+  elf->l_addr = elf->base;
+  elf->l_name = elf->GetPath();
+  elf->l_ld = elf->GetPtr<Dyn>(dyn->p_vaddr);
+  ElfLoader::Singleton.Register(elf);
+
+  if (!elf->InitDyn(dyn)) return nullptr;
+
+  if (elf->has_text_relocs) {  // text relocations need writable segments
+    for (std::vector<const Phdr*>::iterator it = pt_loads.begin();
+         it < pt_loads.end(); ++it)
+      mprotect(PageAlignedPtr(elf->GetPtr((*it)->p_vaddr)),
+               PageAlignedEndPtr((*it)->p_memsz),
+               p_flags_to_mprot((*it)->p_flags) | PROT_WRITE);
+  }
+
+  if (!elf->Relocate() || !elf->RelocateJumps()) return nullptr;
+
+  if (elf->has_text_relocs) {  // put the original protections back
+    for (std::vector<const Phdr*>::iterator it = pt_loads.begin();
+         it < pt_loads.end(); ++it)
+      mprotect(PageAlignedPtr(elf->GetPtr((*it)->p_vaddr)),
+               PageAlignedEndPtr((*it)->p_memsz),
+               p_flags_to_mprot((*it)->p_flags));
+  }
+
+  if (!elf->CallInit()) return nullptr;
+
+#ifdef __ARM_EABI__
+  if (arm_exidx_phdr)
+    elf->arm_exidx.InitSize(elf->GetPtr(arm_exidx_phdr->p_vaddr),
+                            arm_exidx_phdr->p_memsz);
+#endif
+
+  DEBUG_LOG("CustomElf::Load(\"%s\", 0x%x) = %p", path, flags,
+            static_cast<void*>(elf));
+  return elf.forget();
+}
+
+CustomElf::~CustomElf() {
+  DEBUG_LOG("CustomElf::~CustomElf(%p [\"%s\"])", reinterpret_cast<void*>(this),
+            GetPath());
+  CallFini();
+  /* Normally, __cxa_finalize is called by the .fini function. However,
+   * Android NDK before r6b doesn't do that. Our wrapped cxa_finalize only
+   * calls destructors once, so call it in all cases. */
+  ElfLoader::__wrap_cxa_finalize(this);
+  ElfLoader::Singleton.Forget(this);  // undo the Register done in Load
+}
+
+void* CustomElf::GetSymbolPtrInDeps(const char* symbol) const {
+  /* Compares what follows the two-character prefix of the symbol name,
+   * once a branch below has established that the prefix is there. */
+  auto rest_is = [symbol](const char* tail) {
+    return strcmp(symbol + 2, tail) == 0;
+  };
+
+  if (symbol[0] == 'd' && symbol[1] == 'l') {
+    /* dlopen and related functions resolve to our own wrappers */
+    if (rest_is("open")) return FunctionPtr(__wrap_dlopen);
+    if (rest_is("error")) return FunctionPtr(__wrap_dlerror);
+    if (rest_is("close")) return FunctionPtr(__wrap_dlclose);
+    if (rest_is("sym")) return FunctionPtr(__wrap_dlsym);
+    if (rest_is("addr")) return FunctionPtr(__wrap_dladdr);
+    if (rest_is("_iterate_phdr"))
+      return FunctionPtr(__wrap_dl_iterate_phdr);
+  } else if (symbol[0] == '_' && symbol[1] == '_') {
+    /* A few C++ ABI specific functions resolve to ours as well */
+#ifdef __ARM_EABI__
+    if (rest_is("aeabi_atexit"))
+      return FunctionPtr(&ElfLoader::__wrap_aeabi_atexit);
+#else
+    if (rest_is("cxa_atexit"))
+      return FunctionPtr(&ElfLoader::__wrap_cxa_atexit);
+#endif
+    if (rest_is("cxa_finalize"))
+      return FunctionPtr(&ElfLoader::__wrap_cxa_finalize);
+    if (rest_is("dso_handle")) return const_cast<CustomElf*>(this);
+#ifdef __ARM_EABI__
+    if (rest_is("gnu_Unwind_Find_exidx"))
+      return FunctionPtr(__wrap___gnu_Unwind_Find_exidx);
+#endif
+  } else if (symbol[0] == 's' && symbol[1] == 'i') {
+    /* signal and sigaction resolve to the ones this code links against */
+    if (rest_is("gnal")) return FunctionPtr(signal);
+    if (rest_is("gaction")) return FunctionPtr(sigaction);
+  }
+
+  const unsigned long hash = Hash(symbol);
+
+  /* self_elf should never be NULL, but better safe than sorry. */
+  if (ElfLoader::Singleton.self_elf) {
+    /* The library containing this code is treated as a permanent
+     * LD_PRELOAD, so it is searched for the symbol before anything
+     * else. */
+    BaseElf* self = static_cast<BaseElf*>(ElfLoader::Singleton.self_elf.get());
+    if (void* found = self->GetSymbolPtr(symbol, hash)) return found;
+  }
+
+  /* Next, search the symbol in our dependencies. Libraries loaded by the
+   * system linker were already searched, so they are skipped (on glibc
+   * systems). The symbol is also assumed to be found directly in one of
+   * the dependent libraries, not in their own dependent libraries.
+   * Building libraries with --no-allow-shlib-undefined ensures such
+   * indirect symbol dependency don't
+   * happen. */
+  for (const RefPtr<LibHandle>& dep : dependencies) {
+    /* The library containing this code was already looked at above. */
+    if (dep == ElfLoader::Singleton.self_elf) continue;
+    BaseElf* be = dep->AsBaseElf();
+    void* found =
+        be ? be->GetSymbolPtr(symbol, hash) : dep->GetSymbolPtr(symbol);
+    if (found) return found;
+  }
+  return nullptr;
+}
+
+bool CustomElf::LoadSegment(const Phdr* pt_load) const {
+  /* Maps the given PT_LOAD segment from the mappable with MAP_FIXED. */
+  if (pt_load->p_type != PT_LOAD) {
+    DEBUG_LOG("%s: Elf::LoadSegment only takes PT_LOAD program headers",
+              GetPath());
+    return false;
+  }
+
+  int prot = p_flags_to_mprot(pt_load->p_flags);
+
+  /* Mmap at page boundary */
+  Addr align = PageSize();
+  Addr align_offset;
+  void *mapped, *where;
+  do {
+    align_offset = pt_load->p_vaddr - AlignedPtr(pt_load->p_vaddr, align);
+    where = GetPtr(pt_load->p_vaddr - align_offset);
+    DEBUG_LOG("%s: Loading segment @%p %c%c%c", GetPath(), where,
+              prot & PROT_READ ? 'r' : '-', prot & PROT_WRITE ? 'w' : '-',
+              prot & PROT_EXEC ? 'x' : '-');
+    mapped = mappable->mmap(where, pt_load->p_filesz + align_offset, prot,
+                            MAP_PRIVATE | MAP_FIXED,
+                            pt_load->p_offset - align_offset);
+    if ((mapped != MAP_FAILED) || (pt_load->p_vaddr == 0) ||
+        (pt_load->p_align == align))
+      break;
+    /* The virtual address space for the library is properly aligned at
+     * 16k on ARMv6 (see CustomElf::Load), and so is the first segment
+     * (p_vaddr == 0). But subsequent segments may not be 16k aligned
+     * and fail to mmap. In such case, try to mmap again at the p_align
+     * boundary instead of page boundary. */
+    DEBUG_LOG("%s: Failed to mmap, retrying", GetPath());
+    align = pt_load->p_align;
+  } while (1);
+
+  if (mapped != where) {
+    if (mapped == MAP_FAILED) {
+      ERROR("%s: Failed to mmap", GetPath());
+    } else {
+      ERROR("%s: Didn't map at the expected location (wanted: %p, got: %p)",
+            GetPath(), where, mapped);
+    }
+    return false;
+  }
+
+  /* When p_memsz is greater than p_filesz, we need to have nulled out memory
+   * after p_filesz and before p_memsz.
+   * Above the end of the last page, and up to p_memsz, we already have nulled
+   * out memory because we mapped anonymous memory on the whole library virtual
+   * address space. We just need to adjust this anonymous memory protection
+   * flags. */
+  if (pt_load->p_memsz > pt_load->p_filesz) {
+    Addr file_end = pt_load->p_vaddr + pt_load->p_filesz;
+    Addr mem_end = pt_load->p_vaddr + pt_load->p_memsz;
+    Addr next_page = PageAlignedEndPtr(file_end);
+    if (next_page > file_end) {
+      void* ptr = GetPtr(file_end);
+      memset(ptr, 0, next_page - file_end);
+    }
+    if (mem_end > next_page) {
+      if (mprotect(GetPtr(next_page), mem_end - next_page, prot) < 0) {
+        ERROR("%s: Failed to mprotect", GetPath());
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+namespace {
+
+/**
+ * Debug-logs one dynamic section entry: the given tag name followed by the
+ * entry's d_un.d_val printed as a zero-padded hexadecimal number.
+ */
+void debug_dyn(const char* type, const Dyn* dyn) {
+  DEBUG_LOG("%s 0x%08" PRIxPTR, type,
+            static_cast<uintptr_t>(dyn->d_un.d_val));
+}
+
+} /* anonymous namespace */
+
+/**
+ * Scans the PT_DYNAMIC segment described by pt_dyn and records what the
+ * rest of the loader needs: hash buckets/chains, string and symbol tables,
+ * relocation tables, and init/fini entry points. Entries are visited from
+ * p_vaddr up to p_vaddr + p_filesz, stopping early at a null d_tag.
+ *
+ * Returns false when an unsupported entry is found (wrong DT_SYMENT or
+ * relocation entry size, unsupported relocation flavour, text relocations
+ * outside libflashplayer.so, mismatched DT_PLTREL), when DT_HASH, DT_STRTAB
+ * or DT_SYMTAB is missing, or when a DT_NEEDED dependency fails to load.
+ * Returns true otherwise.
+ */
+bool CustomElf::InitDyn(const Phdr* pt_dyn) {
+  /* Scan PT_DYNAMIC segment and gather some information */
+  const Dyn* first_dyn = GetPtr<Dyn>(pt_dyn->p_vaddr);
+  const Dyn* end_dyn = GetPtr<Dyn>(pt_dyn->p_vaddr + pt_dyn->p_filesz);
+  /* DT_NEEDED values are only collected here; they are looked up in strtab
+   * after the scan, once the string table is known. */
+  std::vector<Word> dt_needed;
+  size_t symnum = 0;
+  for (const Dyn* dyn = first_dyn; dyn < end_dyn && dyn->d_tag; dyn++) {
+    switch (dyn->d_tag) {
+      case DT_NEEDED:
+        debug_dyn("DT_NEEDED", dyn);
+        dt_needed.push_back(dyn->d_un.d_val);
+        break;
+      case DT_HASH: {
+        debug_dyn("DT_HASH", dyn);
+        /* Word 0 is used as the bucket count and word 1 as symnum; the
+         * buckets start at word 2 and the chains right after the buckets. */
+        const Word* hash_table_header = GetPtr<Word>(dyn->d_un.d_ptr);
+        symnum = hash_table_header[1];
+        buckets.Init(&hash_table_header[2], hash_table_header[0]);
+        chains.Init(&*buckets.end());
+      } break;
+      case DT_STRTAB:
+        debug_dyn("DT_STRTAB", dyn);
+        strtab.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case DT_SYMTAB:
+        debug_dyn("DT_SYMTAB", dyn);
+        symtab.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case DT_SYMENT:
+        debug_dyn("DT_SYMENT", dyn);
+        if (dyn->d_un.d_val != sizeof(Sym)) {
+          ERROR("%s: Unsupported DT_SYMENT", GetPath());
+          return false;
+        }
+        break;
+      case DT_TEXTREL:
+        /* Text relocations are only tolerated for libflashplayer.so. */
+        if (strcmp("libflashplayer.so", GetName()) == 0) {
+          has_text_relocs = true;
+        } else {
+          ERROR("%s: Text relocations are not supported", GetPath());
+          return false;
+        }
+        break;
+      case DT_STRSZ: /* Ignored */
+        debug_dyn("DT_STRSZ", dyn);
+        break;
+      case UNSUPPORTED_RELOC():
+      case UNSUPPORTED_RELOC(SZ):
+      case UNSUPPORTED_RELOC(ENT):
+        ERROR("%s: Unsupported relocations", GetPath());
+        return false;
+      case RELOC():
+        debug_dyn(STR_RELOC(), dyn);
+        relocations.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case RELOC(SZ):
+        debug_dyn(STR_RELOC(SZ), dyn);
+        relocations.InitSize(dyn->d_un.d_val);
+        break;
+      case RELOC(ENT):
+        debug_dyn(STR_RELOC(ENT), dyn);
+        if (dyn->d_un.d_val != sizeof(Reloc)) {
+          ERROR("%s: Unsupported DT_RELENT", GetPath());
+          return false;
+        }
+        break;
+      case DT_JMPREL:
+        debug_dyn("DT_JMPREL", dyn);
+        jumprels.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case DT_PLTRELSZ:
+        debug_dyn("DT_PLTRELSZ", dyn);
+        jumprels.InitSize(dyn->d_un.d_val);
+        break;
+      case DT_PLTGOT:
+        debug_dyn("DT_PLTGOT", dyn);
+        break;
+      case DT_INIT:
+        debug_dyn("DT_INIT", dyn);
+        init = dyn->d_un.d_ptr;
+        break;
+      case DT_INIT_ARRAY:
+        debug_dyn("DT_INIT_ARRAY", dyn);
+        init_array.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case DT_INIT_ARRAYSZ:
+        debug_dyn("DT_INIT_ARRAYSZ", dyn);
+        init_array.InitSize(dyn->d_un.d_val);
+        break;
+      case DT_FINI:
+        debug_dyn("DT_FINI", dyn);
+        fini = dyn->d_un.d_ptr;
+        break;
+      case DT_FINI_ARRAY:
+        debug_dyn("DT_FINI_ARRAY", dyn);
+        fini_array.Init(GetPtr(dyn->d_un.d_ptr));
+        break;
+      case DT_FINI_ARRAYSZ:
+        debug_dyn("DT_FINI_ARRAYSZ", dyn);
+        fini_array.InitSize(dyn->d_un.d_val);
+        break;
+      case DT_PLTREL:
+        /* The PLT relocations must use the same flavour as RELOC(). */
+        if (dyn->d_un.d_val != RELOC()) {
+          ERROR("%s: Error: DT_PLTREL is not " STR_RELOC(), GetPath());
+          return false;
+        }
+        break;
+      case DT_FLAGS: {
+        Addr flags = dyn->d_un.d_val;
+        /* Treat as a DT_TEXTREL tag */
+        if (flags & DF_TEXTREL) {
+          if (strcmp("libflashplayer.so", GetName()) == 0) {
+            has_text_relocs = true;
+          } else {
+            ERROR("%s: Text relocations are not supported", GetPath());
+            return false;
+          }
+        }
+        /* we can treat this like having a DT_SYMBOLIC tag */
+        flags &= ~DF_SYMBOLIC;
+        /* Note that DF_TEXTREL is not cleared above, so an accepted
+         * DF_TEXTREL still triggers this warning. */
+        if (flags)
+          WARN("%s: unhandled flags #%" PRIxPTR " not handled", GetPath(),
+               uintptr_t(flags));
+      } break;
+      case DT_SONAME:    /* Should match GetName(), but doesn't matter */
+      case DT_SYMBOLIC:  /* Indicates internal symbols should be looked up in
+                          * the library itself first instead of the executable,
+                          * which is actually what this linker does by default */
+      case RELOC(COUNT): /* Indicates how many relocations are relative, which
+                          * is usually used to skip relocations on prelinked
+                          * libraries. They are not supported anyways. */
+      case UNSUPPORTED_RELOC(COUNT): /* This should error out, but it doesn't
+                                      * really matter. */
+      case DT_FLAGS_1: /* Additional linker-internal flags that we don't care
+                        * about. See DF_1_* values in src/include/elf/common.h
+                        * in binutils. */
+      case DT_VERSYM:  /* DT_VER* entries are used for symbol versioning, which
+                        */
+      case DT_VERDEF:  /* this linker doesn't support yet. */
+      case DT_VERDEFNUM:
+      case DT_VERNEED:
+      case DT_VERNEEDNUM:
+        /* Ignored */
+        break;
+      default:
+        /* Unknown tags are reported but do not fail the load. */
+        WARN("%s: dynamic header type #%" PRIxPTR " not handled", GetPath(),
+             uintptr_t(dyn->d_tag));
+    }
+  }
+
+  /* The hash, string and symbol tables are mandatory. */
+  if (!buckets || !symnum) {
+    ERROR("%s: Missing or broken DT_HASH", GetPath());
+    return false;
+  }
+  if (!strtab) {
+    ERROR("%s: Missing DT_STRTAB", GetPath());
+    return false;
+  }
+  if (!symtab) {
+    ERROR("%s: Missing DT_SYMTAB", GetPath());
+    return false;
+  }
+
+  /* Load dependent libraries */
+  for (size_t i = 0; i < dt_needed.size(); i++) {
+    const char* name = strtab.GetStringAt(dt_needed[i]);
+    /* This object is passed as the parent of the dependency being loaded. */
+    RefPtr<LibHandle> handle =
+        ElfLoader::Singleton.Load(name, RTLD_GLOBAL | RTLD_LAZY, this);
+    if (!handle) return false;
+    dependencies.push_back(handle);
+  }
+
+  return true;
+}
+
+/**
+ * Applies every entry of the `relocations` table. Relative relocations are
+ * resolved against the library base; the other supported types
+ * (R_GLOB_DAT, R_ABS) need a symbol, taken from this library when defined
+ * and from its dependencies otherwise. Returns false on the first
+ * unsupported relocation type, true once all entries have been applied.
+ */
+bool CustomElf::Relocate() {
+  DEBUG_LOG("Relocate %s @%p", GetPath(), static_cast<void*>(base));
+  /* One-entry cache: consecutive relocations against the same symbol index
+   * reuse the previously resolved address. */
+  uint32_t cached_index = (uint32_t)-1;
+  void* cached_symbol = nullptr;
+  for (Array<Reloc>::iterator rel = relocations.begin();
+       rel < relocations.end(); ++rel) {
+    /* Address the relocation is written to */
+    void* target = GetPtr(rel->r_offset);
+
+    /* R_*_RELATIVE needs no symbol: store base-relative addend directly */
+    if (ELF_R_TYPE(rel->r_info) == R_RELATIVE) {
+      *(void**)target = GetPtr(rel->GetAddend(base));
+      continue;
+    }
+
+    /* Everything else is symbol based; resolve only when the symbol index
+     * differs from the one handled in the previous iteration. */
+    if (cached_index != ELF_R_SYM(rel->r_info)) {
+      cached_index = ELF_R_SYM(rel->r_info);
+      const Sym sym = symtab[cached_index];
+      if (sym.st_shndx == SHN_UNDEF) {
+        /* TODO: handle symbol resolving to nullptr vs. being undefined. */
+        cached_symbol = GetSymbolPtrInDeps(strtab.GetStringAt(sym.st_name));
+      } else {
+        cached_symbol = GetPtr(sym.st_value);
+      }
+    }
+
+    if (cached_symbol == nullptr)
+      WARN("%s: Relocation to NULL @0x%08" PRIxPTR, GetPath(),
+           uintptr_t(rel->r_offset));
+
+    /* Write the relocated value according to the relocation type */
+    switch (ELF_R_TYPE(rel->r_info)) {
+      case R_GLOB_DAT:
+        /* R_*_GLOB_DAT: the slot receives the symbol address as is */
+        *(void**)target = cached_symbol;
+        break;
+      case R_ABS:
+        /* R_*_ABS*: the slot receives symbol address plus addend */
+        *(const char**)target =
+            (const char*)cached_symbol + rel->GetAddend(base);
+        break;
+      default:
+        ERROR("%s: Unsupported relocation type: 0x%" PRIxPTR, GetPath(),
+              uintptr_t(ELF_R_TYPE(rel->r_info)));
+        return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Applies every entry of the `jumprels` table, all of which must be
+ * R_*_JMP_SLOT relocations. Each slot receives the address of its symbol,
+ * looked up in this library when defined and in its dependencies otherwise.
+ * An unresolved weak symbol only produces a warning (the slot is set to
+ * null); any other unresolved symbol, or a relocation of another type,
+ * makes the function return false.
+ */
+bool CustomElf::RelocateJumps() {
+  /* TODO: Dynamic symbol resolution */
+  for (Array<Reloc>::iterator rel = jumprels.begin(); rel < jumprels.end();
+       ++rel) {
+    /* Address of the slot to fill */
+    void* slot = GetPtr(rel->r_offset);
+
+    /* Anything other than R_*_JMP_SLOT is an error here */
+    if (ELF_R_TYPE(rel->r_info) != R_JMP_SLOT) {
+      ERROR("%s: Jump relocation type mismatch", GetPath());
+      return false;
+    }
+
+    /* TODO: Avoid code duplication with the relocations above */
+    const Sym sym = symtab[ELF_R_SYM(rel->r_info)];
+    void* resolved;
+    if (sym.st_shndx == SHN_UNDEF) {
+      resolved = GetSymbolPtrInDeps(strtab.GetStringAt(sym.st_name));
+    } else {
+      resolved = GetPtr(sym.st_value);
+    }
+
+    if (resolved == nullptr) {
+      if (ELF_ST_BIND(sym.st_info) != STB_WEAK) {
+        ERROR("%s: Relocation to NULL @0x%08" PRIxPTR " for symbol \"%s\"",
+              GetPath(), uintptr_t(rel->r_offset),
+              strtab.GetStringAt(sym.st_name));
+        return false;
+      }
+      WARN("%s: Relocation to NULL @0x%08" PRIxPTR " for symbol \"%s\"",
+           GetPath(), uintptr_t(rel->r_offset),
+           strtab.GetStringAt(sym.st_name));
+    }
+    /* Fill the slot */
+    *(void**)slot = resolved;
+  }
+  return true;
+}
+
+/**
+ * Runs the library initializers: the DT_INIT function first (when set),
+ * then each usable entry of init_array in order. Marks the library as
+ * initialized and always returns true.
+ */
+bool CustomElf::CallInit() {
+  if (init) {
+    CallFunction(init);
+  }
+
+  /* Android x86 NDK wrongly puts 0xffffffff in INIT_ARRAY */
+  void* const bogus_entry = reinterpret_cast<void*>(-1);
+  for (Array<void*>::iterator it = init_array.begin(); it < init_array.end();
+       ++it) {
+    if (!*it || *it == bogus_entry) {
+      continue;
+    }
+    CallFunction(*it);
+  }
+  initialized = true;
+  return true;
+}
+
+/**
+ * Runs the library finalizers, but only if CallInit() completed: each
+ * usable entry of fini_array in reverse order, then the DT_FINI function
+ * (when set).
+ */
+void CustomElf::CallFini() {
+  if (!initialized) {
+    return;
+  }
+
+  /* Android x86 NDK wrongly puts 0xffffffff in FINI_ARRAY */
+  void* const bogus_entry = reinterpret_cast<void*>(-1);
+  for (Array<void*>::reverse_iterator it = fini_array.rbegin();
+       it < fini_array.rend(); ++it) {
+    if (!*it || *it == bogus_entry) {
+      continue;
+    }
+    CallFunction(*it);
+  }
+  if (fini) {
+    CallFunction(fini);
+  }
+}
+
+/**
+ * Returns nullptr when the library has no mappable, the library's own
+ * mappable when it is of the MAPPABLE_EXTRACT_FILE kind, and otherwise a
+ * mappable obtained from the library path.
+ */
+Mappable* CustomElf::GetMappable() const {
+  if (!mappable) {
+    return nullptr;
+  }
+  if (mappable->GetKind() != Mappable::MAPPABLE_EXTRACT_FILE) {
+    return ElfLoader::GetMappableFromPath(GetPath());
+  }
+  return mappable;
+}
diff --git a/mozglue/linker/CustomElf.h b/mozglue/linker/CustomElf.h
new file mode 100644
index 0000000000..f7b116e9d3
--- /dev/null
+++ b/mozglue/linker/CustomElf.h
@@ -0,0 +1,147 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CustomElf_h
+#define CustomElf_h
+
+#include "ElfLoader.h"
+#include "BaseElf.h"
+#include "Logging.h"
+#include "Elfxx.h"
+
+/**
+ * Library Handle class for ELF libraries we don't let the system linker
+ * handle.
+ */
+class CustomElf : public BaseElf, private ElfLoader::link_map {
+  friend class ElfLoader;
+  friend class SEGVHandler;
+
+ public:
+  /**
+   * Returns a new CustomElf using the given Mappable to map ELF content.
+   * The path is the one the Mappable corresponds to, and flags are the same
+   * kind of flags that would be given to dlopen(), though currently, none
+   * are supported and the behaviour is more or less that of
+   * RTLD_GLOBAL | RTLD_BIND_NOW.
+   * NOTE(review): an earlier version of this comment described a file
+   * descriptor whose ownership was stolen; the ownership rules for the
+   * Mappable are not visible in this header -- confirm in CustomElf.cpp.
+   */
+  static already_AddRefed<LibHandle> Load(Mappable* mappable, const char* path,
+                                          int flags);
+
+  /**
+   * Inherited from LibHandle/BaseElf
+   */
+  virtual ~CustomElf();
+
+ protected:
+  virtual Mappable* GetMappable() const;
+
+ public:
+  /**
+   * Returns the instance, casted as BaseElf. (short of a better way to do
+   * this without RTTI)
+   */
+  virtual BaseElf* AsBaseElf() { return this; }
+
+ private:
+  /**
+   * Scan dependent libraries to find the address corresponding to the
+   * given symbol name. This is used to find symbols that are undefined
+   * in the Elf object.
+   */
+  void* GetSymbolPtrInDeps(const char* symbol) const;
+
+  /**
+   * Private constructor; instances are created through Load(). Starts with
+   * no init/fini function, not initialized and without text relocations.
+   */
+  CustomElf(Mappable* mappable, const char* path)
+      : BaseElf(path, mappable),
+        link_map(),
+        init(0),
+        fini(0),
+        initialized(false),
+        has_text_relocs(false) {}
+
+  /**
+   * Loads an Elf segment defined by the given PT_LOAD header.
+   * Returns whether this succeeded or failed.
+   */
+  bool LoadSegment(const Elf::Phdr* pt_load) const;
+
+  /**
+   * Initializes the library according to information found in the given
+   * PT_DYNAMIC header.
+   * Returns whether this succeeded or failed.
+   */
+  bool InitDyn(const Elf::Phdr* pt_dyn);
+
+  /**
+   * Apply .rel.dyn/.rela.dyn relocations.
+   * Returns whether this succeeded or failed.
+   */
+  bool Relocate();
+
+  /**
+   * Apply .rel.plt/.rela.plt relocations.
+   * Returns whether this succeeded or failed.
+   */
+  bool RelocateJumps();
+
+  /**
+   * Call initialization functions (.init/.init_array)
+   * Returns true;
+   */
+  bool CallInit();
+
+  /**
+   * Call destructor functions (.fini_array/.fini).
+   * Does not return a value.
+   */
+  void CallFini();
+
+  /**
+   * Call a function given a pointer to its location.
+   * The function is invoked with no arguments and its result is ignored.
+   */
+  void CallFunction(void* ptr) const {
+    /* C++ doesn't allow direct conversion between pointer-to-object
+     * and pointer-to-function. */
+    union {
+      void* ptr;
+      void (*func)(void);
+    } f;
+    f.ptr = ptr;
+    DEBUG_LOG("%s: Calling function @%p", GetPath(), ptr);
+    f.func();
+  }
+
+  /**
+   * Call a function given an address relative to the library base
+   */
+  void CallFunction(Elf::Addr addr) const { return CallFunction(GetPtr(addr)); }
+
+  /* List of dependent libraries */
+  std::vector<RefPtr<LibHandle> > dependencies;
+
+  /* List of .rel.dyn/.rela.dyn relocations */
+  Array<Elf::Reloc> relocations;
+
+  /* List of .rel.plt/.rela.plt relocation */
+  Array<Elf::Reloc> jumprels;
+
+  /* Relative address of the initialization and destruction functions
+   * (.init/.fini) */
+  Elf::Addr init, fini;
+
+  /* List of initialization and destruction functions
+   * (.init_array/.fini_array) */
+  Array<void*> init_array, fini_array;
+
+  /* Whether the initialization functions have been run */
+  bool initialized;
+
+  /* Whether the library was flagged as containing text relocations */
+  bool has_text_relocs;
+};
+
+#endif /* CustomElf_h */
diff --git a/mozglue/linker/ElfLoader.cpp b/mozglue/linker/ElfLoader.cpp
new file mode 100644
index 0000000000..55b113467a
--- /dev/null
+++ b/mozglue/linker/ElfLoader.cpp
@@ -0,0 +1,1360 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <errno.h>
+#include <algorithm>
+#include <fcntl.h>
+#include "ElfLoader.h"
+#include "BaseElf.h"
+#include "CustomElf.h"
+#include "Mappable.h"
+#include "Logging.h"
+#include "Utils.h"
+#include <inttypes.h>
+
+// From Utils.h
+mozilla::Atomic<size_t, mozilla::ReleaseAcquire> gPageSize;
+
+#if defined(ANDROID)
+#  include <sys/syscall.h>
+#  include <sys/system_properties.h>
+#  include <math.h>
+
+#  include <android/api-level.h>
+
+/**
+ * Return the Android SDK version read from the "ro.build.version.sdk"
+ * system property, or 0 when it could not be read. A non-zero result is
+ * cached in a function-local static and returned directly on later calls.
+ */
+static int GetAndroidSDKVersion() {
+  static int cached_sdk = 0;
+  if (!cached_sdk) {
+    char prop[PROP_VALUE_MAX] = {'\0'};
+    if (__system_property_get("ro.build.version.sdk", prop)) {
+      cached_sdk = static_cast<int>(strtol(prop, nullptr, 10));
+    }
+  }
+  return cached_sdk;
+}
+
+#  if __ANDROID_API__ < 8
+/* Android API < 8 doesn't provide sigaltstack */
+
+extern "C" {
+
+/* Replacement for the missing libc wrapper: forwards both arguments to the
+ * sigaltstack system call and returns the syscall result. */
+inline int sigaltstack(const stack_t* ss, stack_t* oss) {
+  return syscall(__NR_sigaltstack, ss, oss);
+}
+
+} /* extern "C" */
+#  endif /* __ANDROID_API__ */
+#endif   /* ANDROID */
+
+#ifdef __ARM_EABI__
+extern "C" MOZ_EXPORT const void* __gnu_Unwind_Find_exidx(void* pc, int* pcount)
+    __attribute__((weak));
+#endif
+
+/* Ideally we'd #include <link.h>, but that's a world of pain
+ * Moreover, not all versions of android support it, so we need a weak
+ * reference. */
+extern "C" MOZ_EXPORT int dl_iterate_phdr(dl_phdr_cb callback, void* data)
+    __attribute__((weak));
+
+/* Pointer to the PT_DYNAMIC section of the executable or library
+ * containing this code. */
+extern "C" Elf::Dyn _DYNAMIC[];
+
+/**
+ * dlfcn.h replacements functions
+ */
+
+/**
+ * dlopen() replacement. On Android SDK >= 23 this defers to the system
+ * dlopen(); otherwise the library is loaded through ElfLoader and, on
+ * success, a direct reference is added before the handle is returned.
+ */
+void* __wrap_dlopen(const char* path, int flags) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dlopen(path, flags);
+  }
+#endif
+
+  RefPtr<LibHandle> lib = ElfLoader::Singleton.Load(path, flags);
+  if (lib) {
+    /* Balanced by the ReleaseDirectRef() done in __wrap_dlclose(). */
+    lib->AddDirectRef();
+  }
+  return lib;
+}
+
+/**
+ * dlerror() replacement. On Android SDK >= 23 this defers to the system
+ * dlerror(). Otherwise it returns (and clears) the error recorded by the
+ * custom loader when there is one, and the system dlerror() result when
+ * there is none.
+ */
+const char* __wrap_dlerror(void) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dlerror();
+  }
+#endif
+
+  /* Fetch and reset the custom error in a single exchange. */
+  const char* custom = ElfLoader::Singleton.lastError.exchange(nullptr);
+  if (!custom) {
+    // No custom error: fall back to the system one.
+    return dlerror();
+  }
+  return custom;
+}
+
+/**
+ * dlsym() replacement. On Android SDK >= 23 this defers to the system
+ * dlsym(). Otherwise a null handle is rejected with a custom error, the
+ * RTLD_DEFAULT/RTLD_NEXT pseudo-handles are forwarded to the system
+ * dlsym(), and any other handle is treated as a LibHandle.
+ */
+void* __wrap_dlsym(void* handle, const char* symbol) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dlsym(handle, symbol);
+  }
+#endif
+
+  if (!handle) {
+    ElfLoader::Singleton.lastError = "dlsym(NULL, sym) unsupported";
+    return nullptr;
+  }
+
+  const bool is_pseudo_handle =
+      (handle == RTLD_DEFAULT) || (handle == RTLD_NEXT);
+  if (is_pseudo_handle) {
+    ElfLoader::Singleton.lastError = nullptr;  // Use system dlerror.
+    return dlsym(handle, symbol);
+  }
+
+  return reinterpret_cast<LibHandle*>(handle)->GetSymbolPtr(symbol);
+}
+
+/**
+ * dlclose() replacement. On Android SDK >= 23 this defers to the system
+ * dlclose(). Otherwise it releases the direct reference held on the
+ * LibHandle and returns 0, or records an error and returns -1 when no
+ * handle is given.
+ */
+int __wrap_dlclose(void* handle) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dlclose(handle);
+  }
+#endif
+
+  if (handle) {
+    reinterpret_cast<LibHandle*>(handle)->ReleaseDirectRef();
+    return 0;
+  }
+  ElfLoader::Singleton.lastError = "No handle given to dlclose()";
+  return -1;
+}
+
+/**
+ * dladdr() replacement. On Android SDK >= 23 this defers to the system
+ * dladdr(). Otherwise, when addr belongs to a library known to ElfLoader,
+ * info is filled with that library's path and base address and 1 is
+ * returned; addresses not owned by the custom loader are forwarded to the
+ * system dladdr().
+ */
+int __wrap_dladdr(const void* addr, Dl_info* info) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dladdr(addr, info);
+  }
+#endif
+
+  RefPtr<LibHandle> handle =
+      ElfLoader::Singleton.GetHandleByPtr(const_cast<void*>(addr));
+  if (!handle) {
+    return dladdr(addr, info);
+  }
+  info->dli_fname = handle->GetPath();
+  info->dli_fbase = handle->GetBase();
+  /* No symbol lookup is done for custom libraries. Report "no matching
+   * symbol" explicitly, as dladdr() does, rather than leaving the symbol
+   * fields with whatever the caller's Dl_info happened to contain. */
+  info->dli_sname = nullptr;
+  info->dli_saddr = nullptr;
+  return 1;
+}
+
+/**
+ * Helper used while iterating over loaded libraries. It owns a pipe, opened
+ * at construction, that fill_and_call() uses to probe whether a library
+ * base address is readable. valid_pipe records whether that pipe is usable.
+ */
+class DlIteratePhdrHelper {
+ public:
+  DlIteratePhdrHelper() {
+    int pipefd[2] = {-1, -1};
+    valid_pipe = (pipe(pipefd) == 0);
+    /* Only adopt the descriptors when pipe() succeeded. On failure pipefd
+     * holds no valid descriptors, and handing arbitrary values to
+     * AutoCloseFD could make it close unrelated descriptors later. */
+    if (valid_pipe) {
+      read_fd.reset(pipefd[0]);
+      write_fd.reset(pipefd[1]);
+    }
+  }
+
+  /**
+   * Builds a dl_phdr_info for the library at l_addr named l_name and passes
+   * it to callback along with data. Returns the callback's return value.
+   */
+  int fill_and_call(dl_phdr_cb callback, const void* l_addr, const char* l_name,
+                    void* data);
+
+ private:
+  /* Whether the pipe was created and is still considered usable */
+  bool valid_pipe;
+  /* Read and write ends of the pipe */
+  AutoCloseFD read_fd;
+  AutoCloseFD write_fd;
+};
+
+// This function is called for each shared library iterated over by
+// dl_iterate_phdr, and is used to fill a dl_phdr_info which is then
+// sent through to the dl_iterate_phdr callback.
+//
+// callback: function receiving the filled dl_phdr_info.
+// l_addr:   base address of the library; also stored as dlpi_addr.
+// l_name:   library name, stored as dlpi_name.
+// data:     opaque pointer forwarded to callback unchanged.
+// Returns whatever callback returns. dlpi_phdr/dlpi_phnum stay null/0 when
+// the Elf header at l_addr could not be safely read or validated.
+int DlIteratePhdrHelper::fill_and_call(dl_phdr_cb callback, const void* l_addr,
+                                       const char* l_name, void* data) {
+  dl_phdr_info info;
+  info.dlpi_addr = reinterpret_cast<Elf::Addr>(l_addr);
+  info.dlpi_name = l_name;
+  info.dlpi_phdr = nullptr;
+  info.dlpi_phnum = 0;
+
+  // Assuming l_addr points to Elf headers (in most cases, this is true),
+  // get the Phdr location from there.
+  // Unfortunately, when l_addr doesn't point to Elf headers, it may point
+  // to unmapped memory, or worse, unreadable memory. The only way to detect
+  // the latter without causing a SIGSEGV is to use the pointer in a system
+  // call that will try to read from there, and return an EFAULT error if
+  // it can't. One such system call is write(). It used to be possible to
+  // use a file descriptor on /dev/null for these kind of things, but recent
+  // Linux kernels never return an EFAULT error when using /dev/null.
+  // So instead, we use a self pipe. We do however need to read() from the
+  // read end of the pipe as well so as to not fill up the pipe buffer and
+  // block on subsequent writes.
+  // In the unlikely event reads from or write to the pipe fail for some
+  // other reason than EFAULT, we don't try any further and just skip setting
+  // the Phdr location for all subsequent libraries, rather than trying to
+  // start over with a new pipe.
+  int can_read = true;
+  if (valid_pipe) {
+    int ret;
+    char raw_ehdr[sizeof(Elf::Ehdr)];
+    static_assert(sizeof(raw_ehdr) < PIPE_BUF, "PIPE_BUF is too small");
+    do {
+      // writes are atomic when smaller than PIPE_BUF, per POSIX.1-2008.
+      ret = write(write_fd, l_addr, sizeof(raw_ehdr));
+    } while (ret == -1 && errno == EINTR);
+    if (ret != sizeof(raw_ehdr)) {
+      // EFAULT only means this particular address is unreadable; any other
+      // failure marks the pipe as unusable from now on.
+      if (ret == -1 && errno == EFAULT) {
+        can_read = false;
+      } else {
+        valid_pipe = false;
+      }
+    } else {
+      // Drain exactly what was written so the pipe is empty again.
+      size_t nbytes = 0;
+      do {
+        // Per POSIX.1-2008, interrupted reads can return a length smaller
+        // than the given one instead of failing with errno EINTR.
+        ret = read(read_fd, raw_ehdr + nbytes, sizeof(raw_ehdr) - nbytes);
+        if (ret > 0) nbytes += ret;
+      } while ((nbytes != sizeof(raw_ehdr) && ret > 0) ||
+               (ret == -1 && errno == EINTR));
+      if (nbytes != sizeof(raw_ehdr)) {
+        valid_pipe = false;
+      }
+    }
+  }
+
+  // The header is read straight from l_addr (not from the bytes that went
+  // through the pipe), and only when the probe above succeeded.
+  if (valid_pipe && can_read) {
+    const Elf::Ehdr* ehdr = Elf::Ehdr::validate(l_addr);
+    if (ehdr) {
+      info.dlpi_phdr = reinterpret_cast<const Elf::Phdr*>(
+          reinterpret_cast<const char*>(ehdr) + ehdr->e_phoff);
+      info.dlpi_phnum = ehdr->e_phnum;
+    }
+  }
+
+  return callback(&info, sizeof(dl_phdr_info), data);
+}
+
+/**
+ * dl_iterate_phdr() replacement. On Android SDK >= 23 this defers to the
+ * system implementation. Otherwise, with the handles mutex held, the
+ * callback is invoked for the libraries known to the custom loader (most
+ * recently registered first) and then for the system ones. When the system
+ * has no dl_iterate_phdr, the debugger helper list is walked instead.
+ * Iteration stops at the first non-zero callback result, which is returned.
+ */
+int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data) {
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() >= 23) {
+    return dl_iterate_phdr(callback, data);
+  }
+#endif
+
+  DlIteratePhdrHelper helper;
+  AutoLock lock(&ElfLoader::Singleton.handlesMutex);
+
+  if (!dl_iterate_phdr) {
+    /* For versions of Android that don't support dl_iterate_phdr (< 5.0),
+     * we go through the debugger helper data, which is known to be racy, but
+     * there's not much we can do about this :( . */
+    if (!ElfLoader::Singleton.dbg) return -1;
+
+    for (ElfLoader::DebuggerHelper::iterator it =
+             ElfLoader::Singleton.dbg.begin();
+         it < ElfLoader::Singleton.dbg.end(); ++it) {
+      const int status =
+          helper.fill_and_call(callback, it->l_addr, it->l_name, data);
+      if (status) return status;
+    }
+    return 0;
+  }
+
+  /* Libraries loaded by the custom linker first, skipping the handles that
+   * are not BaseElf instances... */
+  for (ElfLoader::LibHandleList::reverse_iterator it =
+           ElfLoader::Singleton.handles.rbegin();
+       it < ElfLoader::Singleton.handles.rend(); ++it) {
+    if (!(*it)->AsBaseElf()) {
+      continue;
+    }
+    const int status = helper.fill_and_call(callback, (*it)->GetBase(),
+                                            (*it)->GetPath(), data);
+    if (status) return status;
+  }
+  /* ... then whatever the system linker knows about. */
+  return dl_iterate_phdr(callback, data);
+}
+
+#ifdef __ARM_EABI__
+/**
+ * __gnu_Unwind_Find_exidx() replacement: asks the custom library containing
+ * pc for its exidx table when there is one, falls back to the system
+ * implementation when it is available, and otherwise reports an empty
+ * table (*pcount set to 0, nullptr returned).
+ */
+const void* __wrap___gnu_Unwind_Find_exidx(void* pc, int* pcount) {
+  RefPtr<LibHandle> owner = ElfLoader::Singleton.GetHandleByPtr(pc);
+  if (owner) {
+    return owner->FindExidx(pcount);
+  }
+  if (!__gnu_Unwind_Find_exidx) {
+    *pcount = 0;
+    return nullptr;
+  }
+  return __gnu_Unwind_Find_exidx(pc, pcount);
+}
+#endif
+
+/**
+ * faulty.lib public API
+ */
+
+/* Returns the mappable length of the library behind the given handle, or 0
+ * when the handle is null. */
+MFBT_API size_t __dl_get_mappable_length(void* handle) {
+  LibHandle* lib = reinterpret_cast<LibHandle*>(handle);
+  if (!lib) {
+    return 0;
+  }
+  return lib->GetMappableLength();
+}
+
+/* Maps part of the library behind the given handle by forwarding to
+ * LibHandle::MappableMMap. Returns nullptr when the handle is null. */
+MFBT_API void* __dl_mmap(void* handle, void* addr, size_t length,
+                         off_t offset) {
+  LibHandle* lib = reinterpret_cast<LibHandle*>(handle);
+  if (!lib) {
+    return nullptr;
+  }
+  return lib->MappableMMap(addr, length, offset);
+}
+
+/* Unmaps a region previously obtained through __dl_mmap by forwarding to
+ * LibHandle::MappableMUnmap. Does nothing when the handle is null. */
+MFBT_API void __dl_munmap(void* handle, void* addr, size_t length) {
+  if (handle) {
+    reinterpret_cast<LibHandle*>(handle)->MappableMUnmap(addr, length);
+  }
+}
+
+/* Exported accessor: returns what the ElfLoader singleton's
+ * isSignalHandlingBroken() reports. */
+MFBT_API bool IsSignalHandlingBroken() {
+  return ElfLoader::Singleton.isSignalHandlingBroken();
+}
+
+namespace {
+
+/**
+ * Returns the component of the given path that follows its last '/', or
+ * the path itself when it contains no '/'.
+ */
+const char* LeafName(const char* path) {
+  const char* separator = strrchr(path, '/');
+  return separator ? separator + 1 : path;
+}
+
+/**
+ * Run the given lambda while holding the internal lock of the system linker.
+ * The lock is taken by calling the system dl_iterate_phdr, whose callback
+ * runs with the lock held; the lambda is invoked from that callback, which
+ * then stops the iteration. Returns true when the lambda was handed to
+ * dl_iterate_phdr, false when that was not attempted.
+ */
+template <class Lambda>
+static bool RunWithSystemLinkerLock(Lambda&& aLambda) {
+  if (!dl_iterate_phdr) {
+    // No dl_iterate_phdr support.
+    return false;
+  }
+
+#if defined(ANDROID)
+  if (GetAndroidSDKVersion() < 23) {
+    // dl_iterate_phdr is _not_ protected by a lock on Android < 23.
+    // Also return false here if we failed to get the version.
+    return false;
+  }
+#endif
+
+  // Trampoline: recovers the lambda from the opaque data pointer, runs it,
+  // and returns 1 so that dl_iterate_phdr stops after the first entry.
+  auto run_once = [](dl_phdr_info*, size_t, void* lambda) -> int {
+    (*static_cast<Lambda*>(lambda))();
+    return 1;
+  };
+  dl_iterate_phdr(run_once, &aLambda);
+  return true;
+}
+
+} /* Anonymous namespace */
+
+/**
+ * LibHandle
+ */
+/* Releases the path string held by the handle, using free(). */
+LibHandle::~LibHandle() { free(path); }
+
+/* Returns the leaf name of the library path, or nullptr when the handle
+ * has no path. */
+const char* LibHandle::GetName() const {
+  if (!path) {
+    return nullptr;
+  }
+  return LeafName(path);
+}
+
+/* Returns the length of the library's mappable, obtaining the mappable
+ * through GetMappable() first when none is set yet. Returns 0 when no
+ * mappable is available. */
+size_t LibHandle::GetMappableLength() const {
+  if (!mappable) {
+    mappable = GetMappable();
+  }
+  if (mappable) {
+    return mappable->GetLength();
+  }
+  return 0;
+}
+
+/* Maps length bytes of the library's mappable, starting at offset, as a
+ * private read-only mapping at addr. The mappable is obtained through
+ * GetMappable() first when none is set yet. Returns MAP_FAILED when no
+ * mappable is available. */
+void* LibHandle::MappableMMap(void* addr, size_t length, off_t offset) const {
+  if (!mappable) {
+    mappable = GetMappable();
+  }
+  if (!mappable) {
+    return MAP_FAILED;
+  }
+  return mappable->mmap(addr, length, PROT_READ, MAP_PRIVATE, offset);
+}
+
+/* Forwards the unmap request to the library's mappable; does nothing when
+ * no mappable is set. */
+void LibHandle::MappableMUnmap(void* addr, size_t length) const {
+  if (!mappable) {
+    return;
+  }
+  mappable->munmap(addr, length);
+}
+
+/**
+ * SystemElf
+ */
+/**
+ * Loads a library through the system dlopen() and wraps the resulting
+ * handle in a SystemElf registered with the ElfLoader singleton. Returns
+ * nullptr when an absolute path does not exist or when dlopen() fails.
+ */
+already_AddRefed<LibHandle> SystemElf::Load(const char* path, int flags) {
+  /* The Android linker returns a handle when the file name matches an
+   * already loaded library, even when the full path doesn't exist */
+  const bool missing_absolute_path =
+      path && path[0] == '/' && (access(path, F_OK) == -1);
+  if (missing_absolute_path) {
+    DEBUG_LOG("dlopen(\"%s\", 0x%x) = %p", path, flags, (void*)nullptr);
+    ElfLoader::Singleton.lastError = "Specified file does not exist";
+    return nullptr;
+  }
+
+  ElfLoader::Singleton.lastError = nullptr;  // Use system dlerror.
+  void* handle = dlopen(path, flags);
+  DEBUG_LOG("dlopen(\"%s\", 0x%x) = %p", path, flags, handle);
+  if (!handle) {
+    return nullptr;
+  }
+
+  SystemElf* elf = new SystemElf(path, handle);
+  ElfLoader::Singleton.Register(elf);
+  RefPtr<LibHandle> lib(elf);
+  return lib.forget();
+}
+
+/* When a system handle is held, closes it with dlclose() and removes this
+ * object from the ElfLoader singleton's list. Does nothing otherwise. */
+SystemElf::~SystemElf() {
+  if (dlhandle) {
+    DEBUG_LOG("dlclose(%p [\"%s\"])", dlhandle, GetPath());
+    ElfLoader::Singleton.lastError = nullptr;  // Use system dlerror.
+    dlclose(dlhandle);
+    ElfLoader::Singleton.Forget(this);
+  }
+}
+
+/* Looks the symbol up with the system dlsym() on the wrapped handle, after
+ * clearing the custom error so that the system dlerror() is reported. */
+void* SystemElf::GetSymbolPtr(const char* symbol) const {
+  ElfLoader::Singleton.lastError = nullptr;  // Use system dlerror.
+  void* address = dlsym(dlhandle, symbol);
+  DEBUG_LOG("dlsym(%p [\"%s\"], \"%s\") = %p", dlhandle, GetPath(), symbol,
+            address);
+  return address;
+}
+
+/* Returns a MappableFile for the library path, or nullptr when the handle
+ * has no path. */
+Mappable* SystemElf::GetMappable() const {
+  const char* lib_path = GetPath();
+  if (!lib_path) {
+    return nullptr;
+  }
+#ifdef ANDROID
+  /* On Android, if we don't have the full path, try in /system/lib */
+  std::string system_path;
+  if (LeafName(lib_path) == lib_path) {
+    system_path.assign("/system/lib/");
+    system_path.append(lib_path);
+    lib_path = system_path.c_str();
+  }
+#endif
+
+  return MappableFile::Create(lib_path);
+}
+
+#ifdef __ARM_EABI__
+/* Stub: always reports an empty exidx table by setting *pcount to 0 and
+ * returning nullptr. */
+const void* SystemElf::FindExidx(int* pcount) const {
+  /* TODO: properly implement when ElfLoader::GetHandleByPtr
+     does return SystemElf handles */
+  *pcount = 0;
+  return nullptr;
+}
+#endif
+
+/**
+ * ElfLoader
+ */
+
+/* Unique ElfLoader instance */
+ElfLoader ElfLoader::Singleton;
+
+/**
+ * Loads the library at the given path, or returns the already registered
+ * handle matching it.
+ *
+ * path:   library path or bare file name; nullptr is passed straight to
+ *         the system linker.
+ * flags:  dlopen()-style flags, forwarded to the loaders.
+ * parent: optional library on whose behalf the load happens; when path is
+ *         a bare file name, the parent's directory is tried first.
+ *
+ * The custom linker is tried first when a Mappable can be obtained for the
+ * path, then the system linker. Returns nullptr when every attempt fails.
+ */
+already_AddRefed<LibHandle> ElfLoader::Load(const char* path, int flags,
+                                            LibHandle* parent) {
+  /* Ensure logging is initialized or refresh if environment changed. */
+  Logging::Init();
+
+  /* Ensure self_elf initialization. */
+  if (!self_elf) Init();
+
+  RefPtr<LibHandle> handle;
+
+  /* Handle dlopen(nullptr) directly. */
+  if (!path) {
+    handle = SystemElf::Load(nullptr, flags);
+    return handle.forget();
+  }
+
+  /* TODO: Handle relative paths correctly */
+  const char* name = LeafName(path);
+
+  /* Search the list of handles we already have for a match. When the given
+   * path is not absolute, compare file names, otherwise compare full paths.
+   * In the file name case name == path, so comparing against path works for
+   * both. */
+  {
+    const bool by_name = (name == path);
+    AutoLock lock(&handlesMutex);
+    for (LibHandleList::iterator it = handles.begin(); it < handles.end();
+         ++it) {
+      const char* candidate = by_name ? (*it)->GetName() : (*it)->GetPath();
+      if (candidate && (strcmp(candidate, path) == 0)) {
+        handle = *it;
+        return handle.forget();
+      }
+    }
+  }
+
+  char* abs_path = nullptr;
+  const char* requested_path = path;
+
+  /* When the path is not absolute and the library is being loaded for
+   * another, first try to load the library from the directory containing
+   * that parent library. */
+  if ((name == path) && parent) {
+    const char* parentPath = parent->GetPath();
+    /* A parent without a path, or whose path has no directory component,
+     * gives no directory to search; the bare name is then used as is. */
+    const char* slash = parentPath ? strrchr(parentPath, '/') : nullptr;
+    if (slash) {
+      /* Directory part of the parent path, trailing '/' included. */
+      const size_t dir_len = static_cast<size_t>(slash + 1 - parentPath);
+      const size_t name_len = strlen(path);
+      /* Sized exactly: directory + name + terminating '\0'. */
+      abs_path = new char[dir_len + name_len + 1];
+      memcpy(abs_path, parentPath, dir_len);
+      memcpy(abs_path + dir_len, path, name_len + 1);
+      path = abs_path;
+    }
+  }
+
+  Mappable* mappable = GetMappableFromPath(path);
+
+  /* Try loading with the custom linker if we have a Mappable */
+  if (mappable) handle = CustomElf::Load(mappable, path, flags);
+
+  /* Try loading with the system linker if everything above failed */
+  if (!handle) handle = SystemElf::Load(path, flags);
+
+  /* If we didn't have an absolute path and haven't been able to load
+   * a library yet, try in the system search path */
+  if (!handle && abs_path) handle = SystemElf::Load(name, flags);
+
+  delete[] abs_path;
+  DEBUG_LOG("ElfLoader::Load(\"%s\", 0x%x, %p [\"%s\"]) = %p", requested_path,
+            flags, reinterpret_cast<void*>(parent),
+            parent ? parent->GetPath() : "", static_cast<void*>(handle));
+
+  return handle.forget();
+}
+
+/* Returns the first registered handle whose Contains() accepts the given
+ * address, or nullptr when there is none. The handle list is scanned with
+ * the handles mutex held. */
+already_AddRefed<LibHandle> ElfLoader::GetHandleByPtr(void* addr) {
+  AutoLock lock(&handlesMutex);
+  for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) {
+    if (!(*it)->Contains(addr)) {
+      continue;
+    }
+    RefPtr<LibHandle> match = *it;
+    return match.forget();
+  }
+  return nullptr;
+}
+
+/**
+ * Creates a Mappable for the given path. A path containing '!' is split
+ * into a zip archive path (before the '!') and an entry name inside it
+ * (after the '!' and any '/' that follow). Other paths are opened as plain
+ * files. Returns nullptr when no Mappable could be created.
+ */
+Mappable* ElfLoader::GetMappableFromPath(const char* path) {
+  const char* name = LeafName(path);
+  Mappable* mappable = nullptr;
+  RefPtr<Zip> zip;
+  const char* subpath = strchr(path, '!');
+  if (subpath) {
+    char* zip_path = strndup(path, subpath - path);
+    /* Step over the '!' and every '/' immediately following it. */
+    do {
+      ++subpath;
+    } while (*subpath == '/');
+    zip = ZipCollection::GetZip(zip_path);
+    free(zip_path);
+    Zip::Stream s;
+    if (zip && zip->GetStream(subpath, &s)) {
+      /* When the MOZ_LINKER_EXTRACT environment variable is set to "1",
+       * compressed libraries are going to be (temporarily) extracted as
+       * files, in the directory pointed by the MOZ_LINKER_CACHE
+       * environment variable. */
+      const char* extract = getenv("MOZ_LINKER_EXTRACT");
+      const bool want_extract =
+          extract && !strncmp(extract, "1", 2 /* Including '\0' */);
+      if (want_extract) {
+        mappable = MappableExtractFile::Create(name, zip, &s);
+      }
+      /* Extraction not requested or failed: inflate on demand instead,
+       * which is only possible for DEFLATE streams. */
+      if (!mappable && s.GetType() == Zip::Stream::DEFLATE) {
+        mappable = MappableDeflate::Create(name, zip, &s);
+      }
+    }
+  }
+  /* If we couldn't load above, try with a MappableFile */
+  if (!mappable && !zip) {
+    mappable = MappableFile::Create(path);
+  }
+
+  return mappable;
+}
+
+/* Append handle to the list of registered library handles, while holding
+ * handlesMutex. */
+void ElfLoader::Register(LibHandle* handle) {
+  AutoLock lock(&handlesMutex);
+  handles.push_back(handle);
+}
+
+/* Register a CustomElf: record it in the handle list through the LibHandle
+ * overload and, when the debugger helper evaluates to true, also add it to
+ * the debugger helper's map. */
+void ElfLoader::Register(CustomElf* handle) {
+  Register(static_cast<LibHandle*>(handle));
+  if (dbg) {
+    // We could race with the system linker when modifying the debug map, so
+    // only do so while holding the system linker's internal lock.
+    RunWithSystemLinkerLock([this, handle] { dbg.Add(handle); });
+  }
+}
+
+/* Drop handle from the list of registered library handles. Whether or not
+ * the handle was found in the list is reported through DEBUG_LOG. */
+void ElfLoader::Forget(LibHandle* handle) {
+  /* Ensure logging is initialized or refresh if environment changed. */
+  Logging::Init();
+
+  AutoLock lock(&handlesMutex);
+  const LibHandleList::iterator last = handles.end();
+  const LibHandleList::iterator pos =
+      std::find(handles.begin(), last, handle);
+  if (pos == last) {
+    /* Nothing to remove: the handle was never registered or already gone */
+    DEBUG_LOG("ElfLoader::Forget(%p [\"%s\"]): Handle not found",
+              reinterpret_cast<void*>(handle), handle->GetPath());
+    return;
+  }
+
+  DEBUG_LOG("ElfLoader::Forget(%p [\"%s\"])", reinterpret_cast<void*>(handle),
+            handle->GetPath());
+  handles.erase(pos);
+}
+
+/* Forget a CustomElf: remove it from the handle list through the LibHandle
+ * overload and, when the debugger helper evaluates to true, also remove it
+ * from the debugger helper's map. */
+void ElfLoader::Forget(CustomElf* handle) {
+  Forget(static_cast<LibHandle*>(handle));
+  if (dbg) {
+    // We could race with the system linker when modifying the debug map, so
+    // only do so while holding the system linker's internal lock.
+    RunWithSystemLinkerLock([this, handle] { dbg.Remove(handle); });
+  }
+}
+
+/* Create the LoadedElf handles the loader keeps for already loaded
+ * libraries: the one dladdr() reports for _DYNAMIC (self_elf) and, on
+ * Android with an SDK version below 21, the ones dladdr() reports for
+ * syscall() and isnan() (libc and libm). Each handle is only created when
+ * the corresponding dladdr() call succeeds. */
+void ElfLoader::Init() {
+  Dl_info info;
+  /* On Android < 4.1 can't reenter dl* functions. So when the library
+   * containing this code is dlopen()ed, it can't call dladdr from a
+   * static initializer. */
+  if (dladdr(_DYNAMIC, &info) != 0) {
+    self_elf = LoadedElf::Create(info.dli_fname, info.dli_fbase);
+  }
+#if defined(ANDROID)
+  // On Android < 5.0, resolving weak symbols via dlsym doesn't work.
+  // The weak symbols Gecko uses are in either libc or libm, so we
+  // wrap those such that this linker does symbol resolution for them.
+  if (GetAndroidSDKVersion() < 21) {
+    if (dladdr(FunctionPtr(syscall), &info) != 0) {
+      libc = LoadedElf::Create(info.dli_fname, info.dli_fbase);
+    }
+    if (dladdr(FunctionPtr<int (*)(double)>(isnan), &info) != 0) {
+      libm = LoadedElf::Create(info.dli_fname, info.dli_fbase);
+    }
+  }
+#endif
+}
+
+/* Tear down the loader.
+ *
+ * Crashes if the shutdown was not expected. Otherwise releases the handles
+ * the loader holds for itself, forcibly drops external (direct) references
+ * to the remaining non-system libraries, logs whatever handle is still
+ * registered afterwards, and finally destroys the handle list mutex. */
+ElfLoader::~ElfLoader() {
+  LibHandleList list;
+
+  if (!Singleton.IsShutdownExpected()) {
+    MOZ_CRASH("Unexpected shutdown");
+  }
+
+  /* Release self_elf, libc and libm */
+  self_elf = nullptr;
+#if defined(ANDROID)
+  libc = nullptr;
+  libm = nullptr;
+#endif
+
+  {
+    /* The lock lives in its own scope so that it is released before the
+     * mutex is destroyed below: destroying a mutex that is still locked,
+     * and unlocking it afterwards, is undefined behavior. */
+    AutoLock lock(&handlesMutex);
+    /* Build up a list of all library handles with direct (external)
+     * references. We actually skip system library handles because we want
+     * to keep at least some of these open. Most notably, Mozilla codebase
+     * keeps a few libgnome libraries deliberately open because of the mess
+     * that libORBit destruction is. dlclose()ing these libraries actually
+     * leads to problems. */
+    for (LibHandleList::reverse_iterator it = handles.rbegin();
+         it < handles.rend(); ++it) {
+      if ((*it)->DirectRefCount()) {
+        if (SystemElf* se = (*it)->AsSystemElf()) {
+          se->Forget();
+        } else {
+          list.push_back(*it);
+        }
+      }
+    }
+    /* Force release all external references to the handles collected
+     * above */
+    for (LibHandleList::iterator it = list.begin(); it < list.end(); ++it) {
+      while ((*it)->ReleaseDirectRef()) {
+      }
+    }
+    /* Remove the remaining system handles. */
+    if (handles.size()) {
+      /* Iterate over a copy so the scan does not depend on the live list */
+      list = handles;
+      for (LibHandleList::reverse_iterator it = list.rbegin();
+           it < list.rend(); ++it) {
+        if ((*it)->AsSystemElf()) {
+          DEBUG_LOG(
+              "ElfLoader::~ElfLoader(): Remaining handle for \"%s\" "
+              "[%" PRIdPTR " direct refs, %" PRIdPTR " refs total]",
+              (*it)->GetPath(), (*it)->DirectRefCount(), (*it)->refCount());
+        } else {
+          DEBUG_LOG(
+              "ElfLoader::~ElfLoader(): Unexpected remaining handle for \"%s\" "
+              "[%" PRIdPTR " direct refs, %" PRIdPTR " refs total]",
+              (*it)->GetPath(), (*it)->DirectRefCount(), (*it)->refCount());
+          /* Not removing, since it could have references to other libraries,
+           * destroying them as a side effect, and possibly leaving dangling
+           * pointers in the handle list we're scanning */
+        }
+      }
+    }
+  }
+  pthread_mutex_destroy(&handlesMutex);
+}
+
+#ifdef __ARM_EABI__
+/* ARM EABI variant of the atexit hook: records the destructor to run for
+ * the object `that` of the library identified by dso_handle. Note the
+ * object comes first in this signature, unlike __wrap_cxa_atexit.
+ * The destructor is only stored here; it is run by __wrap_cxa_finalize.
+ * Always returns 0. */
+int ElfLoader::__wrap_aeabi_atexit(void* that, ElfLoader::Destructor destructor,
+                                   void* dso_handle) {
+  Singleton.destructors.push_back(
+      DestructorCaller(destructor, that, dso_handle));
+  return 0;
+}
+#else
+/* Non-ARM-EABI variant of the atexit hook: records the destructor to run
+ * for the object `that` of the library identified by dso_handle.
+ * The destructor is only stored here; it is run by __wrap_cxa_finalize.
+ * Always returns 0. */
+int ElfLoader::__wrap_cxa_atexit(ElfLoader::Destructor destructor, void* that,
+                                 void* dso_handle) {
+  Singleton.destructors.push_back(
+      DestructorCaller(destructor, that, dso_handle));
+  return 0;
+}
+#endif
+
+/* Run the destructors recorded for the given DSO handle, most recently
+ * registered first. Entries recorded for other handles are left alone. */
+void ElfLoader::__wrap_cxa_finalize(void* dso_handle) {
+  std::for_each(Singleton.destructors.rbegin(), Singleton.destructors.rend(),
+                [dso_handle](DestructorCaller& caller) {
+                  if (caller.IsForHandle(dso_handle)) {
+                    caller.Call();
+                  }
+                });
+}
+
+/* Invoke the recorded destructor on its object, at most once: the function
+ * pointer is cleared after the call, so later calls are no-ops. */
+void ElfLoader::DestructorCaller::Call() {
+  if (!destructor) return;
+
+  DEBUG_LOG("ElfLoader::DestructorCaller::Call(%p, %p, %p)",
+            FunctionPtr(destructor), object, dso_handle);
+  destructor(object);
+  destructor = nullptr;
+}
+
+/* Locate the r_debug structure of the program by walking from the
+ * environment strings on the stack to the ELF auxiliary vectors, then to
+ * the program headers, the PT_DYNAMIC segment and finally its DT_DEBUG
+ * entry. On any failure along the way, dbg is left as nullptr. */
+ElfLoader::DebuggerHelper::DebuggerHelper()
+    : dbg(nullptr), firstAdded(nullptr) {
+  /* Find ELF auxiliary vectors.
+   *
+   * The kernel stores the following data on the stack when starting a
+   * program:
+   *   argc
+   *   argv[0] (pointer into argv strings defined below)
+   *   argv[1] (likewise)
+   *   ...
+   *   argv[argc - 1] (likewise)
+   *   nullptr
+   *   envp[0] (pointer into environment strings defined below)
+   *   envp[1] (likewise)
+   *   ...
+   *   envp[n] (likewise)
+   *   nullptr
+   *   ... (more NULLs on some platforms such as Android 4.3)
+   *   auxv[0] (first ELF auxiliary vector)
+   *   auxv[1] (second ELF auxiliary vector)
+   *   ...
+   *   auxv[p] (last ELF auxiliary vector)
+   *   (AT_NULL, nullptr)
+   *   padding
+   *   argv strings, separated with '\0'
+   *   environment strings, separated with '\0'
+   *   nullptr
+   *
+   * What we are after are the auxv values defined by the following struct.
+   */
+  struct AuxVector {
+    Elf::Addr type;
+    Elf::Addr value;
+  };
+
+  /* Pointer to the environment variables list */
+  extern char** environ;
+
+  /* The environment may have changed since the program started, in which
+   * case the environ variables list isn't the list the kernel put on stack
+   * anymore. But in this new list, variables that didn't change still point
+   * to the strings the kernel put on stack. It is quite unlikely that two
+   * modified environment variables point to two consecutive strings in memory,
+   * so we assume that if two consecutive environment variables point to two
+   * consecutive strings, we found strings the kernel put on stack. */
+  char** env;
+  for (env = environ; *env; env++)
+    if (*env + strlen(*env) + 1 == env[1]) break;
+  if (!*env) return;
+
+  /* Next, we scan the stack backwards to find a pointer to one of those
+   * strings we found above, which will give us the location of the original
+   * envp list. As we are looking for pointers, we need to look at 32-bits or
+   * 64-bits aligned values, depending on the architecture. */
+  char** scan = reinterpret_cast<char**>(reinterpret_cast<uintptr_t>(*env) &
+                                         ~(sizeof(void*) - 1));
+  while (*env != *scan) scan--;
+
+  /* Finally, scan forward to find the last environment variable pointer and
+   * thus the first auxiliary vector. */
+  while (*scan++)
+    ;
+
+  /* Some platforms have more NULLs here, so skip them if we encounter them */
+  while (!*scan) scan++;
+
+  AuxVector* auxv = reinterpret_cast<AuxVector*>(scan);
+
+  /* The two values of interest in the auxiliary vectors are AT_PHDR and
+   * AT_PHNUM, which give us the location and size of the ELF program
+   * headers. */
+  Array<Elf::Phdr> phdrs;
+  char* base = nullptr;
+  while (auxv->type) {
+    if (auxv->type == AT_PHDR) {
+      phdrs.Init(reinterpret_cast<Elf::Phdr*>(auxv->value));
+      /* Assume the base address is the first byte of the same page */
+      base = reinterpret_cast<char*>(PageAlignedPtr(auxv->value));
+    }
+    if (auxv->type == AT_PHNUM) phdrs.Init(auxv->value);
+    auxv++;
+  }
+
+  if (!phdrs) {
+    DEBUG_LOG("Couldn't find program headers");
+    return;
+  }
+
+  /* In some cases, the address for the program headers we get from the
+   * auxiliary vectors is not mapped, because of the PT_LOAD segments
+   * definitions in the program executable. Trying to map anonymous memory
+   * with a hint giving the base address will return a different address
+   * if something is mapped there, and the base address otherwise. */
+  MappedPtr mem(MemoryRange::mmap(base, PageSize(), PROT_NONE,
+                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (mem == base) {
+    /* If program headers aren't mapped, try to map them */
+    int fd = open("/proc/self/exe", O_RDONLY);
+    if (fd == -1) {
+      DEBUG_LOG("Failed to open /proc/self/exe");
+      return;
+    }
+    mem.Assign(
+        MemoryRange::mmap(base, PageSize(), PROT_READ, MAP_PRIVATE, fd, 0));
+    /* A file mapping stays valid after its descriptor is closed, so the
+     * descriptor is released right away, whether or not the mapping ended
+     * up where we wanted it, instead of being leaked. */
+    close(fd);
+    /* If we don't manage to map at the right address, just give up. */
+    if (mem != base) {
+      DEBUG_LOG("Couldn't read program headers");
+      return;
+    }
+  }
+  /* Sanity check: the first bytes at the base address should be an ELF
+   * header. */
+  if (!Elf::Ehdr::validate(base)) {
+    DEBUG_LOG("Couldn't find program base");
+    return;
+  }
+
+  /* Search for the program PT_DYNAMIC segment */
+  Array<Elf::Dyn> dyns;
+  for (Array<Elf::Phdr>::iterator phdr = phdrs.begin(); phdr < phdrs.end();
+       ++phdr) {
+    /* While the program headers are expected within the first mapped page of
+     * the program executable, the executable PT_LOADs may actually make them
+     * loaded at an address that is not the wanted base address of the
+     * library. We thus need to adjust the base address, compensating for the
+     * virtual address of the PT_LOAD segment corresponding to offset 0. */
+    if (phdr->p_type == PT_LOAD && phdr->p_offset == 0) base -= phdr->p_vaddr;
+    if (phdr->p_type == PT_DYNAMIC)
+      dyns.Init(base + phdr->p_vaddr, phdr->p_filesz);
+  }
+  if (!dyns) {
+    DEBUG_LOG("Failed to find PT_DYNAMIC section in program");
+    return;
+  }
+
+  /* Search for the DT_DEBUG information */
+  for (Array<Elf::Dyn>::iterator dyn = dyns.begin(); dyn < dyns.end(); ++dyn) {
+    if (dyn->d_tag == DT_DEBUG) {
+      dbg = reinterpret_cast<r_debug*>(dyn->d_un.d_ptr);
+      break;
+    }
+  }
+  DEBUG_LOG("DT_DEBUG points at %p", static_cast<void*>(dbg));
+}
+
+/**
+ * Scoped guard making the memory behind a pointer writable. If the page(s)
+ * holding the pointed data are not writable, PROT_WRITE is added for the
+ * lifetime of the instance and the original protection is put back when
+ * the instance goes away. The current protection is looked up in
+ * /proc/self/maps.
+ */
+class EnsureWritable {
+ public:
+  template <typename T>
+  explicit EnsureWritable(T* ptr, size_t length_ = sizeof(T))
+      : prot(-1), page(MAP_FAILED) {
+    MOZ_ASSERT(length_ < PageSize());
+
+    /* Widen the requested range to whole pages. */
+    char* const target = reinterpret_cast<char*>(ptr);
+    char* const rangeBegin = PageAlignedPtr(target);
+    char* const rangeEnd = PageAlignedEndPtr(target + length_);
+    length = rangeEnd - rangeBegin;
+
+    const uintptr_t start = reinterpret_cast<uintptr_t>(rangeBegin);
+    uintptr_t mappingEnd;
+    prot = getProt(start, &mappingEnd);
+    /* Unknown protection, or a range running past the end of the mapping
+     * that contains its first page, is fatal. */
+    if (prot == -1 || (start + length) > mappingEnd) MOZ_CRASH();
+
+    /* Already writable: nothing to change now, nothing to restore later
+     * (page stays MAP_FAILED). */
+    if (prot & PROT_WRITE) {
+      success = true;
+      return;
+    }
+
+    page = rangeBegin;
+    const int ret = mprotect(page, length, prot | PROT_WRITE);
+    success = (ret == 0);
+    if (!success) {
+      ERROR("mprotect(%p, %zu, %d) = %d (errno=%d; %s)", page, length,
+            prot | PROT_WRITE, ret, errno, strerror(errno));
+    }
+  }
+
+  /* Whether the memory can be written to while this instance is alive. */
+  bool IsWritable() const { return success; }
+
+  ~EnsureWritable() {
+    /* Only undo a protection change this instance actually made. */
+    if (!success || page == MAP_FAILED) return;
+    mprotect(page, length, prot);
+  }
+
+ private:
+  /* Return the PROT_* flags of the mapping containing addr, storing the end
+   * address of that mapping in *end, or -1 when the mapping can't be found
+   * or its /proc/self/maps entry can't be parsed. */
+  int getProt(uintptr_t addr, uintptr_t* end) {
+    AutoCloseFILE maps(fopen("/proc/self/maps", "r"));
+    if (!maps) return -1;
+
+    /* Expected character and matching flag for each of the first three
+     * permission columns. */
+    const struct {
+      char flag;
+      int bit;
+    } kPerms[] = {{'r', PROT_READ}, {'w', PROT_WRITE}, {'x', PROT_EXEC}};
+
+    /* The interesting part of the /proc/self/maps format looks like:
+     * startAddr-endAddr rwxp */
+    for (;;) {
+      unsigned long long lo, hi;
+      char perms[5];
+      if (fscanf(maps, "%llx-%llx %4s %*1024[^\n] ", &lo, &hi, perms) != 3)
+        return -1;
+      if (addr < lo || addr >= hi) continue;
+
+      int bits = 0;
+      for (size_t i = 0; i < 3; ++i) {
+        if (perms[i] == kPerms[i].flag)
+          bits |= kPerms[i].bit;
+        else if (perms[i] != '-')
+          return -1;
+      }
+      *end = hi;
+      return bits;
+    }
+  }
+
+  int prot;
+  void* page;
+  size_t length;
+  bool success;
+};
+
+/**
+ * The system linker maintains a doubly linked list of the libraries it
+ * loads, for use by the debugger. Unfortunately, it also uses the list
+ * pointers in a lot of operations and adding our data in the list is likely
+ * to trigger crashes when the linker tries to use data we don't provide or
+ * that falls off the amount of data we allocated. Fortunately, the linker
+ * only traverses the list forward and accesses the head of the list from a
+ * private pointer instead of using the value in the r_debug structure.
+ * This means we can safely add members at the beginning of the list.
+ * Unfortunately, gdb checks the coherency of l_prev values, so we have
+ * to adjust the l_prev value for the first element the system linker
+ * knows about. Fortunately, it doesn't use l_prev, and the first element
+ * is not ever going to be released before our elements, since it is the
+ * program executable, so the system linker should not be changing
+ * r_debug::r_map.
+ *
+ * Add() inserts map at the head of the r_debug::r_map list. It does nothing
+ * when r_brk is not set, and leaves the list untouched when the l_prev
+ * field of the current head can't be made writable.
+ */
+void ElfLoader::DebuggerHelper::Add(ElfLoader::link_map* map) {
+  if (!dbg->r_brk) return;
+
+  /* Flag the list as being modified and call r_brk before touching it */
+  dbg->r_state = r_debug::RT_ADD;
+  dbg->r_brk();
+
+  if (!firstAdded) {
+    /* When adding a library for the first time, r_map points to data
+     * handled by the system linker, and that data may be read-only */
+    EnsureWritable w(&dbg->r_map->l_prev);
+    if (!w.IsWritable()) {
+      /* Give up, but still flag the list as consistent again */
+      dbg->r_state = r_debug::RT_CONSISTENT;
+      dbg->r_brk();
+      return;
+    }
+
+    firstAdded = map;
+    dbg->r_map->l_prev = map;
+  } else
+    dbg->r_map->l_prev = map;
+
+  /* map becomes the new head of the list */
+  map->l_prev = nullptr;
+  map->l_next = dbg->r_map;
+
+  dbg->r_map = map;
+  dbg->r_state = r_debug::RT_CONSISTENT;
+  dbg->r_brk();
+}
+
+/* Unlink map from the r_debug::r_map list. Does nothing when r_brk is not
+ * set, and leaves the list untouched when map is the first library that was
+ * added and the l_prev field of its successor can't be made writable. */
+void ElfLoader::DebuggerHelper::Remove(ElfLoader::link_map* map) {
+  if (!dbg->r_brk) return;
+
+  /* Flag the list as being modified and call r_brk before touching it */
+  dbg->r_state = r_debug::RT_DELETE;
+  dbg->r_brk();
+
+  if (map == firstAdded) {
+    /* When removing the first added library, its l_next is going to be
+     * data handled by the system linker, and that data may be read-only */
+    EnsureWritable w(&map->l_next->l_prev);
+    if (!w.IsWritable()) {
+      /* Give up, but still flag the list as consistent again */
+      dbg->r_state = r_debug::RT_CONSISTENT;
+      dbg->r_brk();
+      return;
+    }
+
+    /* The element before map, if any, is now the one adjacent to the
+     * system linker's data */
+    firstAdded = map->l_prev;
+    map->l_next->l_prev = map->l_prev;
+  } else if (map->l_next) {
+    map->l_next->l_prev = map->l_prev;
+  }
+
+  /* Fix up the forward link: either the list head or the predecessor */
+  if (dbg->r_map == map)
+    dbg->r_map = map->l_next;
+  else if (map->l_prev) {
+    map->l_prev->l_next = map->l_next;
+  }
+  dbg->r_state = r_debug::RT_CONSISTENT;
+  dbg->r_brk();
+}
+
+#if defined(ANDROID) && defined(__NR_sigaction)
+/* As some system libraries may be calling signal() or sigaction() to
+ * set a SIGSEGV handler, effectively breaking MappableSeekableZStream,
+ * or worse, restore our SIGSEGV handler with wrong flags (which using
+ * signal() will do), we want to hook into the system's sigaction() to
+ * replace it with our own wrapper instead, so that our handler is never
+ * replaced. We used to only do that with libraries this linker loads,
+ * but it turns out at least one system library does call signal() and
+ * breaks us (libsc-a3xx.so on the Samsung Galaxy S4).
+ * As libc's signal (bsd_signal/sysv_signal, really) calls sigaction
+ * under the hood, instead of calling the signal system call directly,
+ * we only need to hook sigaction. This is true for both bionic and
+ * glibc.
+ */
+
+/* libc's sigaction */
+extern "C" int sigaction(int signum, const struct sigaction* act,
+                         struct sigaction* oldact);
+
+/* Simple reimplementation of sigaction. This is roughly equivalent
+ * to the assembly that comes in bionic, but not quite equivalent to
+ * glibc's implementation, so we only use this on Android.
+ * The arguments are passed unchanged to the __NR_sigaction system call,
+ * and the value returned by syscall() is returned as is. */
+int sys_sigaction(int signum, const struct sigaction* act,
+                  struct sigaction* oldact) {
+  return syscall(__NR_sigaction, signum, act, oldact);
+}
+
+/* Replace the first instructions of the given function with a jump
+ * to the given new function.
+ *
+ * Returns true when the function was patched, and false when the
+ * architecture is not supported or when the code to patch could not be
+ * made writable (in which case nothing is modified). */
+template <typename T>
+static bool Divert(T func, T new_func) {
+  void* ptr = FunctionPtr(func);
+  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+
+#  if defined(__i386__)
+  // A 32-bit jump is a 5 bytes instruction.
+  EnsureWritable w(ptr, 5);
+  // Writing to memory that couldn't be made writable would fault.
+  if (!w.IsWritable()) return false;
+  *reinterpret_cast<unsigned char*>(addr) = 0xe9;  // jmp
+  *reinterpret_cast<intptr_t*>(addr + 1) =
+      reinterpret_cast<uintptr_t>(new_func) - addr - 5;  // target displacement
+  return true;
+#  elif defined(__arm__) || defined(__aarch64__)
+  const unsigned char trampoline[] = {
+#    ifdef __arm__
+      // .thumb
+      0x46, 0x04,              // nop
+      0x78, 0x47,              // bx pc
+      0x46, 0x04,              // nop
+                               // .arm
+      0x04, 0xf0, 0x1f, 0xe5,  // ldr pc, [pc, #-4]
+                               // .word <new_func>
+#    else  // __aarch64__
+      0x50, 0x00,
+      0x00, 0x58,  // ldr x16, [pc, #8]   ; x16 (aka ip0) is the first
+      0x00, 0x02,
+      0x1f, 0xd6,  // br x16              ; intra-procedure-call
+                   // .word <new_func.lo> ; scratch register.
+                   // .word <new_func.hi>
+#    endif
+  };
+  const unsigned char* start;
+#    ifdef __arm__
+  if (addr & 0x01) {
+    /* Function is thumb, the actual address of the code is without the
+     * least significant bit. */
+    addr--;
+    /* The arm part of the trampoline needs to be 32-bit aligned */
+    if (addr & 0x02)
+      start = trampoline;
+    else
+      start = trampoline + 2;
+  } else {
+    /* Function is arm, we only need the arm part of the trampoline */
+    start = trampoline + 6;
+  }
+#    else  // __aarch64__
+  start = trampoline;
+#    endif
+
+  /* The patched area is the selected part of the trampoline followed by
+   * the address of the new function. */
+  size_t len = sizeof(trampoline) - (start - trampoline);
+  EnsureWritable w(reinterpret_cast<void*>(addr), len + sizeof(void*));
+  /* Writing to memory that couldn't be made writable would fault. */
+  if (!w.IsWritable()) return false;
+  memcpy(reinterpret_cast<void*>(addr), start, len);
+  *reinterpret_cast<void**>(addr + len) = FunctionPtr(new_func);
+  /* Make sure the instruction cache sees the new code */
+  __builtin___clear_cache(reinterpret_cast<char*>(addr),
+                          reinterpret_cast<char*>(addr + len + sizeof(void*)));
+  return true;
+#  else
+  return false;
+#  endif
+}
+#else
+#  define sys_sigaction sigaction
+/* Stand-in for Divert when the Android sigaction hook is not compiled in:
+ * nothing is patched and false is always returned. */
+template <typename T>
+static bool Divert(T func, T new_func) {
+  return false;
+}
+#endif
+
+namespace {
+
+/* Clock that only accounts for time spent in the current process.
+ * Returns the CLOCK_PROCESS_CPUTIME_ID reading in nanoseconds, or 0 when
+ * the clock can't be read. */
+static uint64_t ProcessTimeStamp_Now() {
+  struct timespec now;
+  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now) != 0) {
+    return 0;
+  }
+
+  const uint64_t kNsPerSec = 1000000000;
+  const uint64_t seconds = static_cast<uint64_t>(now.tv_sec);
+  const uint64_t nanoseconds = static_cast<uint64_t>(now.tv_nsec);
+  return seconds * kNsPerSec + nanoseconds;
+}
+
+}  // namespace
+
+/* Data structure used to pass data to the temporary signal handler,
+ * as well as triggering a test crash. */
+struct TmpData {
+  /* Written to after the page holding this struct was made inaccessible,
+   * which is what triggers the test crash. */
+  volatile int crash_int;
+  /* ProcessTimeStamp_Now() value taken right before the test crash, used
+   * by the temporary handler to compute the signal delivery latency. */
+  volatile uint64_t crash_timestamp;
+};
+
+/* Probe how SIGSEGV handling behaves on this device by installing a
+ * temporary handler (test_handler) and deliberately faulting on a freshly
+ * mapped page. signalHandlingBroken and signalHandlingSlow start out true
+ * and are only cleared by test_handler; they stay true when the probe
+ * can't be set up. The previously installed SIGSEGV action is restored
+ * before returning from a completed probe. */
+SEGVHandler::SEGVHandler()
+    : initialized(false),
+      registeredHandler(false),
+      signalHandlingBroken(true),
+      signalHandlingSlow(true) {
+  /* Ensure logging is initialized before the DEBUG_LOG in the test_handler.
+   * As this constructor runs before the ElfLoader constructor (by effect
+   * of ElfLoader inheriting from this class), this also initializes on behalf
+   * of ElfLoader and DebuggerHelper. */
+  Logging::Init();
+
+  /* Initialize oldStack.ss_flags to an invalid value when used to set
+   * an alternative stack, meaning we haven't got information about the
+   * original alternative stack and thus don't mean to restore it in
+   * the destructor. */
+  oldStack.ss_flags = SS_ONSTACK;
+
+  /* Get the current segfault signal handler. */
+  struct sigaction old_action;
+  sys_sigaction(SIGSEGV, nullptr, &old_action);
+
+  /* Some devices don't provide useful information to their SIGSEGV handlers,
+   * making it impossible for on-demand decompression to work. To check if
+   * we're on such a device, setup a temporary handler and deliberately
+   * trigger a segfault. The handler will set signalHandlingBroken if the
+   * provided information is bogus.
+   * Some other devices have a kernel option enabled that makes SIGSEGV handler
+   * have an overhead so high that it affects how on-demand decompression
+   * performs. The handler will also set signalHandlingSlow if the triggered
+   * SIGSEGV took too much time. */
+  struct sigaction action;
+  action.sa_sigaction = &SEGVHandler::test_handler;
+  sigemptyset(&action.sa_mask);
+  action.sa_flags = SA_SIGINFO | SA_NODEFER;
+  action.sa_restorer = nullptr;
+  /* The page used for the test crash is temporarily kept in stackPtr, which
+   * is where test_handler looks for it. */
+  stackPtr.Assign(MemoryRange::mmap(nullptr, PageSize(), PROT_READ | PROT_WRITE,
+                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (stackPtr.get() == MAP_FAILED) return;
+  if (sys_sigaction(SIGSEGV, &action, nullptr)) return;
+
+  TmpData* data = reinterpret_cast<TmpData*>(stackPtr.get());
+  data->crash_timestamp = ProcessTimeStamp_Now();
+  /* Make the page inaccessible so that the write below faults; test_handler
+   * makes it accessible again. */
+  mprotect(stackPtr, stackPtr.GetLength(), PROT_NONE);
+  data->crash_int = 123;
+  /* Restore the original segfault signal handler. */
+  sys_sigaction(SIGSEGV, &old_action, nullptr);
+  stackPtr.Assign(MAP_FAILED, 0);
+}
+
+/* Second stage of the SIGSEGV handling setup. Unless signal handling was
+ * found to be broken or slow, this diverts libc's sigaction to
+ * __wrap_sigaction, makes sure a big enough alternative signal stack is
+ * available, and registers SEGVHandler::handler as the SIGSEGV handler,
+ * keeping the previously registered action in the action member.
+ * registeredHandler tells whether the handler ended up being registered. */
+void SEGVHandler::FinishInitialization() {
+  /* Ideally, we'd need some locking here, but in practice, we're not
+   * going to race with another thread. */
+  initialized = true;
+
+  if (signalHandlingBroken || signalHandlingSlow) return;
+
+  typedef int (*sigaction_func)(int, const struct sigaction*,
+                                struct sigaction*);
+
+  sigaction_func libc_sigaction;
+
+#if defined(ANDROID)
+  /* Android > 4.4 comes with a sigaction wrapper in a LD_PRELOADed library
+   * (libsigchain) for ART. That wrapper kind of does the same trick as we
+   * do, so we need extra care in handling it.
+   * - Divert the libc's sigaction, assuming the LD_PRELOADed library uses
+   *   it under the hood (which is more or less true according to the source
+   *   of that library, since it's doing a lookup in RTLD_NEXT)
+   * - With the LD_PRELOADed library in place, all calls to sigaction
+   *   from system libraries will go to the LD_PRELOADed library.
+   * - The LD_PRELOADed library calls to sigaction go to our __wrap_sigaction.
+   * - The calls to sigaction from libraries faulty.lib loads are sent to
+   *   the LD_PRELOADed library.
+   * In practice, for signal handling, this means:
+   * - The signal handler registered to the kernel is ours.
+   * - Our handler redispatches to the LD_PRELOADed library's if there's a
+   *   segfault we don't handle.
+   * - The LD_PRELOADed library redispatches according to whatever system
+   *   library or faulty.lib-loaded library set with sigaction.
+   *
+   * When there is no sigaction wrapper in place:
+   * - Divert the libc's sigaction.
+   * - Calls to sigaction from system library and faulty.lib-loaded libraries
+   *   all go to the libc's sigaction, which end up in our __wrap_sigaction.
+   * - The signal handler registered to the kernel is ours.
+   * - Our handler redispatches according to whatever system library or
+   *   faulty.lib-loaded library set with sigaction.
+   */
+  void* libc = dlopen("libc.so", RTLD_GLOBAL | RTLD_LAZY);
+  if (libc) {
+    /*
+     * Lollipop bionic only has a small trampoline in sigaction, with the real
+     * work happening in __sigaction. Divert there instead of sigaction if it
+     * exists. Bug 1154803
+     */
+    libc_sigaction =
+        reinterpret_cast<sigaction_func>(dlsym(libc, "__sigaction"));
+
+    if (!libc_sigaction) {
+      libc_sigaction =
+          reinterpret_cast<sigaction_func>(dlsym(libc, "sigaction"));
+    }
+  } else
+#endif
+  {
+    libc_sigaction = sigaction;
+  }
+
+  if (!Divert(libc_sigaction, __wrap_sigaction)) return;
+
+  /* Setup an alternative stack if the already existing one is not big
+   * enough, or if there is none. */
+  if (sigaltstack(nullptr, &oldStack) == 0) {
+    if (oldStack.ss_flags == SS_ONSTACK) oldStack.ss_flags = 0;
+    if (!oldStack.ss_sp || oldStack.ss_size < stackSize) {
+      stackPtr.Assign(MemoryRange::mmap(nullptr, stackSize,
+                                        PROT_READ | PROT_WRITE,
+                                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+      if (stackPtr.get() == MAP_FAILED) return;
+      stack_t stack;
+      stack.ss_sp = stackPtr;
+      stack.ss_size = stackSize;
+      stack.ss_flags = 0;
+      if (sigaltstack(&stack, nullptr) != 0) return;
+    }
+  }
+  /* Register our own handler, and store the already registered one in
+   * SEGVHandler's struct sigaction member. The new action is built in a
+   * local, fully initialized structure, so that the structure describing
+   * the new action and the one receiving the old action are distinct
+   * objects, as sigaction requires. */
+  struct sigaction newAction = {};
+  newAction.sa_sigaction = &SEGVHandler::handler;
+  sigemptyset(&newAction.sa_mask);
+  newAction.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK;
+  registeredHandler = !sys_sigaction(SIGSEGV, &newAction, &this->action);
+}
+
+/* Undo what FinishInitialization set up: put back the alternative signal
+ * stack that was recorded (ss_flags still being SS_ONSTACK means none was
+ * recorded) and, if our SIGSEGV handler was registered, the action that was
+ * saved in the action member. */
+SEGVHandler::~SEGVHandler() {
+  /* Restore alternative stack for signals */
+  if (oldStack.ss_flags != SS_ONSTACK) sigaltstack(&oldStack, nullptr);
+  /* Restore original signal handler */
+  if (registeredHandler) sys_sigaction(SIGSEGV, &this->action, nullptr);
+}
+
+/* Test handler for a deliberately triggered SIGSEGV that determines whether
+ * useful information is provided to signal handlers, particularly whether
+ * si_addr is filled in properly, and whether the segfault handler is called
+ * quickly enough. */
+void SEGVHandler::test_handler(int signum, siginfo_t* info, void* context) {
+  SEGVHandler& that = ElfLoader::Singleton;
+  /* The reported fault address must be the start of the test page kept in
+   * stackPtr for signal handling to be considered working. */
+  if (signum == SIGSEGV && info && info->si_addr == that.stackPtr.get())
+    that.signalHandlingBroken = false;
+  /* Make the test page accessible again, so that the timestamp stored in it
+   * can be read below. */
+  mprotect(that.stackPtr, that.stackPtr.GetLength(), PROT_READ | PROT_WRITE);
+  TmpData* data = reinterpret_cast<TmpData*>(that.stackPtr.get());
+  uint64_t latency = ProcessTimeStamp_Now() - data->crash_timestamp;
+  DEBUG_LOG("SEGVHandler latency: %" PRIu64, latency);
+  /* See bug 886736 for timings on different devices, 150 µs is reasonably above
+   * the latency on "working" devices and seems to be short enough to not incur
+   * a huge overhead to on-demand decompression. */
+  if (latency <= 150000) that.signalHandlingSlow = false;
+}
+
+/* SIGSEGV handler registered by FinishInitialization. It forwards the
+ * signal to whatever action is stored in the action member: a siginfo-style
+ * handler, a plain handler, the default action, or nothing at all when the
+ * signal is to be ignored.
+ * TODO: "properly" handle signal masks and flags */
+void SEGVHandler::handler(int signum, siginfo_t* info, void* context) {
+  // ASSERT(signum == SIGSEGV);
+  DEBUG_LOG("Caught segmentation fault @%p", info->si_addr);
+
+  /* Redispatch to the registered handler */
+  SEGVHandler& self = ElfLoader::Singleton;
+  struct sigaction& registered = self.action;
+
+  if (registered.sa_flags & SA_SIGINFO) {
+    DEBUG_LOG("Redispatching to registered handler @%p",
+              FunctionPtr(registered.sa_sigaction));
+    registered.sa_sigaction(signum, info, context);
+    return;
+  }
+
+  if (registered.sa_handler == SIG_DFL) {
+    DEBUG_LOG("Redispatching to default handler");
+    /* Reset the handler to the default one, and trigger it. */
+    sys_sigaction(signum, &registered, nullptr);
+    raise(signum);
+    return;
+  }
+
+  if (registered.sa_handler == SIG_IGN) {
+    DEBUG_LOG("Ignoring");
+    return;
+  }
+
+  DEBUG_LOG("Redispatching to registered handler @%p",
+            FunctionPtr(registered.sa_handler));
+  registered.sa_handler(signum);
+}
+
+/* sigaction replacement. Once our SIGSEGV handler is registered, requests
+ * about SIGSEGV only read and/or update the action stored in the action
+ * member and return 0; every other request is passed to sys_sigaction. */
+int SEGVHandler::__wrap_sigaction(int signum, const struct sigaction* act,
+                                  struct sigaction* oldact) {
+  SEGVHandler& self = ElfLoader::Singleton;
+
+  /* Use system sigaction() function for all but SIGSEGV signals. */
+  const bool intercept = self.registeredHandler && (signum == SIGSEGV);
+  if (!intercept) {
+    return sys_sigaction(signum, act, oldact);
+  }
+
+  /* Report the stored action before replacing it */
+  if (oldact) {
+    *oldact = self.action;
+  }
+  if (act) {
+    self.action = *act;
+  }
+  return 0;
+}
diff --git a/mozglue/linker/ElfLoader.h b/mozglue/linker/ElfLoader.h
new file mode 100644
index 0000000000..059c092f6d
--- /dev/null
+++ b/mozglue/linker/ElfLoader.h
@@ -0,0 +1,634 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ElfLoader_h
+#define ElfLoader_h
+
+#include <vector>
+#include <dlfcn.h>
+#include <signal.h>
+#include "mozilla/Atomics.h"
+#include "mozilla/RefCounted.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/UniquePtr.h"
+#include "Zip.h"
+#include "Elfxx.h"
+#include "Mappable.h"
+
+/**
+ * dlfcn.h replacement functions
+ */
+extern "C" {
+void* __wrap_dlopen(const char* path, int flags);
+const char* __wrap_dlerror(void);
+void* __wrap_dlsym(void* handle, const char* symbol);
+int __wrap_dlclose(void* handle);
+
+#ifndef HAVE_DLADDR /* local Dl_info, only when HAVE_DLADDR is not defined */
+typedef struct {
+  const char* dli_fname;
+  void* dli_fbase;
+  const char* dli_sname;
+  void* dli_saddr;
+} Dl_info;
+#endif /* !HAVE_DLADDR */
+int __wrap_dladdr(const void* addr, Dl_info* info);
+
+struct dl_phdr_info {
+  Elf::Addr dlpi_addr;
+  const char* dlpi_name;
+  const Elf::Phdr* dlpi_phdr;
+  Elf::Half dlpi_phnum;
+};
+
+typedef int (*dl_phdr_cb)(struct dl_phdr_info*, size_t, void*);
+int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data);
+
+#ifdef __ARM_EABI__
+const void* __wrap___gnu_Unwind_Find_exidx(void* pc, int* pcount);
+#endif /* __ARM_EABI__ */
+
+/**
+ * faulty.lib public API (the only declarations here exported with MFBT_API)
+ */
+MFBT_API size_t __dl_get_mappable_length(void* handle);
+
+MFBT_API void* __dl_mmap(void* handle, void* addr, size_t length, off_t offset);
+
+MFBT_API void __dl_munmap(void* handle, void* addr, size_t length);
+
+MFBT_API bool IsSignalHandlingBroken();
+} /* extern "C" */
+
+/* Forward declarations for use in LibHandle */
+class BaseElf;
+class CustomElf;
+class SystemElf;
+
+/**
+ * Specialize RefCounted template for LibHandle. We may get references to
+ * LibHandles during the execution of their destructor, so we need
+ * RefCounted<LibHandle>::Release to support some reentrancy. See further
+ * below.
+ */
+class LibHandle;
+
+namespace mozilla {
+namespace detail {
+
+template <>
+inline void RefCounted<LibHandle, AtomicRefCount>::Release() const;
+
+#ifdef DEBUG /* debug-only sanity check on the final reference count */
+template <>
+inline RefCounted<LibHandle, AtomicRefCount>::~RefCounted() {
+  MOZ_ASSERT(mRefCnt == 0x7fffdead);  // marker Release() stores before delete
+}
+#endif /* DEBUG */
+
+} /* namespace detail */
+} /* namespace mozilla */
+
+/**
+ * Abstract class for loaded libraries. Libraries may be loaded through the
+ * system linker or this linker, both cases will be derived from this class.
+ */
+class LibHandle : public mozilla::external::AtomicRefCounted<LibHandle> {
+ public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(LibHandle)
+  /**
+   * Constructor. Takes the path of the loaded library and stores a copy of
+   * it (nullptr when no path is given). Starts with no direct reference.
+   */
+  LibHandle(const char* path)
+      : directRefCnt(0),
+        path(path ? strdup(path) : nullptr),
+        mappable(nullptr) {}
+
+  /**
+   * Destructor.
+   */
+  virtual ~LibHandle();
+
+  /**
+   * Returns the pointer to the address to which the given symbol resolves
+   * inside the library. It is not supposed to resolve the symbol in other
+   * libraries, although in practice, it will for system libraries.
+   */
+  virtual void* GetSymbolPtr(const char* symbol) const = 0;
+
+  /**
+   * Returns whether the given address is part of the virtual address space
+   * covered by the loaded library.
+   */
+  virtual bool Contains(void* addr) const = 0;
+
+  /**
+   * Returns the base address of the loaded library.
+   */
+  virtual void* GetBase() const = 0;
+
+  /**
+   * Returns the file name of the library without the containing directory.
+   */
+  const char* GetName() const;
+
+  /**
+   * Returns the path stored by the constructor (the full path of the library
+   * when available, otherwise the file name); nullptr if none was given.
+   */
+  const char* GetPath() const { return path; }
+
+  /**
+   * Library handles can be referenced from other library handles or
+   * externally (when dlopen()ing using this linker). We need to be
+   * able to distinguish between the two kinds of references for better
+   * bookkeeping. A direct reference also takes a regular reference.
+   */
+  void AddDirectRef() {
+    mozilla::external::AtomicRefCounted<LibHandle>::AddRef();
+    ++directRefCnt;
+  }
+
+  /**
+   * Releases a direct reference, and returns whether there are any direct
+   * references left.
+   */
+  bool ReleaseDirectRef() {
+    const MozRefCountType count = --directRefCnt;  // direct refs left
+    MOZ_ASSERT(count + 1 > 0);  // the previous value must not have been zero
+    MOZ_ASSERT(count + 1 <=
+               mozilla::external::AtomicRefCounted<LibHandle>::refCount());
+    mozilla::external::AtomicRefCounted<LibHandle>::Release();
+    return !!count;  // computed before Release(), not read from the member
+  }
+
+  /**
+   * Returns the number of direct references
+   */
+  MozRefCountType DirectRefCount() { return directRefCnt; }
+
+  /**
+   * Returns the complete size of the file or stream behind the library
+   * handle.
+   */
+  size_t GetMappableLength() const;
+
+  /**
+   * Returns a memory mapping of the file or stream behind the library
+   * handle.
+   */
+  void* MappableMMap(void* addr, size_t length, off_t offset) const;
+
+  /**
+   * Unmaps a memory mapping of the file or stream behind the library
+   * handle.
+   */
+  void MappableMUnmap(void* addr, size_t length) const;
+
+#ifdef __ARM_EABI__
+  /**
+   * Find the address and entry count of the ARM.exidx section
+   * associated with the library
+   */
+  virtual const void* FindExidx(int* pcount) const = 0;
+#endif /* __ARM_EABI__ */
+
+ protected:
+  /**
+   * Returns a mappable object for use by MappableMMap and related functions.
+   */
+  virtual Mappable* GetMappable() const = 0;
+
+  /**
+   * AsBaseElf/AsSystemElf below return the instance, cast to the wanted
+   * type, or nullptr if that's not the actual type. (short of a better way
+   * to do this without RTTI). The defaults here return nullptr.
+   */
+  friend class ElfLoader;
+  friend class CustomElf;
+  friend class SEGVHandler;
+  friend int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data);
+  virtual BaseElf* AsBaseElf() { return nullptr; }
+  virtual SystemElf* AsSystemElf() { return nullptr; }
+
+ private:
+  mozilla::Atomic<MozRefCountType> directRefCnt;
+  char* path;  // strdup()ed copy made by the constructor, or nullptr
+
+  /* Mappable object keeping the result of GetMappable() */
+  mutable RefPtr<Mappable> mappable;
+};
+
+/**
+ * Specialized RefCounted<LibHandle>::Release. Under normal operation, when
+ * mRefCnt reaches 0, the LibHandle is deleted. Its mRefCnt is however
+ * increased to 1 on normal builds, and 0x7fffdead on debug builds so that the
+ * LibHandle can still be referenced while the destructor is executing. The
+ * mRefCnt is allowed to grow > 0x7fffdead, but not to decrease under that
+ * value, which would mean too many Releases from within the destructor.
+ */
+namespace mozilla {
+namespace detail {
+
+template <>
+inline void RefCounted<LibHandle, AtomicRefCount>::Release() const {
+#ifdef DEBUG /* in the marker range, the count must still exceed the marker */
+  if (mRefCnt > 0x7fff0000) MOZ_ASSERT(mRefCnt > 0x7fffdead);
+#endif /* DEBUG */
+  MOZ_ASSERT(mRefCnt > 0);
+  if (mRefCnt > 0) {
+    if (0 == --mRefCnt) {  // last reference dropped
+#ifdef DEBUG
+      mRefCnt = 0x7fffdead;  // marker checked by ~RefCounted()
+#else
+      ++mRefCnt;  // back to 1 so the destructor may take references
+#endif /* DEBUG */
+      delete static_cast<const LibHandle*>(this);
+    }
+  }
+}
+
+} /* namespace detail */
+} /* namespace mozilla */
+
+/**
+ * Class handling libraries loaded by the system linker
+ */
+class SystemElf : public LibHandle {
+ public:
+  /**
+   * Returns a new SystemElf for the given path. The given flags are passed
+   * to dlopen().
+   */
+  static already_AddRefed<LibHandle> Load(const char* path, int flags);
+
+  /**
+   * Inherited from LibHandle; `override` makes the compiler verify that.
+   */
+  ~SystemElf() override;
+  void* GetSymbolPtr(const char* symbol) const override;
+  bool Contains(void* addr) const override { return false; /* UNIMPLEMENTED */ }
+  void* GetBase() const override { return nullptr; /* UNIMPLEMENTED */ }
+
+#ifdef __ARM_EABI__
+  const void* FindExidx(int* pcount) const override;
+#endif
+
+ protected:
+  Mappable* GetMappable() const override;
+
+  /**
+   * Returns the instance, cast to SystemElf. (short of a better way to do
+   * this without RTTI)
+   */
+  friend class ElfLoader;
+  SystemElf* AsSystemElf() override { return this; }
+
+  /**
+   * Remove the reference to the system linker handle. This avoids dlclose()
+   * being called when the instance is destroyed.
+   */
+  void Forget() { dlhandle = nullptr; }
+
+ private:
+  /**
+   * Private constructor; keeps the handle next to the path kept by LibHandle
+   */
+  SystemElf(const char* path, void* handle)
+      : LibHandle(path), dlhandle(handle) {}
+
+  /* Handle as returned by system dlopen() */
+  void* dlhandle;
+};
+
+/**
+ * The ElfLoader registers its own SIGSEGV handler to handle segmentation
+ * faults within the address space of the loaded libraries. It however
+ * allows a handler to be set for faults in other places, and redispatches
+ * to the handler set through signal() or sigaction().
+ */
+class SEGVHandler {
+ public:
+  bool hasRegisteredHandler() {
+    if (!initialized) FinishInitialization();  // finish deferred setup first
+    return registeredHandler;
+  }
+
+  bool isSignalHandlingBroken() { return signalHandlingBroken; }
+
+  static int __wrap_sigaction(int signum, const struct sigaction* act,
+                              struct sigaction* oldact);
+
+ protected:
+  SEGVHandler();
+  ~SEGVHandler();
+
+ private:
+  /**
+   * The constructor doesn't do all initialization, and the tail is done
+   * at a later time (on the first hasRegisteredHandler() call, at least).
+   */
+  void FinishInitialization();
+
+  /**
+   * SIGSEGV action (handler + flags) set via __wrap_signal or __wrap_sigaction.
+   */
+  struct sigaction action;
+
+  /**
+   * ElfLoader SIGSEGV handler; redispatches to `action`.
+   */
+  static void handler(int signum, siginfo_t* info, void* context);
+
+  /**
+   * Temporary test handler.
+   */
+  static void test_handler(int signum, siginfo_t* info, void* context);
+
+  /**
+   * Size of the alternative stack. The printf family requires more than 8KB
+   * of stack, and our signal handler may print a few things.
+   */
+  static const size_t stackSize = 12 * 1024;
+
+  /**
+   * Alternative stack information used before initialization.
+   */
+  stack_t oldStack;
+
+  /**
+   * Pointer to an alternative stack for signals. Only set if oldStack is
+   * not set or not big enough.
+   */
+  MappedPtr stackPtr;
+
+  bool initialized;
+  bool registeredHandler;
+  bool signalHandlingBroken;
+  bool signalHandlingSlow;
+};
+
+/**
+ * Elf Loader class in charge of loading and bookkeeping libraries.
+ */
+class ElfLoader : public SEGVHandler {
+ public:
+  /**
+   * The Elf Loader instance
+   */
+  static ElfLoader Singleton;
+
+  /**
+   * Loads the given library with the given flags. Equivalent to dlopen()
+   * The extra "parent" argument optionally gives the handle of the library
+   * requesting the given library to be loaded. The loader may look in the
+   * directory containing that parent library for the library to load.
+   */
+  already_AddRefed<LibHandle> Load(const char* path, int flags,
+                                   LibHandle* parent = nullptr);
+
+  /**
+   * Returns the handle of the library containing the given address in
+   * its virtual address space, i.e. the library handle for which
+   * LibHandle::Contains returns true. Its purpose is to make it
+   * possible to implement dladdr().
+   */
+  already_AddRefed<LibHandle> GetHandleByPtr(void* addr);
+
+  /**
+   * Returns a Mappable object for the path. Paths in the form
+   *   /foo/bar/baz/archive!/directory/lib.so
+   * try to load the directory/lib.so in /foo/bar/baz/archive, provided
+   * that file is a Zip archive.
+   */
+  static Mappable* GetMappableFromPath(const char* path);
+
+  void ExpectShutdown(bool val) { expect_shutdown = val; }
+  bool IsShutdownExpected() { return expect_shutdown; }
+
+ private:
+  bool expect_shutdown;
+
+ protected:
+  /**
+   * Registers the given handle. This method is meant to be called by
+   * LibHandle subclass creators.
+   */
+  void Register(LibHandle* handle);
+  void Register(CustomElf* handle);
+
+  /**
+   * Forget about the given handle. This method is meant to be called by
+   * LibHandle subclass destructors.
+   */
+  void Forget(LibHandle* handle);
+  void Forget(CustomElf* handle);
+
+  friend class SystemElf;
+  friend const char* __wrap_dlerror(void);
+  friend void* __wrap_dlsym(void* handle, const char* symbol);
+  friend int __wrap_dlclose(void* handle);
+  /* __wrap_dlerror() returns this custom last error if non-null or the system
+   * dlerror() value if this is null. Must refer to a string constant. */
+  mozilla::Atomic<const char*, mozilla::Relaxed> lastError;
+
+ private:
+  ElfLoader() : expect_shutdown(true), lastError(nullptr) {
+    pthread_mutex_init(&handlesMutex, nullptr);
+  }
+
+  ~ElfLoader();
+
+  /* Initialization code that can't run during static initialization. */
+  void Init();
+
+  /* System loader handle for the library/program containing our code. This
+   * is used to resolve wrapped functions. */
+  RefPtr<LibHandle> self_elf;
+
+#if defined(ANDROID)
+  /* System loader handle for the libc. This is used to resolve weak symbols
+   * that some libcs contain that the Android linker won't dlsym(). Normally,
+   * we wouldn't treat non-Android differently, but glibc uses versioned
+   * symbols which this linker doesn't support. */
+  RefPtr<LibHandle> libc;
+
+  /* And for libm. */
+  RefPtr<LibHandle> libm;
+#endif
+
+  /* Bookkeeping */
+  typedef std::vector<LibHandle*> LibHandleList;
+  LibHandleList handles;
+
+  pthread_mutex_t handlesMutex;
+
+ protected:
+  friend class CustomElf;
+  friend class LoadedElf;
+
+  /* Signature of static destructors, as used for C++ ABI compatibility */
+  typedef void (*Destructor)(void* object);
+
+  /**
+   * C++ ABI makes static initializers register destructors through a specific
+   * atexit interface. On glibc/linux systems, the dso_handle is a pointer
+   * within a given library. On bionic/android systems, it is an undefined
+   * symbol. Making sense of the value is not really important, and all that
+   * is really important is that it is different for each loaded library, so
+   * that they can be discriminated when shutting down. For convenience, on
+   * systems where the dso handle is a symbol, that symbol is resolved to
+   * point at corresponding CustomElf.
+   *
+   * Destructors are registered with __*_atexit with an associated object to
+   * be passed as argument when it is called.
+   *
+   * When __cxa_finalize is called, destructors registered for the given
+   * DSO handle are called in the reverse order they were registered.
+   */
+#ifdef __ARM_EABI__
+  static int __wrap_aeabi_atexit(void* that, Destructor destructor,
+                                 void* dso_handle);
+#else
+  static int __wrap_cxa_atexit(Destructor destructor, void* that,
+                               void* dso_handle);
+#endif
+
+  static void __wrap_cxa_finalize(void* dso_handle);
+
+  /**
+   * Registered destructor. Keeps track of the destructor function pointer,
+   * associated object to call it with, and DSO handle.
+   */
+  class DestructorCaller {
+   public:
+    DestructorCaller(Destructor destructor, void* object, void* dso_handle)
+        : destructor(destructor), object(object), dso_handle(dso_handle) {}
+
+    /**
+     * Call the destructor function with the associated object.
+     * Call only once, see CustomElf::~CustomElf.
+     */
+    void Call();
+
+    /**
+     * Returns whether the destructor is associated to the given DSO handle
+     */
+    bool IsForHandle(void* handle) const { return handle == dso_handle; }
+
+   private:
+    Destructor destructor;
+    void* object;
+    void* dso_handle;
+  };
+
+ private:
+  /* Keep track of all registered destructors */
+  std::vector<DestructorCaller> destructors;
+
+  /* Forward declaration, see further below */
+  class DebuggerHelper;
+
+ public:
+  /* Loaded object descriptor for the debugger interface below. */
+  struct link_map {
+    /* Base address of the loaded object. */
+    const void* l_addr;
+    /* File name */
+    const char* l_name;
+    /* Address of the PT_DYNAMIC segment. */
+    const void* l_ld;
+
+   private:
+    friend class ElfLoader::DebuggerHelper;
+    /* Doubly linked list of loaded objects; reserved to DebuggerHelper. */
+    link_map *l_next, *l_prev;
+  };
+
+ private:
+  /* Data structure used by the linker to give details about shared objects it
+   * loaded to debuggers. This is normally defined in link.h, but Android
+   * headers lack this file. */
+  struct r_debug {
+    /* Version number of the protocol. */
+    int r_version;
+
+    /* Head of the linked list of loaded objects. */
+    link_map* r_map;
+
+    /* Function to be called when updates to the linked list of loaded objects
+     * are going to occur. The function is to be called before and after
+     * changes. */
+    void (*r_brk)(void);
+
+    /* Indicates to the debugger what state the linked list of loaded objects
+     * is in when the function above is called. */
+    enum {
+      RT_CONSISTENT, /* Changes are complete */
+      RT_ADD,        /* Beginning to add a new object */
+      RT_DELETE      /* Beginning to remove an object */
+    } r_state;
+  };
+
+  /* Memory representation of ELF Auxiliary Vectors */
+  struct AuxVector {
+    Elf::Addr type;
+    Elf::Addr value;
+  };
+
+  /* Helper class used to integrate libraries loaded by this linker in
+   * r_debug */
+  class DebuggerHelper {
+   public:
+    DebuggerHelper();
+
+    void Init(AuxVector* auvx);  // NOTE(review): "auvx" is likely "auxv"
+
+    explicit operator bool() { return dbg; }  // true when dbg is non-null
+
+    /* Make the debugger aware of a newly loaded object */
+    void Add(link_map* map);
+
+    /* Make the debugger aware of the unloading of an object */
+    void Remove(link_map* map);
+
+    /* Forward iterator over link_maps; `<` only accepts end() as right side */
+    class iterator {
+     public:
+      const link_map* operator->() const { return item; }
+
+      const link_map& operator++() {
+        item = item->l_next;
+        return *item;  // NOTE(review): item is null past the last entry
+      }
+
+      bool operator<(const iterator& other) const {
+        if (other.item == nullptr) return item ? true : false;
+        MOZ_CRASH(
+            "DebuggerHelper::iterator::operator< called with something else "
+            "than DebuggerHelper::end()");
+      }
+
+     protected:
+      friend class DebuggerHelper;
+      explicit iterator(const link_map* item) : item(item) {}
+
+     private:
+      const link_map* item;  // current entry; nullptr stands for end()
+    };
+
+    iterator begin() const { return iterator(dbg ? dbg->r_map : nullptr); }
+
+    iterator end() const { return iterator(nullptr); }
+
+   private:
+    r_debug* dbg;
+    link_map* firstAdded;
+  };
+  friend int __wrap_dl_iterate_phdr(dl_phdr_cb callback, void* data);
+  DebuggerHelper dbg;
+};
+
+#endif /* ElfLoader_h */
diff --git a/mozglue/linker/Elfxx.h b/mozglue/linker/Elfxx.h
new file mode 100644
index 0000000000..a871280213
--- /dev/null
+++ b/mozglue/linker/Elfxx.h
@@ -0,0 +1,244 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Elfxx_h
+#define Elfxx_h
+
+/**
+ * Android system headers have two different elf.h files. The one under linux/
+ * is the most complete on older Android API versions without unified headers.
+ */
+#if defined(ANDROID) && __ANDROID_API__ < 21 && !defined(__ANDROID_API_L__)
+#  include <linux/elf.h>
+#else
+#  include <elf.h>
+#endif
+#include <endian.h>
+
+#if defined(__ARM_EABI__) && !defined(PT_ARM_EXIDX)
+#  define PT_ARM_EXIDX 0x70000001
+#endif
+
+/**
+ * Generic ELF macros for the target system
+ */
+#ifdef __LP64__
+#  define Elf_(type) Elf64_##type
+#  define ELFCLASS ELFCLASS64
+#  define ELF_R_TYPE ELF64_R_TYPE
+#  define ELF_R_SYM ELF64_R_SYM
+#  ifndef ELF_ST_BIND
+#    define ELF_ST_BIND ELF64_ST_BIND
+#  endif
+#else
+#  define Elf_(type) Elf32_##type
+#  define ELFCLASS ELFCLASS32
+#  define ELF_R_TYPE ELF32_R_TYPE
+#  define ELF_R_SYM ELF32_R_SYM
+#  ifndef ELF_ST_BIND
+#    define ELF_ST_BIND ELF32_ST_BIND
+#  endif
+#endif
+
+#ifndef __BYTE_ORDER
+#  error Cannot find endianness
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define ELFDATA ELFDATA2LSB
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#  define ELFDATA ELFDATA2MSB
+#endif
+
+#ifdef __linux__
+#  define ELFOSABI ELFOSABI_LINUX
+#  ifdef EI_ABIVERSION
+#    define ELFABIVERSION 0
+#  endif
+#else
+#  error Unknown ELF OSABI
+#endif
+
+#if defined(__i386__)
+#  define ELFMACHINE EM_386
+
+// Doing it this way probably doesn't scale to other architectures
+#  define R_ABS R_386_32
+#  define R_GLOB_DAT R_386_GLOB_DAT
+#  define R_JMP_SLOT R_386_JMP_SLOT
+#  define R_RELATIVE R_386_RELATIVE
+#  define RELOC(n) DT_REL##n
+#  define UNSUPPORTED_RELOC(n) DT_RELA##n
+#  define STR_RELOC(n) "DT_REL" #  n
+#  define Reloc Rel
+
+#elif defined(__x86_64__)
+#  define ELFMACHINE EM_X86_64
+
+#  define R_ABS R_X86_64_64
+#  define R_GLOB_DAT R_X86_64_GLOB_DAT
+#  define R_JMP_SLOT R_X86_64_JUMP_SLOT
+#  define R_RELATIVE R_X86_64_RELATIVE
+#  define RELOC(n) DT_RELA##n
+#  define UNSUPPORTED_RELOC(n) DT_REL##n
+#  define STR_RELOC(n) "DT_RELA" #  n
+#  define Reloc Rela
+
+#elif defined(__arm__)
+#  define ELFMACHINE EM_ARM
+
+#  ifndef R_ARM_ABS32
+#    define R_ARM_ABS32 2
+#  endif
+#  ifndef R_ARM_GLOB_DAT
+#    define R_ARM_GLOB_DAT 21
+#  endif
+#  ifndef R_ARM_JUMP_SLOT
+#    define R_ARM_JUMP_SLOT 22
+#  endif
+#  ifndef R_ARM_RELATIVE
+#    define R_ARM_RELATIVE 23
+#  endif
+
+#  define R_ABS R_ARM_ABS32
+#  define R_GLOB_DAT R_ARM_GLOB_DAT
+#  define R_JMP_SLOT R_ARM_JUMP_SLOT
+#  define R_RELATIVE R_ARM_RELATIVE
+#  define RELOC(n) DT_REL##n
+#  define UNSUPPORTED_RELOC(n) DT_RELA##n
+#  define STR_RELOC(n) "DT_REL" #  n
+#  define Reloc Rel
+
+#elif defined(__aarch64__)
+#  define ELFMACHINE EM_AARCH64
+
+#  define R_ABS R_AARCH64_ABS64
+#  define R_GLOB_DAT R_AARCH64_GLOB_DAT
+#  define R_JMP_SLOT R_AARCH64_JUMP_SLOT
+#  define R_RELATIVE R_AARCH64_RELATIVE
+#  define RELOC(n) DT_RELA##n
+#  define UNSUPPORTED_RELOC(n) DT_REL##n
+#  define STR_RELOC(n) "DT_RELA" #  n
+#  define Reloc Rela
+
+#else
+#  error Unknown ELF machine type
+#endif
+
+/**
+ * Android system headers don't have all definitions
+ */
+#ifndef STN_UNDEF
+#  define STN_UNDEF 0
+#endif
+#ifndef DT_INIT_ARRAY
+#  define DT_INIT_ARRAY 25
+#endif
+#ifndef DT_FINI_ARRAY
+#  define DT_FINI_ARRAY 26
+#endif
+#ifndef DT_INIT_ARRAYSZ
+#  define DT_INIT_ARRAYSZ 27
+#endif
+#ifndef DT_FINI_ARRAYSZ
+#  define DT_FINI_ARRAYSZ 28
+#endif
+#ifndef DT_RELACOUNT
+#  define DT_RELACOUNT 0x6ffffff9
+#endif
+#ifndef DT_RELCOUNT
+#  define DT_RELCOUNT 0x6ffffffa
+#endif
+#ifndef DT_VERSYM
+#  define DT_VERSYM 0x6ffffff0
+#endif
+#ifndef DT_VERDEF
+#  define DT_VERDEF 0x6ffffffc
+#endif
+#ifndef DT_VERDEFNUM
+#  define DT_VERDEFNUM 0x6ffffffd
+#endif
+#ifndef DT_VERNEED
+#  define DT_VERNEED 0x6ffffffe
+#endif
+#ifndef DT_VERNEEDNUM
+#  define DT_VERNEEDNUM 0x6fffffff
+#endif
+#ifndef DT_FLAGS_1
+#  define DT_FLAGS_1 0x6ffffffb
+#endif
+#ifndef DT_FLAGS
+#  define DT_FLAGS 30
+#endif
+#ifndef DF_SYMBOLIC
+#  define DF_SYMBOLIC 0x00000002
+#endif
+#ifndef DF_TEXTREL
+#  define DF_TEXTREL 0x00000004
+#endif
+
+namespace Elf {
+
+/**
+ * Short aliases for the Elf types selected by the Elf_() macro
+ */
+using Phdr = Elf_(Phdr);
+using Dyn = Elf_(Dyn);
+using Sym = Elf_(Sym);
+using Addr = Elf_(Addr);
+using Word = Elf_(Word);
+using Half = Elf_(Half);
+
+/**
+ * Thin wrapper over the standard Elf header struct
+ */
+struct Ehdr : public Elf_(Ehdr) {
+  /**
+   * Same result as reinterpret_cast<const Ehdr *>(buf), except that the
+   * buffer is also checked to really hold an Elf header whose Elf type
+   * matches the one of the system
+   */
+  static const Ehdr* validate(const void* buf);
+};
+
+/**
+ * String table of an Elf object
+ */
+class Strtab : public UnsizedArray<const char> {
+ public:
+  /**
+   * Gives the string that starts at the given index of the table
+   */
+  const char* GetStringAt(off_t index) const {
+    return &this->UnsizedArray<const char>::operator[](index);
+  }
+};
+
+/**
+ * Wrapper over an Elf relocation without explicit addend.
+ */
+struct Rel : public Elf_(Rel) {
+  /**
+   * Gives the addend of the relocation, read from the location found
+   * r_offset bytes after base.
+   */
+  Addr GetAddend(void* base) const {
+    const char* location = reinterpret_cast<const char*>(base) + r_offset;
+    return *reinterpret_cast<const Addr*>(location);
+  }
+};
+
+/**
+ * Wrapper over an Elf relocation carrying its own addend.
+ */
+struct Rela : public Elf_(Rela) {
+  /**
+   * Gives the addend of the relocation (r_addend); base is not used.
+   */
+  Addr GetAddend(void* base) const { return r_addend; }
+};
+
+} /* namespace Elf */
+
+#endif /* Elfxx_h */
diff --git a/mozglue/linker/Linker.h b/mozglue/linker/Linker.h
new file mode 100644
index 0000000000..77ddb06ecc
--- /dev/null
+++ b/mozglue/linker/Linker.h
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Linker_h
+#define Linker_h
+
+#ifdef MOZ_LINKER
+#  include "ElfLoader.h"
+#  define __wrap_sigaction SEGVHandler::__wrap_sigaction
+#else
+#  include <dlfcn.h>
+#  include <link.h>
+#  include <signal.h>
+#  define __wrap_sigaction sigaction
+#  define __wrap_dlopen dlopen
+#  define __wrap_dlerror dlerror
+#  define __wrap_dlsym dlsym
+#  define __wrap_dlclose dlclose
+#  define __wrap_dladdr dladdr
+#  define __wrap_dl_iterate_phdr dl_iterate_phdr
+#endif
+
+#endif
diff --git a/mozglue/linker/Logging.cpp b/mozglue/linker/Logging.cpp
new file mode 100644
index 0000000000..e61c7835d2
--- /dev/null
+++ b/mozglue/linker/Logging.cpp
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Logging.h"
+
+Logging Logging::Singleton;
diff --git a/mozglue/linker/Logging.h b/mozglue/linker/Logging.h
new file mode 100644
index 0000000000..1e66ea41de
--- /dev/null
+++ b/mozglue/linker/Logging.h
@@ -0,0 +1,72 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Logging_h
+#define Logging_h
+
+#include <cstdlib>
+#include "mozilla/Likely.h"
+#include "mozilla/MacroArgs.h"
+
+#ifdef ANDROID
+#  include <android/log.h>
+#  define LOG(...) \
+    __android_log_print(ANDROID_LOG_INFO, "GeckoLinker", __VA_ARGS__)
+#  define WARN(...) \
+    __android_log_print(ANDROID_LOG_WARN, "GeckoLinker", __VA_ARGS__)
+#  define ERROR(...) \
+    __android_log_print(ANDROID_LOG_ERROR, "GeckoLinker", __VA_ARGS__)
+#else
+#  include <cstdio>
+
+/* Expand to 1 or m depending on whether there is one argument or more
+ * given. */
+#  define MOZ_ONE_OR_MORE_ARGS_IMPL2(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, \
+                                     ...)                                   \
+    N
+#  define MOZ_ONE_OR_MORE_ARGS_IMPL(args) MOZ_ONE_OR_MORE_ARGS_IMPL2 args
+#  define MOZ_ONE_OR_MORE_ARGS(...) \
+    MOZ_ONE_OR_MORE_ARGS_IMPL((__VA_ARGS__, m, m, m, m, m, m, m, m, 1, 0))
+
+#  define MOZ_MACRO_GLUE(a, b) a b
+
+/* Some magic to choose between LOG1 and LOGm depending on the number of
+ * arguments */
+#  define MOZ_CHOOSE_LOG(...)                                          \
+    MOZ_MACRO_GLUE(MOZ_CONCAT(LOG, MOZ_ONE_OR_MORE_ARGS(__VA_ARGS__)), \
+                   (__VA_ARGS__))
+
+#  define LOG1(format) fprintf(stderr, format "\n")
+#  define LOGm(format, ...) fprintf(stderr, format "\n", __VA_ARGS__)
+#  define LOG(...) MOZ_CHOOSE_LOG(__VA_ARGS__)
+#  define WARN(...) MOZ_CHOOSE_LOG("Warning: " __VA_ARGS__)
+#  define ERROR(...) MOZ_CHOOSE_LOG("Error: " __VA_ARGS__)
+
+#endif
+
+class Logging {
+ public:
+  static bool isVerbose() { return Singleton.verbose; }
+
+ private:
+  bool verbose; /* read by isVerbose(), switched on by Init() */
+
+ public:
+  static void Init() { /* verbose when MOZ_DEBUG_LINKER starts with '1' */
+    const char* value = getenv("MOZ_DEBUG_LINKER");
+    if (value != nullptr && value[0] == '1') Singleton.verbose = true;
+  }
+
+ private:
+  static Logging Singleton;
+};
+
+#define DEBUG_LOG(...)                        \
+  do {                                        \
+    if (MOZ_UNLIKELY(Logging::isVerbose())) { \
+      LOG(__VA_ARGS__);                       \
+    }                                         \
+  } while (0)
+
+#endif /* Logging_h */
diff --git a/mozglue/linker/Mappable.cpp b/mozglue/linker/Mappable.cpp
new file mode 100644
index 0000000000..cacd6a46f6
--- /dev/null
+++ b/mozglue/linker/Mappable.cpp
@@ -0,0 +1,376 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+#include <string>
+
+#include "Mappable.h"
+
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/UniquePtr.h"
+
+#ifdef ANDROID
+#  include "mozilla/Ashmem.h"
+#endif
+#include <sys/stat.h>
+#include <errno.h>
+#include "ElfLoader.h"
+#include "XZStream.h"
+#include "Logging.h"
+
+using mozilla::MakeUnique;
+using mozilla::UniquePtr;
+
+class CacheValidator {
+ public:
+  CacheValidator(const char* aCachedLibPath, Zip* aZip, Zip::Stream* aStream)
+      : mCachedLibPath(aCachedLibPath) {
+    static const char kChecksumSuffix[] = ".crc";
+    // sizeof() already counts the suffix's NUL, so the buffer fits exactly.
+    mCachedChecksumPath =
+        MakeUnique<char[]>(strlen(aCachedLibPath) + sizeof(kChecksumSuffix));
+    sprintf(mCachedChecksumPath.get(), "%s%s", aCachedLibPath, kChecksumSuffix);
+    DEBUG_LOG("mCachedChecksumPath: %s", mCachedChecksumPath.get());
+
+    mChecksum = aStream->GetCRC32();
+    DEBUG_LOG("mChecksum: %x", mChecksum);
+  }
+
+  // Returns whether the cache is valid and up-to-date.
+  bool IsValid() const {
+    // Validate based on checksum.
+    RefPtr<Mappable> checksumMap =
+        MappableFile::Create(mCachedChecksumPath.get());
+    if (!checksumMap) {
+      // Force caching if checksum is missing in cache.
+      return false;
+    }
+    // A checksum file shorter than the checksum itself (e.g. left behind by an
+    // interrupted CacheChecksum()) can't match; memcmp() would read past it.
+    const size_t checksumLength = checksumMap->GetLength();
+    if (checksumLength < sizeof(mChecksum)) return false;
+
+    DEBUG_LOG("Comparing %x with %s", mChecksum, mCachedChecksumPath.get());
+    MappedPtr checksumBuf =
+        checksumMap->mmap(nullptr, checksumLength, PROT_READ, MAP_PRIVATE, 0);
+    if (checksumBuf == MAP_FAILED) {
+      WARN("Couldn't map %s to validate checksum", mCachedChecksumPath.get());
+      return false;
+    }
+    if (memcmp(checksumBuf, &mChecksum, sizeof(mChecksum))) return false;
+    return !access(mCachedLibPath.c_str(), R_OK);
+  }
+
+  // Caches the APK-provided checksum used in future cache validations.
+  void CacheChecksum() const {
+    AutoCloseFD fd(open(mCachedChecksumPath.get(),
+                        O_TRUNC | O_RDWR | O_CREAT | O_NOATIME,
+                        S_IRUSR | S_IWUSR));
+    if (fd == -1) {
+      WARN("Couldn't open %s to update checksum", mCachedChecksumPath.get());
+      return;
+    }
+    DEBUG_LOG("Updating checksum %s", mCachedChecksumPath.get());
+    const size_t size = sizeof(mChecksum);
+    size_t written = 0;
+    while (written < size) {
+      ssize_t ret =
+          write(fd, reinterpret_cast<const uint8_t*>(&mChecksum) + written,
+                size - written);
+      if (ret >= 0) {
+        written += ret;
+      } else if (errno != EINTR) {
+        WARN("Writing checksum %s failed with errno %d",
+             mCachedChecksumPath.get(), errno);
+        break;
+      }
+    }
+  }
+
+ private:
+  const std::string mCachedLibPath;
+  UniquePtr<char[]> mCachedChecksumPath;
+  uint32_t mChecksum;
+};
+
+Mappable* MappableFile::Create(const char* path) {
+  /* Read-only open; a failed open() is reported as nullptr. */
+  const int handle = open(path, O_RDONLY);
+  return handle == -1 ? nullptr : new MappableFile(handle);
+}
+
+MemoryRange MappableFile::mmap(const void* addr, size_t length, int prot,
+                               int flags, off_t offset) {
+  MOZ_ASSERT(fd != -1);
+  MOZ_ASSERT(!(flags & MAP_SHARED));
+  /* Mappings handed out by a Mappable are always private. */
+  const int privateFlags = flags | MAP_PRIVATE;
+  void* const hint = const_cast<void*>(addr);
+  return MemoryRange::mmap(hint, length, prot, privateFlags, fd, offset);
+}
+
+void MappableFile::finalize() {
+  /* Closes the file, like close(fd.forget()); -1 is the empty fd value. */
+  fd = -1;
+}
+
+size_t MappableFile::GetLength() const {
+  struct stat info; /* only st_size is consulted */
+  return fstat(fd, &info) == 0 ? info.st_size : 0;
+}
+
+/* Extracts |stream| to $MOZ_LINKER_CACHE/|name|, or reuses the file already
+ * there when its cached checksum still validates. Returns a Mappable for the
+ * extracted file, or nullptr on any failure. */
+Mappable* MappableExtractFile::Create(const char* name, Zip* zip,
+                                      Zip::Stream* stream) {
+  MOZ_ASSERT(zip && stream);
+
+  const char* cachePath = getenv("MOZ_LINKER_CACHE");
+  if (!cachePath || !*cachePath) {
+    WARN("MOZ_LINKER_EXTRACT is set, but not MOZ_LINKER_CACHE; "
+         "not extracting");
+    return nullptr;
+  }
+
+  // Ensure that the cache dir is private.
+  chmod(cachePath, 0770);
+
+  UniquePtr<char[]> path =
+      MakeUnique<char[]>(strlen(cachePath) + strlen(name) + 2);
+  sprintf(path.get(), "%s/%s", cachePath, name);
+
+  CacheValidator validator(path.get(), zip, stream);
+  if (validator.IsValid()) {
+    DEBUG_LOG("Reusing %s", static_cast<char*>(path.get()));
+    return MappableFile::Create(path.get());
+  }
+  DEBUG_LOG("Extracting to %s", static_cast<char*>(path.get()));
+  AutoCloseFD fd(open(path.get(), O_TRUNC | O_RDWR | O_CREAT | O_NOATIME,
+                      S_IRUSR | S_IWUSR));
+  if (fd == -1) {
+    ERROR("Couldn't open %s to decompress library", path.get());
+    return nullptr;
+  }
+  AutoUnlinkFile file(path.release());
+  if (stream->GetType() == Zip::Stream::DEFLATE) {
+    if (ftruncate(fd, stream->GetUncompressedSize()) == -1) {
+      ERROR("Couldn't ftruncate %s to decompress library", file.get());
+      return nullptr;
+    }
+    /* Map the temporary file for use as inflate buffer */
+    MappedPtr buffer(MemoryRange::mmap(nullptr, stream->GetUncompressedSize(),
+                                       PROT_WRITE, MAP_SHARED, fd, 0));
+    if (buffer == MAP_FAILED) {
+      ERROR("Couldn't map %s to decompress library", file.get());
+      return nullptr;
+    }
+    z_stream zStream = stream->GetZStream(buffer);
+    /* Decompress */
+    if (inflateInit2(&zStream, -MAX_WBITS) != Z_OK) {
+      ERROR("inflateInit failed: %s", zStream.msg);
+      return nullptr;
+    }
+    if (inflate(&zStream, Z_FINISH) != Z_STREAM_END) {
+      ERROR("inflate failed: %s", zStream.msg);
+      inflateEnd(&zStream); /* don't leak the zlib state on failure */
+      return nullptr;
+    }
+    if (inflateEnd(&zStream) != Z_OK) {
+      ERROR("inflateEnd failed: %s", zStream.msg);
+      return nullptr;
+    }
+    if (zStream.total_out != stream->GetUncompressedSize()) {
+      ERROR("File not fully uncompressed! %lu / %u", zStream.total_out,
+            static_cast<unsigned int>(stream->GetUncompressedSize()));
+      return nullptr;
+    }
+  } else if (XZStream::IsXZ(stream->GetBuffer(), stream->GetSize())) {
+    XZStream xzStream(stream->GetBuffer(), stream->GetSize());
+
+    if (!xzStream.Init()) {
+      ERROR("Couldn't initialize XZ decoder");
+      return nullptr;
+    }
+    DEBUG_LOG("XZStream created, compressed=%" PRIuPTR
+              ", uncompressed=%" PRIuPTR,
+              xzStream.Size(), xzStream.UncompressedSize());
+
+    if (ftruncate(fd, xzStream.UncompressedSize()) == -1) {
+      ERROR("Couldn't ftruncate %s to decompress library", file.get());
+      return nullptr;
+    }
+    MappedPtr buffer(MemoryRange::mmap(nullptr, xzStream.UncompressedSize(),
+                                       PROT_WRITE, MAP_SHARED, fd, 0));
+    if (buffer == MAP_FAILED) {
+      ERROR("Couldn't map %s to decompress library", file.get());
+      return nullptr;
+    }
+    const size_t written = xzStream.Decode(buffer, buffer.GetLength());
+    DEBUG_LOG("XZStream decoded %" PRIuPTR, written);
+    if (written != buffer.GetLength()) {
+      ERROR("Error decoding XZ file %s", file.get());
+      return nullptr;
+    }
+  } else {
+    return nullptr;
+  }
+
+  validator.CacheChecksum();
+  return new MappableExtractFile(fd.forget(), file.release());
+}
+
+/**
+ * _MappableBuffer is a buffer which content can be mapped at different
+ * locations in the virtual address space.
+ * On Linux, uses a (deleted) temporary file on a tmpfs for sharable content.
+ * On Android, uses ashmem.
+ */
+class _MappableBuffer : public MappedPtr {
+ public:
+  /**
+   * Returns a _MappableBuffer instance with the given name and the given
+   * length, or nullptr when the backing storage can't be created or mapped.
+   */
+  static _MappableBuffer* Create(const char* name, size_t length) {
+    AutoCloseFD fd;
+    const char* ident;
+#ifdef ANDROID
+    /* On Android, initialize an ashmem region with the given length */
+    fd = mozilla::android::ashmem_create(name, length);
+    ident = name;
+#else
+    /* On Linux, use /dev/shm as base directory for temporary files, assuming
+     * it's on tmpfs */
+    /* TODO: check that /dev/shm is tmpfs */
+    char path[256];
+    /* Bounded formatting: an over-long |name| must not overflow |path|. */
+    int len = snprintf(path, sizeof(path), "/dev/shm/%s.XXXXXX", name);
+    if (len < 0 || static_cast<size_t>(len) >= sizeof(path)) return nullptr;
+    fd = mkstemp(path);
+    if (fd == -1) return nullptr;
+    unlink(path);
+    /* Touching a mapping of a file that was never grown would fault. */
+    if (ftruncate(fd, length) == -1) return nullptr;
+    ident = path;
+#endif
+
+    void* buf =
+        ::mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (buf != MAP_FAILED) {
+      DEBUG_LOG("Decompression buffer of size 0x%" PRIxPTR
+                " in "
+#ifdef ANDROID
+                "ashmem "
+#endif
+                "\"%s\", mapped @%p",
+                length, ident, buf);
+      return new _MappableBuffer(fd.forget(), buf, length);
+    }
+    return nullptr;
+  }
+
+  void* mmap(const void* addr, size_t length, int prot, int flags,
+             off_t offset) {
+    MOZ_ASSERT(fd != -1);
+#ifdef ANDROID
+    /* Mapping ashmem MAP_PRIVATE is like mapping anonymous memory, even when
+     * there is content in the ashmem */
+    if (flags & MAP_PRIVATE) flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
+#endif
+    return ::mmap(const_cast<void*>(addr), length, prot, flags, fd, offset);
+  }
+
+ private:
+  _MappableBuffer(int fd, void* buf, size_t length)
+      : MappedPtr(buf, length), fd(fd) {}
+
+  /* File descriptor for the temporary file or ashmem */
+  AutoCloseFD fd;
+};
+
+Mappable* MappableDeflate::Create(const char* name, Zip* zip,
+                                  Zip::Stream* stream) {
+  MOZ_ASSERT(stream->GetType() == Zip::Stream::DEFLATE);
+  /* The buffer is created with the uncompressed size of the stream. */
+  const size_t inflatedSize = stream->GetUncompressedSize();
+  _MappableBuffer* backing = _MappableBuffer::Create(name, inflatedSize);
+  return backing ? new MappableDeflate(backing, zip, stream) : nullptr;
+}
+
+MappableDeflate::MappableDeflate(_MappableBuffer* buf, Zip* zip,
+                                 Zip::Stream* stream)
+    : zip(zip), buffer(buf), zStream(stream->GetZStream(*buf)) {}
+/* NOTE(review): zlib state is released by finalize()/mmap(), not here. */
+MappableDeflate::~MappableDeflate() {}
+
+MemoryRange MappableDeflate::mmap(const void* addr, size_t length, int prot,
+                                  int flags, off_t offset) {
+  MOZ_ASSERT(buffer);
+  MOZ_ASSERT(!(flags & MAP_SHARED));
+  flags |= MAP_PRIVATE;
+  /* The deflate stream is uncompressed up to the required offset + length, if
+   * it hasn't previously been uncompressed. |missing| is how much more has to
+   * be inflated, assuming avail_out is the room left in the buffer. */
+  ssize_t missing = offset + length + zStream.avail_out - buffer->GetLength();
+  if (missing > 0) {
+    uInt avail_out = zStream.avail_out;
+    zStream.avail_out = missing;
+    if ((*buffer == zStream.next_out) &&
+        (inflateInit2(&zStream, -MAX_WBITS) != Z_OK)) {
+      ERROR("inflateInit failed: %s", zStream.msg);
+      return MemoryRange(MAP_FAILED, 0);
+    }
+    int ret = inflate(&zStream, Z_SYNC_FLUSH);
+    if (ret < 0) {
+      ERROR("inflate failed: %s", zStream.msg);
+      return MemoryRange(MAP_FAILED, 0);
+    }
+    if (ret == Z_NEED_DICT) {
+      ERROR("zstream requires a dictionary. %s", zStream.msg);
+      return MemoryRange(MAP_FAILED, 0);
+    }
+    zStream.avail_out = avail_out - missing + zStream.avail_out;
+    if (ret == Z_STREAM_END) {
+      if (inflateEnd(&zStream) != Z_OK) {
+        ERROR("inflateEnd failed: %s", zStream.msg);
+        return MemoryRange(MAP_FAILED, 0);
+      }
+      if (zStream.total_out != buffer->GetLength()) {
+        ERROR("File not fully uncompressed! %lu / %u", zStream.total_out,
+              static_cast<unsigned int>(buffer->GetLength()));
+        return MemoryRange(MAP_FAILED, 0);
+      }
+    }
+  }
+#if defined(ANDROID) && defined(__arm__)
+  if (prot & PROT_EXEC) {
+    /* We just extracted data that may be executed in the future.
+     * We thus need to ensure Instruction and Data cache coherency. */
+    DEBUG_LOG("cacheflush(%p, %p)", *buffer + offset,
+              *buffer + (offset + length));
+    cacheflush(reinterpret_cast<uintptr_t>(*buffer + offset),
+               reinterpret_cast<uintptr_t>(*buffer + (offset + length)), 0);
+  }
+#endif
+
+  return MemoryRange(buffer->mmap(addr, length, prot, flags, offset), length);
+}
+
+void MappableDeflate::finalize() {
+  /* Free zlib internal buffers (mmap() may already have ended the stream) */
+  inflateEnd(&zStream);
+  /* Free decompression buffer */
+  buffer = nullptr;
+  /* Remove reference to Zip archive */
+  zip = nullptr;
+}
+/* Length of the decompression buffer, as recorded when it was created. */
+size_t MappableDeflate::GetLength() const { return buffer->GetLength(); }
diff --git a/mozglue/linker/Mappable.h b/mozglue/linker/Mappable.h
new file mode 100644
index 0000000000..8468aaaccb
--- /dev/null
+++ b/mozglue/linker/Mappable.h
@@ -0,0 +1,161 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Mappable_h
+#define Mappable_h
+
+#include "Zip.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/UniquePtr.h"
+#include "zlib.h"
+
+/**
+ * Abstract class to handle mmap()ing from various kinds of entities, such as
+ * plain files or Zip entries. The virtual members are meant to act as the
+ * equivalent system functions, except mapped memory is always MAP_PRIVATE,
+ * even though a given implementation may use something different internally.
+ */
+class Mappable : public mozilla::RefCounted<Mappable> {
+ public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(Mappable)
+  virtual ~Mappable() {}
+
+  virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags,
+                           off_t offset) = 0;
+  /* Tags returned by GetKind() to identify the implementation. */
+  enum Kind {
+    MAPPABLE_FILE,
+    MAPPABLE_EXTRACT_FILE,
+    MAPPABLE_DEFLATE,
+    MAPPABLE_SEEKABLE_ZSTREAM
+  };
+
+  virtual Kind GetKind() const = 0;
+
+ private:
+  virtual void munmap(void* addr, size_t length) { ::munmap(addr, length); }
+  /* Limit use of Mappable::munmap to classes that keep track of the address
+   * and size of the mapping. This allows ignoring ::munmap's return value. */
+  friend class Mappable1stPagePtr;
+  friend class LibHandle;
+
+ public:
+  /**
+   * Indicate to a Mappable instance that no further mmap is going to happen.
+   */
+  virtual void finalize() = 0;
+
+  /**
+   * Returns the maximum length that can be mapped from this Mappable for
+   * offset = 0.
+   */
+  virtual size_t GetLength() const = 0;
+};
+
+/**
+ * Mappable implementation for plain files
+ */
+class MappableFile : public Mappable {
+ public:
+  ~MappableFile() {}
+
+  /** Create a MappableFile instance for the given file path.
+   * Returns nullptr when the file cannot be opened read-only.
+   */
+  static Mappable* Create(const char* path);
+
+  /* Inherited from Mappable */
+  virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags,
+                           off_t offset);
+  virtual void finalize();
+  virtual size_t GetLength() const;
+
+  virtual Kind GetKind() const { return MAPPABLE_FILE; };
+
+ protected:
+  explicit MappableFile(int fd) : fd(fd) {}
+
+ private:
+  /* File descriptor; finalize() resets it to -1 */
+  AutoCloseFD fd;
+};
+
+/**
+ * Mappable implementation for deflated stream in a Zip archive
+ * Inflates the complete stream into a cache file.
+ */
+class MappableExtractFile : public MappableFile {
+ public:
+  ~MappableExtractFile() = default;
+
+  /**
+   * Create a MappableExtractFile instance for the given Zip stream. The name
+   * argument is used to create the cache file in the cache directory.
+   */
+  static Mappable* Create(const char* name, Zip* zip, Zip::Stream* stream);
+
+  /* Override finalize from MappableFile: intentionally does nothing */
+  virtual void finalize() {}
+
+  virtual Kind GetKind() const { return MAPPABLE_EXTRACT_FILE; };
+
+ private:
+  /**
+   * AutoUnlinkFile keeps track of a file name and removes (unlinks) the file
+   * when the instance is destroyed; UnlinkFile is the deleter doing that.
+   */
+  struct UnlinkFile {
+    void operator()(char* value) {
+      unlink(value);
+      delete[] value;
+    }
+  };
+  typedef mozilla::UniquePtr<char[], UnlinkFile> AutoUnlinkFile;
+
+  MappableExtractFile(int fd, const char* path)
+      : MappableFile(fd), path(path) {}
+
+  /* Extracted file path */
+  mozilla::UniquePtr<const char[]> path;
+};
+
+class _MappableBuffer;
+
+/**
+ * Mappable implementation for deflated stream in a Zip archive.
+ * Inflates the mapped bits in a temporary buffer.
+ */
+class MappableDeflate : public Mappable {
+ public:
+  ~MappableDeflate();
+
+  /**
+   * Create a MappableDeflate instance for the given Zip stream. The name
+   * argument is used for an appropriately named temporary file, and the Zip
+   * instance is given for the MappableDeflate to keep a reference of it.
+   */
+  static Mappable* Create(const char* name, Zip* zip, Zip::Stream* stream);
+
+  /* Inherited from Mappable */
+  virtual MemoryRange mmap(const void* addr, size_t length, int prot, int flags,
+                           off_t offset);
+  virtual void finalize();
+  virtual size_t GetLength() const;
+
+  virtual Kind GetKind() const { return MAPPABLE_DEFLATE; };
+
+ private:
+  MappableDeflate(_MappableBuffer* buf, Zip* zip, Zip::Stream* stream);
+
+  /* Zip reference */
+  RefPtr<Zip> zip;
+
+  /* Decompression buffer, owned by this object */
+  mozilla::UniquePtr<_MappableBuffer> buffer;
+
+  /* Zlib data: the inflate state used to fill the buffer */
+  z_stream zStream;
+};
+
+#endif /* Mappable_h */
diff --git a/mozglue/linker/Utils.h b/mozglue/linker/Utils.h
new file mode 100644
index 0000000000..d3827f1f41
--- /dev/null
+++ b/mozglue/linker/Utils.h
@@ -0,0 +1,532 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Utils_h
+#define Utils_h
+
+#include <pthread.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/Scoped.h"
+
+/**
+ * On architectures that are little endian and that support unaligned reads,
+ * we can use direct type, but on others, we want to have a special class
+ * to handle conversion and alignment issues.
+ */
+#if !defined(DEBUG) && (defined(__i386__) || defined(__x86_64__))
+typedef uint16_t le_uint16;
+typedef uint32_t le_uint32;
+#else
+
+/**
+ * Template that allows to find an unsigned int type from a (computed) bit size
+ */
+template <int s>
+struct UInt {};
+template <>
+struct UInt<16> { typedef uint16_t Type; };
+template <>
+struct UInt<32> { typedef uint32_t Type; };
+
+/**
+ * Template to access 2 n-bit sized words as a 2*n-bit sized word, doing
+ * conversion from little endian and avoiding alignment issues.
+ * Member |a| is the low half and member |b| the high half.
+ */
+template <typename T>
+class le_to_cpu {
+ public:
+  typedef typename UInt<16 * sizeof(T)>::Type Type;
+
+  /* Widen to Type before shifting: shifting the int-promoted high half could
+   * otherwise move a set top bit into the sign bit of a signed int. */
+  operator Type() const {
+    return (static_cast<Type>(b) << (sizeof(T) * 8)) | static_cast<Type>(a);
+  }
+
+  const le_to_cpu& operator=(const Type& v) {
+    a = v & ((1 << (sizeof(T) * 8)) - 1);
+    b = v >> (sizeof(T) * 8);
+    return *this;
+  }
+
+  le_to_cpu() {}
+  explicit le_to_cpu(const Type& v) { operator=(v); }
+
+  const le_to_cpu& operator+=(const Type& v) {
+    return operator=(operator Type() + v);
+  }
+
+  /* Spelled as post-increment, but returns the already incremented value. */
+  const le_to_cpu& operator++(int) { return operator=(operator Type() + 1); }
+
+ private:
+  T a, b;
+};
+
+/** Type definitions */
+typedef le_to_cpu<unsigned char> le_uint16;
+typedef le_to_cpu<le_uint16> le_uint32;
+#endif
+
+/**
+ * AutoCloseFD is a RAII wrapper for POSIX file descriptors
+ */
+struct AutoCloseFDTraits {
+  using type = int;
+  /* -1 stands for "no descriptor held". */
+  static int empty() { return -1; }
+  /* Closes |fd| unless it is the empty value. */
+  static void release(int fd) { if (fd != empty()) close(fd); }
+};
+typedef mozilla::Scoped<AutoCloseFDTraits> AutoCloseFD;
+
+/**
+ * AutoCloseFILE is a RAII wrapper for POSIX streams
+ */
+struct AutoCloseFILETraits {
+  using type = FILE*;
+  /* A null stream stands for "nothing to close". */
+  static FILE* empty() { return nullptr; }
+  /* fclose()s |f| unless it is null. */
+  static void release(FILE* f) { if (f != empty()) fclose(f); }
+};
+typedef mozilla::Scoped<AutoCloseFILETraits> AutoCloseFILE;
+
+extern mozilla::Atomic<size_t, mozilla::ReleaseAcquire> gPageSize;
+
+/**
+ * Page alignment helpers
+ */
+static size_t PageSize() {
+  /* Ask sysconf() only while gPageSize is still zero, then reuse it. */
+  if (gPageSize == 0) {
+    gPageSize = sysconf(_SC_PAGESIZE);
+  }
+  return gPageSize;
+}
+
+/* Rounds |ptr| down; the mask math only works for power-of-two alignments. */
+static inline uintptr_t AlignedPtr(uintptr_t ptr, size_t alignment) {
+  return ptr & ~(alignment - 1);
+}
+template <typename T>
+static inline T* AlignedPtr(T* ptr, size_t alignment) {
+  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+  return reinterpret_cast<T*>(AlignedPtr(address, alignment));
+}
+
+template <typename T>
+static inline T PageAlignedPtr(T ptr) {
+  return AlignedPtr(ptr, PageSize());
+}
+
+/* Rounds |ptr| up to a multiple of |alignment|; aligned values are kept. */
+static inline uintptr_t AlignedEndPtr(uintptr_t ptr, size_t alignment) {
+  return AlignedPtr(ptr + alignment - 1, alignment);
+}
+template <typename T>
+static inline T* AlignedEndPtr(T* ptr, size_t alignment) {
+  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+  return reinterpret_cast<T*>(AlignedEndPtr(address, alignment));
+}
+
+template <typename T>
+static inline T PageAlignedEndPtr(T ptr) {
+  return AlignedEndPtr(ptr, PageSize());
+}
+
+static inline size_t AlignedSize(size_t size, size_t alignment) {
+  const size_t mask = alignment - 1;
+  return (size + mask) & ~mask;
+}
+static inline size_t PageAlignedSize(size_t size) {
+  return AlignedSize(size, PageSize());
+}
+
+/* True when |ptr| is an exact multiple of |alignment|. */
+static inline bool IsAlignedPtr(uintptr_t ptr, size_t alignment) {
+  return !(ptr % alignment);
+}
+template <typename T>
+static inline bool IsAlignedPtr(T* ptr, size_t alignment) {
+  return IsAlignedPtr(reinterpret_cast<uintptr_t>(ptr), alignment);
+}
+
+template <typename T>
+static inline bool IsPageAlignedPtr(T ptr) {
+  return IsAlignedPtr(ptr, PageSize());
+}
+
+static inline bool IsAlignedSize(size_t size, size_t alignment) {
+  return !(size % alignment);
+}
+static inline bool IsPageAlignedSize(size_t size) {
+  return IsAlignedSize(size, PageSize());
+}
+/* Number of pages needed to hold |size| bytes, rounded up. */
+static inline size_t PageNumber(size_t size) {
+  const size_t pageSize = PageSize();
+  return (size + pageSize - 1) / pageSize;
+}
+
+/**
+ * MemoryRange pairs a start address with a byte length; it owns nothing.
+ */
+class MemoryRange {
+ public:
+  MemoryRange(void* buf, size_t length) : buf(buf), length(length) {}
+
+  void Assign(void* b, size_t len) {
+    length = len;
+    buf = b;
+  }
+
+  void Assign(const MemoryRange& other) { Assign(other.buf, other.length); }
+
+  void* get() const { return buf; }
+
+  operator void*() const { return get(); }
+
+  operator unsigned char*() const { return static_cast<unsigned char*>(buf); }
+
+  /* Both comparisons test the start address only, never the length. */
+  bool operator==(void* ptr) const { return ptr == buf; }
+  bool operator==(unsigned char* ptr) const { return ptr == buf; }
+
+  /* Address |offset| bytes past the start; no bounds check is made. */
+  void* operator+(off_t offset) const {
+    return static_cast<char*>(buf) + offset;
+  }
+
+  /**
+   * Returns whether the given address is within the mapped range
+   */
+  bool Contains(void* ptr) const {
+    const char* const first = static_cast<const char*>(buf);
+    const char* const probe = static_cast<const char*>(ptr);
+    return probe >= first && probe < first + length;
+  }
+
+  /**
+   * Returns the length of the mapped range
+   */
+  size_t GetLength() const { return length; }
+
+  /* Thin wrapper over ::mmap(); the result may hold MAP_FAILED. */
+  static MemoryRange mmap(void* addr, size_t length, int prot, int flags,
+                          int fd, off_t offset) {
+    void* const mapped = ::mmap(addr, length, prot, flags, fd, offset);
+    return MemoryRange(mapped, length);
+  }
+
+ private:
+  void* buf;
+  size_t length;
+};
+
+/**
+ * MappedPtr is a RAII wrapper for mmap()ed memory. It can be used as
+ * a simple void * or unsigned char *.
+ * It is defined as a derivative of a template that allows to use a
+ * different unmapping strategy.
+ */
+template <typename T>
+class GenericMappedPtr : public MemoryRange {
+ public:
+  GenericMappedPtr() : MemoryRange(MAP_FAILED, 0) {}
+  GenericMappedPtr(void* buf, size_t length) : MemoryRange(buf, length) {}
+  explicit GenericMappedPtr(const MemoryRange& other) : MemoryRange(other) {}
+  ~GenericMappedPtr() { Unmap(); }
+
+  void Assign(void* b, size_t len) {
+    Unmap();
+    MemoryRange::Assign(b, len);
+  }
+  void Assign(const MemoryRange& other) {
+    Assign(other.get(), other.GetLength());
+  }
+
+  void release() { MemoryRange::Assign(MAP_FAILED, 0); } /* no unmapping */
+ private:
+  /* Unmaps the current range through T::munmap() unless it is MAP_FAILED. */
+  void Unmap() {
+    if (get() != MAP_FAILED) static_cast<T*>(this)->munmap(get(), GetLength());
+  }
+};
+
+struct MappedPtr : public GenericMappedPtr<MappedPtr> {
+  MappedPtr() : GenericMappedPtr<MappedPtr>() {}
+  MappedPtr(void* buf, size_t length)
+      : GenericMappedPtr<MappedPtr>(buf, length) {}
+  MOZ_IMPLICIT MappedPtr(const MemoryRange& other)
+      : GenericMappedPtr<MappedPtr>(other) {}
+
+ private:
+  friend class GenericMappedPtr<MappedPtr>;
+  void munmap(void* buf, size_t length) { ::munmap(buf, length); }
+};
+
+/**
+ * UnsizedArray is a way to access raw arrays of data in memory.
+ *
+ *   struct S { ... };
+ *   UnsizedArray<S> a(buf);
+ *   UnsizedArray<S> b; b.Init(buf);
+ *
+ * This is roughly equivalent to
+ *   const S *a = reinterpret_cast<const S *>(buf);
+ *   const S *b = nullptr; b = reinterpret_cast<const S *>(buf);
+ *
+ * An UnsizedArray has no known length, and it's up to the caller to make
+ * sure the accessed memory is mapped and makes sense.
+ */
+template <typename T>
+class UnsizedArray {
+ public:
+  typedef size_t idx_t;
+
+  /** Constructors and Initializers */
+  UnsizedArray() : contents(nullptr) {}
+  explicit UnsizedArray(const void* buf)
+      : contents(static_cast<const T*>(buf)) {}
+
+  /* One-shot initializer: asserts that nothing was set before. */
+  void Init(const void* buf) {
+    MOZ_ASSERT(contents == nullptr);
+    contents = static_cast<const T*>(buf);
+  }
+
+  /**
+   * Returns the nth element of the array
+   */
+  const T& operator[](const idx_t index) const {
+    MOZ_ASSERT(contents);
+    return *(contents + index);
+  }
+
+  operator const T*() const { return contents; }
+
+  /**
+   * Returns whether the array points somewhere
+   */
+  explicit operator bool() const { return contents ? true : false; }
+
+ private:
+  const T* contents;
+};
+
+/**
+ * Array, like UnsizedArray, is a way to access raw arrays of data in memory.
+ * Unlike UnsizedArray, it has a known length, and is enumerable with an
+ * iterator.
+ *
+ *   struct S { ... };
+ *   Array<S> a(buf, len);
+ *   Array<S> b; b.Init(buf, len);
+ *
+ * In the above examples, len is the number of elements in the array. It is
+ * also possible to initialize an Array with the buffer size:
+ *
+ *   Array<S> c; c.InitSize(buf, size);
+ *
+ * It is also possible to initialize an Array in two steps, providing only
+ * one piece of data at a time:
+ *
+ *   Array<S> d;
+ *   d.Init(buf);
+ *   d.Init(len); // or d.InitSize(size);
+ *
+ */
+template <typename T>
+class Array : public UnsizedArray<T> {
+ public:
+  typedef typename UnsizedArray<T>::idx_t idx_t;
+
+  /**
+   * Constructors and Initializers
+   */
+  Array() : UnsizedArray<T>(), length(0) {}
+  Array(const void* buf, const idx_t length)
+      : UnsizedArray<T>(buf), length(length) {}
+
+  void Init(const void* buf) { UnsizedArray<T>::Init(buf); }
+
+  void Init(const idx_t len) {
+    MOZ_ASSERT(length == 0);
+    length = len;
+  }
+
+  void InitSize(const idx_t size) { Init(size / sizeof(T)); }
+
+  void Init(const void* buf, const idx_t len) {
+    UnsizedArray<T>::Init(buf);
+    Init(len);
+  }
+
+  void InitSize(const void* buf, const idx_t size) {
+    UnsizedArray<T>::Init(buf);
+    InitSize(size);
+  }
+
+  /**
+   * Returns the nth element of the array
+   */
+  const T& operator[](const idx_t index) const {
+    MOZ_ASSERT(index < length);
+    MOZ_ASSERT(operator bool());
+    return UnsizedArray<T>::operator[](index);
+  }
+
+  /**
+   * Returns the number of elements in the array
+   */
+  idx_t numElements() const { return length; }
+
+  /**
+   * Returns whether the array points somewhere and has at least one element.
+   */
+  explicit operator bool() const {
+    return (length > 0) && UnsizedArray<T>::operator bool();
+  }
+
+  /**
+   * Iterator for an Array. Use is similar to that of STL const_iterators:
+   *
+   *   struct S { ... };
+   *   Array<S> a(buf, len);
+   *   for (Array<S>::iterator it = a.begin(); it < a.end(); ++it) {
+   *     // Do something with *it.
+   *   }
+   */
+  class iterator {
+   public:
+    iterator() : item(nullptr) {}
+
+    const T& operator*() const { return *item; }
+
+    const T* operator->() const { return item; }
+
+    iterator& operator++() {
+      ++item;
+      return *this;
+    }
+
+    bool operator<(const iterator& other) const { return item < other.item; }
+
+   protected:
+    friend class Array<T>;
+    explicit iterator(const T& item) : item(&item) {}
+
+   private:
+    const T* item;
+  };
+
+  /**
+   * Returns an iterator pointing at the beginning of the Array
+   */
+  iterator begin() const {
+    if (length) return iterator(UnsizedArray<T>::operator[](0));
+    return iterator();
+  }
+
+  /**
+   * Returns an iterator pointing past the end of the Array
+   */
+  iterator end() const {
+    if (length) return iterator(UnsizedArray<T>::operator[](length));
+    return iterator();
+  }
+
+  /**
+   * Reverse iterator for an Array. Use is similar to that of STL
+   * const_reverse_iterators:
+   *
+   *   struct S { ... };
+   *   Array<S> a(buf, len);
+   *   for (Array<S>::reverse_iterator it = a.rbegin(); it < a.rend(); ++it) {
+   *     // Do something with *it.
+   *   }
+   */
+  class reverse_iterator {
+   public:
+    reverse_iterator() : item(nullptr) {}
+
+    const T& operator*() const {
+      const T* tmp = item;
+      return *--tmp;
+    }
+
+    const T* operator->() const { return &operator*(); }
+
+    reverse_iterator& operator++() {
+      --item;
+      return *this;
+    }
+
+    bool operator<(const reverse_iterator& other) const {
+      return item > other.item;
+    }
+
+   protected:
+    friend class Array<T>;
+    explicit reverse_iterator(const T& item) : item(&item) {}
+
+   private:
+    const T* item;
+  };
+
+  /**
+   * Returns a reverse iterator pointing at the end of the Array
+   */
+  reverse_iterator rbegin() const {
+    if (length) return reverse_iterator(UnsizedArray<T>::operator[](length));
+    return reverse_iterator();
+  }
+
+  /**
+   * Returns a reverse iterator pointing past the beginning of the Array
+   */
+  reverse_iterator rend() const {
+    if (length) return reverse_iterator(UnsizedArray<T>::operator[](0));
+    return reverse_iterator();
+  }
+
+ private:
+  idx_t length;
+};
+
+/**
+ * Reinterprets a pointer-to-function as a pointer-to-object holding the
+ * same address, by storing it in a union and reading the other member.
+ */
+template <typename T>
+void* FunctionPtr(T func) {
+  union {
+    T asFunction;
+    void* asObject;
+  } pun;
+  pun.asFunction = func;
+  return pun.asObject;
+}
+/* Locks |mutex| on construction and unlocks it on destruction. */
+class AutoLock {
+ public:
+  explicit AutoLock(pthread_mutex_t* mutex) : mutex(mutex) {
+    if (pthread_mutex_lock(mutex) != 0) MOZ_CRASH("pthread_mutex_lock failed");
+  }
+  ~AutoLock() {
+    const int rv = pthread_mutex_unlock(mutex);
+    if (rv != 0) MOZ_CRASH("pthread_mutex_unlock failed");
+  }
+ private:
+  pthread_mutex_t* mutex;
+};
+
+#endif /* Utils_h */
diff --git a/mozglue/linker/XZStream.cpp b/mozglue/linker/XZStream.cpp
new file mode 100644
index 0000000000..db154d12aa
--- /dev/null
+++ b/mozglue/linker/XZStream.cpp
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "XZStream.h"
+
+#include <algorithm>
+#include <cstring>
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "Logging.h"
+
+// LZMA dictionary size, should have a minimum size for the given compression
+// rate, see XZ Utils docs for details.
+// 1 << 24 is 16 MiB; the value is handed to xz_dec_init() in Init().
+static const uint32_t kDictSize = 1 << 24;
+
+// Size in bytes of the footer that terminates the stream. ParseIndexSize()
+// locates the footer this many bytes before the end of the input buffer.
+static const size_t kFooterSize = 12;
+
+// Decodes a variable-length integer (VLI) from the start of aBuf, see
+// http://tukaani.org/xz/xz-file-format.txt for the encoding.
+// At most min(9, aBufSize) bytes are examined. Each byte contributes its low
+// seven bits, least significant group first; a set high bit announces that
+// another byte follows.
+// Returns the number of bytes consumed, or 0 when the buffer is empty, the
+// encoding runs past the examined bytes, or a continuation byte is 0x00.
+// Note that *aValue may already hold a partial result when 0 is returned.
+static size_t ParseVarLenInt(const uint8_t* aBuf, size_t aBufSize,
+                             uint64_t* aValue) {
+  if (aBufSize == 0) {
+    return 0;
+  }
+  const size_t limit = std::min(size_t(9), aBufSize);
+
+  *aValue = aBuf[0] & 0x7F;
+
+  // `last` is the index of the most recently accepted byte.
+  size_t last = 0;
+  while (aBuf[last] & 0x80) {
+    ++last;
+    if (last >= limit || aBuf[last] == 0x0) {
+      return 0;
+    }
+    *aValue |= static_cast<uint64_t>(aBuf[last] & 0x7F) << (last * 7);
+  }
+  return last + 1;
+}
+
+/* static */
+// Reports whether aBuf starts with the six XZ magic bytes. The buffer has to
+// be strictly longer than the magic itself for the answer to be true.
+bool XZStream::IsXZ(const void* aBuf, size_t aBufSize) {
+  static const uint8_t kXzMagic[] = {0xfd, '7', 'z', 'X', 'Z', 0x0};
+  MOZ_ASSERT(aBuf);
+  if (aBufSize <= sizeof(kXzMagic)) {
+    return false;
+  }
+  return memcmp(kXzMagic, aBuf, sizeof(kXzMagic)) == 0;
+}
+
+// Records the input buffer and exposes all of it to the decoder, starting at
+// position 0. The decoder itself is only created by Init().
+XZStream::XZStream(const void* aInBuf, size_t aInSize)
+    : mInBuf(static_cast<const uint8_t*>(aInBuf)),
+      mUncompSize(0),
+      mDec(nullptr) {
+  mBuffers.in_size = aInSize;
+  mBuffers.in_pos = 0;
+  mBuffers.in = mInBuf;
+}
+
+// Hands the decoder (possibly still null if Init() never ran) back to
+// xz_dec_end().
+XZStream::~XZStream() {
+  xz_dec_end(mDec);
+}
+
+// Sets up the CRC tables, allocates the decoder and reads the expected
+// uncompressed size from the stream index. Returns false when the decoder
+// cannot be allocated or when the size cannot be determined (or is 0).
+bool XZStream::Init() {
+#ifdef XZ_USE_CRC64
+  xz_crc64_init();
+#endif
+  xz_crc32_init();
+
+  mDec = xz_dec_init(XZ_DYNALLOC, kDictSize);
+  if (mDec == nullptr) {
+    return false;
+  }
+
+  mUncompSize = ParseUncompressedSize();
+  return mUncompSize != 0;
+}
+
+// Decodes as much input as fits into aOutBuf (aOutSize bytes) and returns the
+// number of bytes written. Returns 0 when there is no decoder or when the
+// decoder reports an error.
+size_t XZStream::Decode(void* aOutBuf, size_t aOutSize) {
+  // Nothing can be decoded without a decoder instance.
+  if (mDec == nullptr) {
+    return 0;
+  }
+
+  // Every call writes from the start of the caller's buffer; the input
+  // position is not reset here and carries over from previous calls.
+  mBuffers.out = static_cast<uint8_t*>(aOutBuf);
+  mBuffers.out_pos = 0;
+  mBuffers.out_size = aOutSize;
+
+  for (;;) {
+    const bool inputLeft = mBuffers.in_pos < mBuffers.in_size;
+    const bool outputRoom = mBuffers.out_pos < mBuffers.out_size;
+    if (!inputLeft || !outputRoom) {
+      break;
+    }
+
+    const xz_ret status = xz_dec_run(mDec, &mBuffers);
+
+    switch (status) {
+      case XZ_STREAM_END:
+        // Stream ended, the next loop iteration should terminate.
+        MOZ_ASSERT(mBuffers.in_pos == mBuffers.in_size);
+        [[fallthrough]];
+#ifdef XZ_DEC_ANY_CHECK
+      case XZ_UNSUPPORTED_CHECK:
+        // Ignore unsupported check.
+        [[fallthrough]];
+#endif
+      case XZ_OK:
+        // Chunk decoded, proceed.
+        break;
+
+      case XZ_MEM_ERROR:
+        ERROR("XZ decoding: memory allocation failed");
+        return 0;
+
+      case XZ_MEMLIMIT_ERROR:
+        ERROR("XZ decoding: memory usage limit reached");
+        return 0;
+
+      case XZ_FORMAT_ERROR:
+        ERROR("XZ decoding: invalid stream format");
+        return 0;
+
+      case XZ_OPTIONS_ERROR:
+        ERROR("XZ decoding: unsupported header options");
+        return 0;
+
+      case XZ_DATA_ERROR:
+        [[fallthrough]];
+      case XZ_BUF_ERROR:
+        ERROR("XZ decoding: corrupt input stream");
+        return 0;
+
+      default:
+        MOZ_ASSERT_UNREACHABLE("XZ decoding: unknown error condition");
+        return 0;
+    }
+  }
+  return mBuffers.out_pos;
+}
+
+// Number of input bytes located after the current input position.
+size_t XZStream::RemainingInput() const {
+  const size_t consumed = mBuffers.in_pos;
+  return mBuffers.in_size - consumed;
+}
+
+// Total size of the input buffer, as given to the constructor.
+size_t XZStream::Size() const {
+  return mBuffers.in_size;
+}
+
+// Uncompressed size stored by Init(); 0 until Init() has stored one.
+size_t XZStream::UncompressedSize() const {
+  return mUncompSize;
+}
+
+// Reads the stream footer at the end of the input buffer and returns the size
+// of the stream index in bytes. Returns 0 when the buffer is too short to
+// hold a footer, when the footer magic is missing, or when the size does not
+// fit in a size_t.
+size_t XZStream::ParseIndexSize() const {
+  static const uint8_t kFooterMagic[] = {'Y', 'Z'};
+
+  // A buffer shorter than the footer cannot end with a valid one; bail out
+  // before the pointer arithmetic below would reach in front of mInBuf.
+  if (mBuffers.in_size < kFooterSize) {
+    ERROR("XZ parsing: Invalid footer at end of stream");
+    return 0;
+  }
+
+  const uint8_t* footer = mInBuf + mBuffers.in_size - kFooterSize;
+  // The magic bytes are at the end of the footer.
+  if (memcmp(reinterpret_cast<const void*>(kFooterMagic),
+             footer + kFooterSize - sizeof(kFooterMagic),
+             sizeof(kFooterMagic))) {
+    // Not a valid footer at stream end.
+    ERROR("XZ parsing: Invalid footer at end of stream");
+    return 0;
+  }
+  // Backward size is a 32 bit LE integer field positioned after the 32 bit
+  // CRC32 code. It encodes the index size as a multiple of 4 bytes with a
+  // minimum size of 4 bytes.
+  // All four bytes are assembled explicitly: dereferencing the uint8_t
+  // pointer would only pick up the lowest byte of the field.
+  const uint8_t* field = footer + 4;
+  const uint32_t backwardSizeRaw =
+      static_cast<uint32_t>(field[0]) |
+      (static_cast<uint32_t>(field[1]) << 8) |
+      (static_cast<uint32_t>(field[2]) << 16) |
+      (static_cast<uint32_t>(field[3]) << 24);
+  // Check for overflow.
+  mozilla::CheckedInt<size_t> backwardSizeBytes(backwardSizeRaw);
+  backwardSizeBytes = (backwardSizeBytes + 1) * 4;
+  if (!backwardSizeBytes.isValid()) {
+    ERROR("XZ parsing: Cannot parse index size");
+    return 0;
+  }
+  return backwardSizeBytes.value();
+}
+
+// Reads the stream index that precedes the footer and returns the
+// uncompressed size of the single record it describes. Returns 0 when the
+// index is missing or malformed, describes a number of records other than
+// one, or holds a size that does not fit in a size_t.
+size_t XZStream::ParseUncompressedSize() const {
+  static const uint8_t kIndexIndicator[] = {0x0};
+
+  const size_t indexSize = ParseIndexSize();
+  if (!indexSize) {
+    return 0;
+  }
+  // The index and the footer both have to fit in the input buffer, otherwise
+  // the index pointer computed below would lie in front of mInBuf.
+  if (mBuffers.in_size < kFooterSize ||
+      indexSize > mBuffers.in_size - kFooterSize) {
+    ERROR("XZ parsing: Invalid stream index");
+    return 0;
+  }
+  // The footer follows directly the index, so we can use it as a reference.
+  const uint8_t* end = mInBuf + mBuffers.in_size;
+  const uint8_t* indexEnd = end - kFooterSize;
+  const uint8_t* index = indexEnd - indexSize;
+
+  // The xz stream index consists of three concatenated elements:
+  //  (1) 1 byte indicator (always 0x00)
+  //  (2) a Variable Length Integer (VLI) field for the number of records
+  //  (3) a list of records
+  // See https://tukaani.org/xz/xz-file-format-1.0.4.txt
+  // Each record contains a VLI field for unpadded size followed by a var field
+  // for uncompressed size. We only support xz streams with a single record.
+
+  if (memcmp(reinterpret_cast<const void*>(kIndexIndicator), index,
+             sizeof(kIndexIndicator))) {
+    ERROR("XZ parsing: Invalid stream index");
+    return 0;
+  }
+
+  index += sizeof(kIndexIndicator);
+
+  // ParseVarLenInt() returns 0 on failure and may leave a partial value in
+  // its output, so each return value is checked before the value is used.
+  uint64_t numRecords = 0;
+  size_t consumed = ParseVarLenInt(index, indexEnd - index, &numRecords);
+  if (!consumed) {
+    ERROR("XZ parsing: Invalid stream index");
+    return 0;
+  }
+  index += consumed;
+  // Only streams with a single record are supported.
+  if (numRecords != 1) {
+    ERROR("XZ parsing: Multiple records not supported");
+    return 0;
+  }
+
+  uint64_t unpaddedSize = 0;
+  consumed = ParseVarLenInt(index, indexEnd - index, &unpaddedSize);
+  if (!consumed) {
+    ERROR("XZ parsing: Invalid stream index");
+    return 0;
+  }
+  index += consumed;
+  if (!unpaddedSize) {
+    ERROR("XZ parsing: Unpadded size is 0");
+    return 0;
+  }
+
+  uint64_t uncompressedSize = 0;
+  consumed = ParseVarLenInt(index, indexEnd - index, &uncompressedSize);
+  if (!consumed) {
+    ERROR("XZ parsing: Invalid stream index");
+    return 0;
+  }
+  mozilla::CheckedInt<size_t> checkedSize(uncompressedSize);
+  if (!checkedSize.isValid()) {
+    ERROR("XZ parsing: Uncompressed stream size is too large");
+    return 0;
+  }
+
+  return checkedSize.value();
+}
diff --git a/mozglue/linker/XZStream.h b/mozglue/linker/XZStream.h
new file mode 100644
index 0000000000..bab5520e37
--- /dev/null
+++ b/mozglue/linker/XZStream.h
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef XZSTREAM_h
+#define XZSTREAM_h
+
+#include <cstdlib>
+#include <stdint.h>
+
+#define XZ_DEC_DYNALLOC
+#include "xz.h"
+
+// Used to decode XZ stream buffers.
+// An instance refers to (but does not copy) the input buffer passed to the
+// constructor, and owns the decoder created by Init().
+class XZStream {
+ public:
+  // Returns whether the provided buffer is likely a XZ stream.
+  static bool IsXZ(const void* aBuf, size_t aBufSize);
+
+  // Creates a XZ stream object for the given input buffer.
+  XZStream(const void* aInBuf, size_t aInSize);
+  ~XZStream();
+
+  // The destructor releases mDec, so a copy would release the same decoder
+  // a second time. Instances are therefore neither copyable nor assignable.
+  XZStream(const XZStream&) = delete;
+  XZStream& operator=(const XZStream&) = delete;
+
+  // Initializes the decoder and returns whether decoding may commence.
+  bool Init();
+  // Decodes the next chunk of input into the given output buffer and returns
+  // the number of bytes written (0 on error or without a decoder).
+  size_t Decode(void* aOutBuf, size_t aOutSize);
+  // Returns the number of yet undecoded bytes in the input buffer.
+  size_t RemainingInput() const;
+  // Returns the total number of bytes in the input buffer (compressed size).
+  size_t Size() const;
+  // Returns the expected final number of bytes in the output buffer.
+  // Note: will return 0 before successful Init().
+  size_t UncompressedSize() const;
+
+ private:
+  // Parses the stream footer and returns the size of the index in bytes,
+  // or 0 on failure.
+  size_t ParseIndexSize() const;
+  // Parses the stream index and returns the expected uncompressed size in
+  // bytes, or 0 on failure.
+  size_t ParseUncompressedSize() const;
+
+  // Start of the input buffer.
+  const uint8_t* mInBuf;
+  // Uncompressed size read from the stream index by Init().
+  size_t mUncompSize;
+  // Input/output positions shared with the decoder.
+  xz_buf mBuffers;
+  // Decoder state; null until Init() allocates it.
+  xz_dec* mDec;
+};
+
+#endif  // XZSTREAM_h
diff --git a/mozglue/linker/Zip.cpp b/mozglue/linker/Zip.cpp
new file mode 100644
index 0000000000..7ecc6b9a74
--- /dev/null
+++ b/mozglue/linker/Zip.cpp
@@ -0,0 +1,277 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <cstdlib>
+#include <algorithm>
+#include "Logging.h"
+#include "Zip.h"
+
+// Opens the named file read-only, maps all of it and builds a Zip on top of
+// the mapping. Returns nullptr (after logging) when the file cannot be
+// opened, stat'ed or mapped, or when it is not larger than a Central
+// Directory End record.
+already_AddRefed<Zip> Zip::Create(const char* filename) {
+  AutoCloseFD fd(open(filename, O_RDONLY));
+  if (fd == -1) {
+    ERROR("Error opening %s: %s", filename, strerror(errno));
+    return nullptr;
+  }
+
+  struct stat info;
+  if (fstat(fd, &info) == -1) {
+    ERROR("Error stating %s: %s", filename, strerror(errno));
+    return nullptr;
+  }
+
+  const size_t length = info.st_size;
+  if (length <= sizeof(CentralDirectoryEnd)) {
+    ERROR("Error reading %s: too short", filename);
+    return nullptr;
+  }
+
+  void* base = mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, 0);
+  if (base == MAP_FAILED) {
+    ERROR("Error mmapping %s: %s", filename, strerror(errno));
+    return nullptr;
+  }
+  DEBUG_LOG("Mapped %s @%p", filename, base);
+
+  return Create(filename, base, length);
+}
+
+// Builds a Zip over an already available buffer and registers it with the
+// ZipCollection. Returns nullptr when the buffer does not look like a zip.
+already_AddRefed<Zip> Zip::Create(const char* filename, void* mapped,
+                                  size_t size) {
+  RefPtr<Zip> zip = new Zip(filename, mapped, size);
+
+  // The archive is usable as soon as either the first Local File entry or
+  // the central directory entries could be located.
+  const bool usable = zip->nextFile || zip->entries;
+  if (!usable) {
+    ERROR("%s - Invalid zip", filename);
+    return nullptr;
+  }
+
+  ZipCollection::Singleton.Register(zip);
+  return zip.forget();
+}
+
+// Keeps a private copy of the file name (when there is one), remembers the
+// buffer, and looks for the first Local File entry at the buffer start.
+Zip::Zip(const char* filename, void* mapped, size_t size)
+    : name(filename ? strdup(filename) : nullptr),
+      mapped(mapped),
+      size(size),
+      nextFile(LocalFile::validate(mapped)),  // first Local File entry
+      nextDir(nullptr),
+      entries(nullptr) {
+  pthread_mutex_init(&mutex, nullptr);
+  // Without a Local File entry at the start (which can happen with
+  // optimized jars), fall back to locating the first central directory
+  // entry.
+  if (!nextFile) {
+    GetFirstEntry();
+  }
+}
+
+// Unmaps the buffer and frees the name copy, but only for named archives;
+// the buffer of an unnamed archive is left alone. The mutex is always
+// destroyed.
+Zip::~Zip() {
+  if (name != nullptr) {
+    munmap(mapped, size);
+    DEBUG_LOG("Unmapped %s @%p", name, mapped);
+    free(name);
+  }
+  pthread_mutex_destroy(&mutex);
+}
+
+/* Looks up the entry named `path` and fills `out` with the location, sizes,
+ * CRC and compression type of its stream. Returns false when no entry with
+ * that name is found or when its Local File header cannot be validated.
+ * The lookup cursors (nextFile, nextDir) are updated under the mutex. */
+bool Zip::GetStream(const char* path, Zip::Stream* out) const {
+  AutoLock lock(&mutex);
+
+  DEBUG_LOG("%s - GetFile %s", name, path);
+  /* Fast path: if the Local File header on store matches, we can return the
+   * corresponding stream right away.
+   * However, the Local File header may not contain enough information, in
+   * which case the 3rd bit on the generalFlag is set. Unfortunately, this
+   * bit is also set in some archives even when we do have the data (most
+   * notably the android packages as built by the Mozilla build system).
+   * So instead of testing the generalFlag bit, only use the fast path when
+   * we haven't read the central directory entries yet, and when the
+   * compressed size in the header is filled in, i.e. non-zero (a zero size
+   * is the normal condition when the bit is set). */
+  if (nextFile && nextFile->GetName().Equals(path) && !entries &&
+      (nextFile->compressedSize != 0)) {
+    DEBUG_LOG("%s - %s was next file: fast path", name, path);
+    /* Fill Stream info from Local File header content */
+    const char* data = reinterpret_cast<const char*>(nextFile->GetData());
+    out->compressedBuf = data;
+    out->compressedSize = nextFile->compressedSize;
+    out->uncompressedSize = nextFile->uncompressedSize;
+    out->CRC32 = nextFile->CRC32;
+    out->type = static_cast<Stream::Type>(uint16_t(nextFile->compression));
+
+    /* Find the next Local File header. It is usually simply following the
+     * compressed stream, but in cases where the 3rd bit of the generalFlag
+     * is set, there is a Data Descriptor header before. */
+    data += nextFile->compressedSize;
+    if ((nextFile->generalFlag & 0x8) && DataDescriptor::validate(data)) {
+      data += sizeof(DataDescriptor);
+    }
+    /* nextFile becomes null here when no Local File header follows. */
+    nextFile = LocalFile::validate(data);
+    return true;
+  }
+
+  /* If the directory entry we have in store doesn't match, scan the Central
+   * Directory for the entry corresponding to the given path */
+  if (!nextDir || !nextDir->GetName().Equals(path)) {
+    const DirectoryEntry* entry = GetFirstEntry();
+    DEBUG_LOG("%s - Scan directory entries in search for %s", name, path);
+    while (entry && !entry->GetName().Equals(path)) {
+      entry = entry->GetNext();
+    }
+    nextDir = entry;
+  }
+  if (!nextDir) {
+    DEBUG_LOG("%s - Couldn't find %s", name, path);
+    return false;
+  }
+
+  /* Find the Local File header corresponding to the Directory entry that
+   * was found. */
+  nextFile =
+      LocalFile::validate(static_cast<const char*>(mapped) + nextDir->offset);
+  if (!nextFile) {
+    ERROR("%s - Couldn't find the Local File header for %s", name, path);
+    return false;
+  }
+
+  /* Fill Stream info from Directory entry content */
+  const char* data = reinterpret_cast<const char*>(nextFile->GetData());
+  out->compressedBuf = data;
+  out->compressedSize = nextDir->compressedSize;
+  out->uncompressedSize = nextDir->uncompressedSize;
+  out->CRC32 = nextDir->CRC32;
+  out->type = static_cast<Stream::Type>(uint16_t(nextDir->compression));
+
+  /* Store the next directory entry */
+  nextDir = nextDir->GetNext();
+  /* Once the central directory is in use, the Local File cursor is dropped. */
+  nextFile = nullptr;
+  return true;
+}
+
+// Returns the first Central Directory entry, locating and caching it in
+// `entries` on first use. Returns nullptr (after logging) when either the
+// Central Directory End record or the first entry cannot be found.
+const Zip::DirectoryEntry* Zip::GetFirstEntry() const {
+  if (entries) {
+    return entries;
+  }
+
+  /* Walk backwards from the last position where a Central Directory End
+   * record could start, until one validates or the buffer start is
+   * reached. */
+  const char* cursor =
+      static_cast<const char*>(mapped) + size - sizeof(CentralDirectoryEnd);
+  const CentralDirectoryEnd* record = nullptr;
+  while (cursor > mapped && !record) {
+    record = CentralDirectoryEnd::validate(cursor);
+    cursor--;
+  }
+  if (!record) {
+    ERROR("%s - Couldn't find end of central directory record", name);
+    return nullptr;
+  }
+
+  const char* first = static_cast<const char*>(mapped) + record->offset;
+  entries = DirectoryEntry::validate(first);
+  if (!entries) {
+    ERROR("%s - Couldn't find central directory record", name);
+  }
+  return entries;
+}
+
+// Recomputes the CRC32 of every Central Directory entry's data and compares
+// it with the value stored in the entry. Returns false on the first mismatch,
+// when an entry has no valid Local File header, or when inflating fails.
+// Entries that are neither STOREd nor DEFLATEd are skipped.
+bool Zip::VerifyCRCs() const {
+  AutoLock lock(&mutex);
+
+  for (const DirectoryEntry* entry = GetFirstEntry(); entry;
+       entry = entry->GetNext()) {
+    const LocalFile* file =
+        LocalFile::validate(static_cast<const char*>(mapped) + entry->offset);
+    if (!file) {
+      // validate() found no Local File header at the recorded offset, so
+      // there is no data to checksum for this entry.
+      return false;
+    }
+    uint32_t crc = crc32(0, nullptr, 0);
+
+    DEBUG_LOG("%.*s: crc=%08x", int(entry->filenameSize),
+              reinterpret_cast<const char*>(entry) + sizeof(*entry),
+              uint32_t(entry->CRC32));
+
+    if (entry->compression == Stream::Type::STORE) {
+      crc = crc32(crc, static_cast<const uint8_t*>(file->GetData()),
+                  entry->compressedSize);
+      DEBUG_LOG(" STORE size=%d crc=%08x", int(entry->compressedSize), crc);
+
+    } else if (entry->compression == Stream::Type::DEFLATE) {
+      z_stream zstream;
+      Bytef buffer[1024];
+      zstream.avail_in = entry->compressedSize;
+      zstream.next_in =
+          reinterpret_cast<Bytef*>(const_cast<void*>(file->GetData()));
+      zstream.zalloc = nullptr;
+      zstream.zfree = nullptr;
+      zstream.opaque = nullptr;
+
+      if (inflateInit2(&zstream, -MAX_WBITS) != Z_OK) {
+        return false;
+      }
+
+      // Inflate into the scratch buffer chunk by chunk, folding each chunk
+      // into the running CRC.
+      bool reachedEnd = false;
+      for (;;) {
+        zstream.avail_out = sizeof(buffer);
+        zstream.next_out = buffer;
+
+        int ret = inflate(&zstream, Z_SYNC_FLUSH);
+        crc = crc32(crc, buffer, sizeof(buffer) - zstream.avail_out);
+
+        if (ret == Z_STREAM_END) {
+          reachedEnd = true;
+          break;
+        }
+        if (ret != Z_OK) {
+          break;
+        }
+      }
+
+      // Release the inflate state on the failure path too.
+      inflateEnd(&zstream);
+      if (!reachedEnd) {
+        return false;
+      }
+      DEBUG_LOG(" DEFLATE size=%d crc=%08x", int(zstream.total_out), crc);
+
+    } else {
+      MOZ_ASSERT_UNREACHABLE("Unexpected stream type");
+      continue;
+    }
+
+    if (entry->CRC32 != crc) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Definition of the static ZipCollection instance declared in Zip.h.
+ZipCollection ZipCollection::Singleton;
+
+// Guards Singleton.zips; taken by GetZip, Register and Forget below.
+static pthread_mutex_t sZipCollectionMutex = PTHREAD_MUTEX_INITIALIZER;
+
+// Returns the registered Zip whose name equals `path` if there is one,
+// otherwise creates a new Zip for that path. The collection lock is only
+// held during the search, not while the new Zip is created.
+already_AddRefed<Zip> ZipCollection::GetZip(const char* path) {
+  {
+    AutoLock lock(&sZipCollectionMutex);
+    /* Unnamed Zips never match. */
+    const auto sameName = [path](const RefPtr<Zip>& candidate) {
+      const char* candidateName = candidate->GetName();
+      return candidateName && strcmp(candidateName, path) == 0;
+    };
+    const auto match =
+        std::find_if(Singleton.zips.begin(), Singleton.zips.end(), sameName);
+    if (match != Singleton.zips.end()) {
+      return RefPtr<Zip>(*match).forget();
+    }
+  }
+  return Zip::Create(path);
+}
+
+// Appends the given Zip to the collection, under the collection lock. The
+// collection stores a RefPtr, i.e. it holds a reference of its own.
+void ZipCollection::Register(Zip* zip) {
+  AutoLock lock(&sZipCollectionMutex);
+  DEBUG_LOG("ZipCollection::Register(\"%s\")", zip->GetName());
+  Singleton.zips.emplace_back(zip);
+}
+
+// Removes the given Zip from the collection, under the collection lock,
+// unless somebody acquired a new reference to it in the meantime. A Zip
+// that is not in the collection is only reported through DEBUG_LOG.
+void ZipCollection::Forget(const Zip* zip) {
+  AutoLock lock(&sZipCollectionMutex);
+  if (zip->refCount() > 1) {
+    // Someone has acquired a reference before we had acquired the lock,
+    // ignore this request.
+    return;
+  }
+  DEBUG_LOG("ZipCollection::Forget(\"%s\")", zip->GetName());
+  const auto it = std::find(Singleton.zips.begin(), Singleton.zips.end(), zip);
+  // std::find returns end() when nothing matches, and end() must not be
+  // dereferenced; compare the iterator itself.
+  if (it != Singleton.zips.end()) {
+    Singleton.zips.erase(it);
+  } else {
+    DEBUG_LOG("ZipCollection::Forget: didn't find \"%s\" in bookkeeping",
+              zip->GetName());
+  }
+}
diff --git a/mozglue/linker/Zip.h b/mozglue/linker/Zip.h
new file mode 100644
index 0000000000..3e596c3c4c
--- /dev/null
+++ b/mozglue/linker/Zip.h
@@ -0,0 +1,388 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Zip_h
+#define Zip_h
+
+#include <cstring>
+#include <stdint.h>
+#include <vector>
+#include <zlib.h>
+#include <pthread.h>
+#include "Utils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/RefCounted.h"
+#include "mozilla/RefPtr.h"
+
+/**
+ * Forward declaration
+ */
+class ZipCollection;
+
+/**
+ * Class to handle access to Zip archive streams. The Zip archive is mapped
+ * in memory, and streams are direct references to that mapped memory.
+ * Zip files are assumed to be correctly formed. No boundary checks are
+ * performed, which means hand-crafted malicious Zip archives can make the
+ * code fail in bad ways. However, since the only intended use is to load
+ * libraries from Zip archives, there is no interest in making this code
+ * safe, since the libraries could contain malicious code anyways.
+ */
+class Zip : public mozilla::external::AtomicRefCounted<Zip> {
+ public:
+  MOZ_DECLARE_REFCOUNTED_TYPENAME(Zip)
+  /**
+   * Create a Zip instance for the given file name. Returns nullptr in case
+   * of failure.
+   */
+  static already_AddRefed<Zip> Create(const char* filename);
+
+  /**
+   * Create a Zip instance using the given buffer. The instance has no file
+   * name. Returns nullptr in case of failure.
+   */
+  static already_AddRefed<Zip> Create(void* buffer, size_t size) {
+    return Create(nullptr, buffer, size);
+  }
+
+ private:
+  static already_AddRefed<Zip> Create(const char* filename, void* buffer,
+                                      size_t size);
+
+  /**
+   * Private constructor
+   */
+  Zip(const char* filename, void* buffer, size_t size);
+
+ public:
+  /**
+   * Destructor
+   */
+  ~Zip();
+
+  /**
+   * Class used to access Zip archive item streams
+   */
+  class Stream {
+   public:
+    /**
+     * Stream types
+     */
+    enum Type { STORE = 0, DEFLATE = 8 };
+
+    /**
+     * Constructor
+     */
+    Stream()
+        : compressedBuf(nullptr),
+          compressedSize(0),
+          uncompressedSize(0),
+          CRC32(0),
+          type(STORE) {}
+
+    /**
+     * Getters. They only read the stream description, so they are usable
+     * on const Streams as well.
+     */
+    const void* GetBuffer() const { return compressedBuf; }
+    size_t GetSize() const { return compressedSize; }
+    size_t GetUncompressedSize() const { return uncompressedSize; }
+    size_t GetCRC32() const { return CRC32; }
+    Type GetType() const { return type; }
+
+    /**
+     * Returns a z_stream for use with inflate functions using the given
+     * buffer as inflate output. The caller is expected to allocate enough
+     * memory for the Stream uncompressed size.
+     */
+    z_stream GetZStream(void* buf) {
+      z_stream zStream;
+      zStream.avail_in = compressedSize;
+      zStream.next_in =
+          reinterpret_cast<Bytef*>(const_cast<void*>(compressedBuf));
+      zStream.avail_out = uncompressedSize;
+      zStream.next_out = static_cast<Bytef*>(buf);
+      zStream.zalloc = nullptr;
+      zStream.zfree = nullptr;
+      zStream.opaque = nullptr;
+      return zStream;
+    }
+
+   protected:
+    friend class Zip;
+    const void* compressedBuf;
+    size_t compressedSize;
+    size_t uncompressedSize;
+    size_t CRC32;
+    Type type;
+  };
+
+  /**
+   * Returns a stream from the Zip archive. Returns false when the given
+   * path cannot be found in the archive.
+   */
+  bool GetStream(const char* path, Stream* out) const;
+
+  /**
+   * Returns the file name of the archive (nullptr for archives created
+   * from a buffer).
+   */
+  const char* GetName() const { return name; }
+
+  /**
+   * Returns whether all files have correct CRC checksum.
+   */
+  bool VerifyCRCs() const;
+
+ private:
+  /* File name of the archive */
+  char* name;
+  /* Address where the Zip archive is mapped */
+  void* mapped;
+  /* Size of the archive */
+  size_t size;
+
+  /**
+   * Strings (file names, comments, etc.) in the Zip headers are NOT zero
+   * terminated. This class is a helper around them.
+   */
+  class StringBuf {
+   public:
+    /**
+     * Constructor
+     */
+    StringBuf(const char* buf, size_t length) : buf(buf), length(length) {}
+
+    /**
+     * Returns whether the string has the same content as the given zero
+     * terminated string.
+     * The length of str is checked first, so that str is never read past
+     * its terminator, even when the archive string contains a zero byte.
+     */
+    bool Equals(const char* str) const {
+      return strlen(str) == length && memcmp(str, buf, length) == 0;
+    }
+
+   private:
+    const char* buf;
+    size_t length;
+  };
+
+/* All the following types need to be packed */
+#pragma pack(1)
+ public:
+  /**
+   * A Zip archive is an aggregate of entities which all start with a
+   * signature giving their type. This template is to be used as a base
+   * class for these entities.
+   */
+  template <typename T>
+  class SignedEntity {
+   public:
+    /**
+     * Equivalent to reinterpret_cast<const T *>(buf), with an additional
+     * check of the signature. Returns nullptr when the signature doesn't
+     * match.
+     */
+    static const T* validate(const void* buf) {
+      const T* ret = static_cast<const T*>(buf);
+      if (ret->signature == T::magic) return ret;
+      return nullptr;
+    }
+
+    explicit SignedEntity(uint32_t magic) : signature(magic) {}
+
+   private:
+    le_uint32 signature;
+  };
+
+ private:
+  /**
+   * Header used to describe a Local File entry. The header is followed by
+   * the file name and an extra field, then by the data stream.
+   */
+  struct LocalFile : public SignedEntity<LocalFile> {
+    /* Signature for a Local File header */
+    static const uint32_t magic = 0x04034b50;
+
+    /**
+     * Returns the file name
+     */
+    StringBuf GetName() const {
+      return StringBuf(reinterpret_cast<const char*>(this) + sizeof(*this),
+                       filenameSize);
+    }
+
+    /**
+     * Returns a pointer to the data associated with this header
+     */
+    const void* GetData() const {
+      return reinterpret_cast<const char*>(this) + sizeof(*this) +
+             filenameSize + extraFieldSize;
+    }
+
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+  };
+
+  /**
+   * In some cases, when a zip archive is created, compressed size and CRC
+   * are not known when writing the Local File header. In these cases, the
+   * 3rd bit of the general flag in the Local File header is set, and there
+   * is an additional header following the compressed data.
+   */
+  struct DataDescriptor : public SignedEntity<DataDescriptor> {
+    /* Signature for a Data Descriptor header */
+    static const uint32_t magic = 0x08074b50;
+
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+  };
+
+  /**
+   * Header used to describe a Central Directory Entry. The header is
+   * followed by the file name, an extra field, and a comment.
+   */
+  struct DirectoryEntry : public SignedEntity<DirectoryEntry> {
+    /* Signature for a Central Directory Entry header */
+    static const uint32_t magic = 0x02014b50;
+
+    /**
+     * Returns the file name
+     */
+    StringBuf GetName() const {
+      return StringBuf(reinterpret_cast<const char*>(this) + sizeof(*this),
+                       filenameSize);
+    }
+
+    /**
+     * Returns the Central Directory Entry following this one, or nullptr
+     * when what follows doesn't carry the entry signature.
+     */
+    const DirectoryEntry* GetNext() const {
+      return validate(reinterpret_cast<const char*>(this) + sizeof(*this) +
+                      filenameSize + extraFieldSize + fileCommentSize);
+    }
+
+    le_uint16 creatorVersion;
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+    le_uint16 fileCommentSize;
+    le_uint16 diskNum;
+    le_uint16 internalAttributes;
+    le_uint32 externalAttributes;
+    le_uint32 offset;
+  };
+
+  /**
+   * Header used to describe the End of Central Directory Record.
+   */
+  struct CentralDirectoryEnd : public SignedEntity<CentralDirectoryEnd> {
+    /* Signature for the End of Central Directory Record */
+    static const uint32_t magic = 0x06054b50;
+
+    le_uint16 diskNum;
+    le_uint16 startDisk;
+    le_uint16 recordsOnDisk;
+    le_uint16 records;
+    le_uint32 size;
+    le_uint32 offset;
+    le_uint16 commentSize;
+  };
+#pragma pack()
+
+  /**
+   * Returns the first Directory entry
+   */
+  const DirectoryEntry* GetFirstEntry() const;
+
+  /* Pointer to the Local File Entry following the last one GetStream() used.
+   * This is used by GetStream to avoid scanning the Directory Entries when the
+   * requested entry is that one. */
+  mutable const LocalFile* nextFile;
+
+  /* Likewise for the next Directory entry */
+  mutable const DirectoryEntry* nextDir;
+
+  /* Pointer to the Directory entries */
+  mutable const DirectoryEntry* entries;
+
+  /* Taken by GetStream() and VerifyCRCs(), which update the members above. */
+  mutable pthread_mutex_t mutex;
+};
+
+/**
+ * Class for bookkeeping Zip instances
+ */
+class ZipCollection {
+ public:
+  /* The one collection instance; defined in Zip.cpp. */
+  static ZipCollection Singleton;
+
+  /**
+   * Get a Zip instance for the given path. If there is an existing one
+   * already, return that one, otherwise create a new one.
+   */
+  static already_AddRefed<Zip> GetZip(const char* path);
+
+ protected:
+  friend class Zip;
+  /* Grants the specialized RefCounted<Zip, ...>::Release() access to
+   * Forget(). */
+  friend class mozilla::detail::RefCounted<Zip,
+                                           mozilla::detail::AtomicRefCount>;
+
+  /**
+   * Register the given Zip instance. This method is meant to be called
+   * by Zip::Create.
+   */
+  static void Register(Zip* zip);
+
+  /**
+   * Forget about the given Zip instance. This method is meant to be called
+   * by the specialized RefCounted<Zip, AtomicRefCount>::Release() below, once
+   * the reference count of the Zip has dropped to 1.
+   */
+  static void Forget(const Zip* zip);
+
+ private:
+  /* Zip instances bookkept in this collection. Each element is a RefPtr,
+   * i.e. the collection holds a reference to every registered Zip. */
+  std::vector<RefPtr<Zip>> zips;
+};
+
+namespace mozilla {
+namespace detail {
+
+/**
+ * Release() specialized for Zip. A registered Zip is also referenced by
+ * ZipCollection (which stores RefPtr<Zip> elements), so the count reaching 1
+ * is treated as "only the collection is left" and triggers an attempt to
+ * drop it from the collection. The object is deleted when the count
+ * reaches 0.
+ */
+template <>
+inline void RefCounted<Zip, AtomicRefCount>::Release() const {
+  MOZ_ASSERT(static_cast<int32_t>(mRefCnt) > 0);
+  const auto count = --mRefCnt;
+  if (count == 1) {
+    // No external references are left, attempt to remove it from the
+    // collection. If it's successfully removed from the collection, Release()
+    // will be called with mRefCnt = 1, which will finally delete this zip.
+    ZipCollection::Forget(static_cast<const Zip*>(this));
+  } else if (count == 0) {
+#ifdef DEBUG
+    // Mark the count as dead so that the destructor assertion below passes.
+    mRefCnt = detail::DEAD;
+#endif
+    delete static_cast<const Zip*>(this);
+  }
+}
+
+#ifdef DEBUG
+/**
+ * Debug-only destructor specialization: asserts that destruction only
+ * happens after Release() marked the count as DEAD.
+ */
+template <>
+inline RefCounted<Zip, AtomicRefCount>::~RefCounted() {
+  MOZ_ASSERT(mRefCnt == detail::DEAD);
+}
+#endif
+
+}  // namespace detail
+}  // namespace mozilla
+
+#endif /* Zip_h */
diff --git a/mozglue/linker/moz.build b/mozglue/linker/moz.build
new file mode 100644
index 0000000000..f7dcb0c0e5
--- /dev/null
+++ b/mozglue/linker/moz.build
@@ -0,0 +1,36 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# The custom linker sources (including the XZ stream decoder) are only built
+# when MOZ_LINKER is set.
+if CONFIG["MOZ_LINKER"]:
+    SOURCES += [
+        "BaseElf.cpp",
+        "CustomElf.cpp",
+        "ElfLoader.cpp",
+        "Mappable.cpp",
+        "XZStream.cpp",
+    ]
+
+# When the linker is disabled, we still need Zip for mozglue/android.
+# Logging is a required dependency.
+SOURCES += [
+    "Logging.cpp",
+    "Zip.cpp",
+]
+
+Library("linker")
+
+# The objects of this directory are linked into mozglue.
+FINAL_LIBRARY = "mozglue"
+
+TEST_DIRS += ["tests"]
+
+# Keep shadowing warnings from being treated as errors with clang and gcc.
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += ["-Wno-error=shadow"]
+
+# XZStream::Init() only calls xz_crc64_init() when this is defined.
+DEFINES["XZ_USE_CRC64"] = 1
+
+USE_LIBS += [
+    "xz-embedded",
+]
diff --git a/mozglue/linker/tests/TestZip.cpp b/mozglue/linker/tests/TestZip.cpp
new file mode 100644
index 0000000000..a2d2b10bdd
--- /dev/null
+++ b/mozglue/linker/tests/TestZip.cpp
@@ -0,0 +1,61 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <cstdio>
+#include <unistd.h>
+#include "Zip.h"
+#include "mozilla/RefPtr.h"
+
+#include "gtest/gtest.h"
+
+// Definition of the Logging singleton for the test binary.
+// NOTE(review): presumably needed here because tests/moz.build compiles
+// ../Zip.cpp but not Logging.cpp -- confirm.
+Logging Logging::Singleton;
+
+/**
+ * test.zip is a basic test zip file with a central directory. It contains
+ * four entries, in the following order:
+ * "foo", "bar", "baz", "qux".
+ * The entries are going to be read out of order.
+ * TEST_ZIP and TEST_ZIP_SIZE are emitted by the zip_data macro in
+ * TestZipData.S, which embeds the file with .incbin.
+ */
+extern const unsigned char TEST_ZIP[];
+extern const unsigned int TEST_ZIP_SIZE;
+// Lookup order used by the test; deliberately different from archive order.
+const char* test_entries[] = {"baz", "foo", "bar", "qux"};
+
+/**
+ * no_central_dir.zip is a hand crafted test zip with no central directory
+ * entries. The Zip reader is expected to be able to traverse these entries
+ * if requested in order, without reading a central directory
+ * - First entry is a file "a", STOREd.
+ * - Second entry is a file "b", STOREd, using a data descriptor. CRC is
+ *   unknown, but compressed and uncompressed sizes are known in the local
+ *   file header.
+ * - Third entry is a file "c", DEFLATEd, using a data descriptor. CRC,
+ *   compressed and uncompressed sizes are known in the local file header.
+ *   This is the kind of entry that can be found in a zip that went through
+ *   zipalign if it had a data descriptor originally.
+ * - Fourth entry is a file "d", STOREd.
+ * NO_CENTRAL_DIR_ZIP and NO_CENTRAL_DIR_ZIP_SIZE are emitted by the zip_data
+ * macro in TestZipData.S, which embeds the file with .incbin.
+ */
+extern const unsigned char NO_CENTRAL_DIR_ZIP[];
+extern const unsigned int NO_CENTRAL_DIR_ZIP_SIZE;
+// Lookup order used by the test; same as the archive order described above.
+const char* no_central_dir_entries[] = {"a", "b", "c", "d"};
+
+// Every entry of test.zip must be retrievable, in an order that differs from
+// the order of the entries in the archive.
+TEST(Zip, TestZip)
+{
+  Zip::Stream s;
+  RefPtr<Zip> z =
+      Zip::Create(const_cast<unsigned char*>(TEST_ZIP), TEST_ZIP_SIZE);
+  // Create() returns nullptr for an invalid archive; fail the test instead
+  // of dereferencing a null pointer below.
+  ASSERT_TRUE(z.get() != nullptr) << "Could not create Zip from TEST_ZIP";
+  for (auto& entry : test_entries) {
+    ASSERT_TRUE(z->GetStream(entry, &s))
+    << "Could not get entry \"" << entry << "\"";
+  }
+}
+
+// Every entry of no_central_dir.zip must be retrievable when requested in
+// archive order, even though the archive has no central directory entries.
+TEST(Zip, NoCentralDir)
+{
+  Zip::Stream s;
+  RefPtr<Zip> z = Zip::Create(const_cast<unsigned char*>(NO_CENTRAL_DIR_ZIP),
+                              NO_CENTRAL_DIR_ZIP_SIZE);
+  // Create() returns nullptr for an invalid archive; fail the test instead
+  // of dereferencing a null pointer below.
+  ASSERT_TRUE(z.get() != nullptr)
+  << "Could not create Zip from NO_CENTRAL_DIR_ZIP";
+  for (auto& entry : no_central_dir_entries) {
+    ASSERT_TRUE(z->GetStream(entry, &s))
+    << "Could not get entry \"" << entry << "\"";
+  }
+}
diff --git a/mozglue/linker/tests/TestZipData.S b/mozglue/linker/tests/TestZipData.S
new file mode 100644
index 0000000000..5fbb825451
--- /dev/null
+++ b/mozglue/linker/tests/TestZipData.S
@@ -0,0 +1,17 @@
+.macro zip_data name, path
+  .global \name  /* Export the symbol that marks the start of the data. */
+  .data
+  .balign 16
+  \name:
+  .incbin "\path"  /* Raw bytes of the file, copied in at assembly time. */
+  .L\name\()_END:  /* Local label just past the last embedded byte. */
+  .size \name, .L\name\()_END-\name
+  .global \name\()_SIZE  /* Companion symbol: a .int holding the byte count. */
+  .data
+  .balign 4
+  \name\()_SIZE:
+  .int .L\name\()_END-\name
+.endm
+
+zip_data TEST_ZIP, "test.zip"
+zip_data NO_CENTRAL_DIR_ZIP, "no_central_dir.zip"
diff --git a/mozglue/linker/tests/moz.build b/mozglue/linker/tests/moz.build
new file mode 100644
index 0000000000..b40b9f1152
--- /dev/null
+++ b/mozglue/linker/tests/moz.build
@@ -0,0 +1,23 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+FINAL_LIBRARY = "xul-gtest"
+
+UNIFIED_SOURCES += [
+    "../Zip.cpp",
+    "TestZip.cpp",
+]
+
+SOURCES += [
+    "TestZipData.S",
+]
+
+LOCAL_INCLUDES += [".."]
+
+if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+    CXXFLAGS += ["-Wno-error=shadow"]
+
+ASFLAGS += ["-I", SRCDIR]
diff --git a/mozglue/linker/tests/no_central_dir.zip b/mozglue/linker/tests/no_central_dir.zip
new file mode 100644
index 0000000000..df882220d1
--- /dev/null
+++ b/mozglue/linker/tests/no_central_dir.zip
diff --git a/mozglue/linker/tests/test.zip b/mozglue/linker/tests/test.zip
new file mode 100644
index 0000000000..657835b0ca
--- /dev/null
+++ b/mozglue/linker/tests/test.zip
diff --git a/mozglue/misc/AutoProfilerLabel.cpp b/mozglue/misc/AutoProfilerLabel.cpp
new file mode 100644
index 0000000000..9bfdc19ef7
--- /dev/null
+++ b/mozglue/misc/AutoProfilerLabel.cpp
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/AutoProfilerLabel.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/PlatformMutex.h"
+
+namespace mozilla {
+
+// RAII class that encapsulates all shared static data, and enforces locking
+// when accessing this data.
+class MOZ_RAII AutoProfilerLabelData {
+ public:
+  AutoProfilerLabelData() { sAPLMutex.Lock(); }
+  ~AutoProfilerLabelData() { sAPLMutex.Unlock(); }
+
+  AutoProfilerLabelData(const AutoProfilerLabelData&) = delete;
+  void operator=(const AutoProfilerLabelData&) = delete;
+
+  const ProfilerLabelEnter& EnterCRef() const { return sEnter; }
+  ProfilerLabelEnter& EnterRef() { return sEnter; }
+  const ProfilerLabelExit& ExitCRef() const { return sExit; }
+  ProfilerLabelExit& ExitRef() { return sExit; }
+
+  const uint32_t& GenerationCRef() const { return sGeneration; }
+  uint32_t& GenerationRef() { return sGeneration; }
+
+  // Unlocked read, so the answer may be stale. Tests sEnter rather than
+  // sGeneration, which stays non-zero after the callbacks are unregistered.
+  static bool RacyIsProfilerPresent() { return !!sEnter; }
+
+ private:
+  // Thin shell around mozglue PlatformMutex, for local internal use.
+  // Does not preserve behavior in JS record/replay.
+  class Mutex : private mozilla::detail::MutexImpl {
+   public:
+    Mutex() : mozilla::detail::MutexImpl() {}
+    void Lock() { mozilla::detail::MutexImpl::lock(); }
+    void Unlock() { mozilla::detail::MutexImpl::unlock(); }
+  };
+
+  // Mutex protecting access to the following static members.
+  static Mutex sAPLMutex;
+
+  static ProfilerLabelEnter sEnter;
+  static ProfilerLabelExit sExit;
+
+  // Current "generation" of RegisterProfilerLabelEnterExit calls.
+  static uint32_t sGeneration;
+};
+
+/* static */ AutoProfilerLabelData::Mutex AutoProfilerLabelData::sAPLMutex;
+/* static */ ProfilerLabelEnter AutoProfilerLabelData::sEnter = nullptr;
+/* static */ ProfilerLabelExit AutoProfilerLabelData::sExit = nullptr;
+/* static */ uint32_t AutoProfilerLabelData::sGeneration = 0;
+
+void RegisterProfilerLabelEnterExit(ProfilerLabelEnter aEnter,
+                                    ProfilerLabelExit aExit) {
+  MOZ_ASSERT(!aEnter == !aExit, "Must provide both null or both non-null");
+  // Constructing |data| takes the shared lock until this function returns.
+  AutoProfilerLabelData data;
+  MOZ_ASSERT(!aEnter != !data.EnterRef(),
+             "Must go from null to non-null, or from non-null to null");
+  data.EnterRef() = aEnter;
+  data.ExitRef() = aExit;
+  ++data.GenerationRef();  // Labels begun before this call no longer match.
+}
+
+bool IsProfilerPresent() {  // Takes no lock, so the answer may be stale.
+  return AutoProfilerLabelData::RacyIsProfilerPresent();
+}
+
+ProfilerLabel ProfilerLabelBegin(const char* aLabelName,
+                                 const char* aDynamicString, void* aSp) {
+  // The shared lock is held while the callback runs and the generation is read.
+  const AutoProfilerLabelData lockedData;
+  void* context = nullptr;
+  if (ProfilerLabelEnter enter = lockedData.EnterCRef()) {
+    context = enter(aLabelName, aDynamicString, aSp);
+  }
+  return MakeTuple(context, lockedData.GenerationCRef());
+}
+
+void ProfilerLabelEnd(const ProfilerLabel& aLabel) {
+  // A label with no entry context has nothing to exit; skip the lock entirely.
+  if (IsValidProfilerLabel(aLabel)) {
+    const AutoProfilerLabelData lockedData;
+    const ProfilerLabelExit exitFn = lockedData.ExitCRef();
+    if (exitFn && Get<1>(aLabel) == lockedData.GenerationCRef()) {
+      exitFn(Get<0>(aLabel));
+    }
+  }
+}
+
+AutoProfilerLabel::AutoProfilerLabel(const char* aLabel,
+                                     const char* aDynamicString) {
+  Tie(mEntryContext, mGeneration) =
+      ProfilerLabelBegin(aLabel, aDynamicString, this);  // |this| is the aSp.
+}
+
+AutoProfilerLabel::~AutoProfilerLabel() {
+  ProfilerLabelEnd(MakeTuple(mEntryContext, mGeneration));  // No-op if null.
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/AutoProfilerLabel.h b/mozglue/misc/AutoProfilerLabel.h
new file mode 100644
index 0000000000..2cbd8252c9
--- /dev/null
+++ b/mozglue/misc/AutoProfilerLabel.h
@@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_AutoProfilerLabel_h
+#define mozilla_AutoProfilerLabel_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/Types.h"
+
+// The Gecko Profiler defines AutoProfilerLabel, an RAII class for
+// pushing/popping frames to/from the ProfilingStack.
+//
+// This file defines a class of the same name that does much the same thing,
+// but which can be used in (and only in) mozglue. A different class is
+// necessary because mozglue cannot directly access sProfilingStack.
+//
+// Note that this class is slightly slower than the other AutoProfilerLabel,
+// and it lacks the macro wrappers. It also is effectively hardwired to use
+// JS::ProfilingCategory::OTHER as the category pair, because that's what
+// the callbacks provided by the profiler use. (Specifying the categories in
+// this file would require #including ProfilingCategory.h in mozglue, which we
+// don't want to do.)
+
+namespace mozilla {
+
+// Enter should return a pointer that will be given to Exit.
+typedef void* (*ProfilerLabelEnter)(const char* aLabel,
+                                    const char* aDynamicString, void* aSp);
+typedef void (*ProfilerLabelExit)(void* EntryContext);
+
+// Register callbacks that do the entry/exit work involving sProfilingStack.
+MFBT_API void RegisterProfilerLabelEnterExit(ProfilerLabelEnter aEnter,
+                                             ProfilerLabelExit aExit);
+
+// This #ifdef prevents this AutoProfilerLabel from being defined in libxul,
+// which would conflict with the one in the profiler.
+#ifdef IMPL_MFBT
+
+class MOZ_RAII AutoProfilerLabel {
+ public:
+  AutoProfilerLabel(const char* aLabel, const char* aDynamicString);
+  ~AutoProfilerLabel();
+
+ private:
+  void* mEntryContext;  // First element of the ProfilerLabelBegin result.
+  // Number of RegisterProfilerLabelEnterExit calls, to avoid giving an entry
+  // context from one generation to the next.
+  uint32_t mGeneration;
+};
+
+using ProfilerLabel = Tuple<void*, uint32_t>;
+
+bool IsProfilerPresent();
+ProfilerLabel ProfilerLabelBegin(const char* aLabelName,
+                                 const char* aDynamicString, void* aSp);
+void ProfilerLabelEnd(const ProfilerLabel& aLabel);
+
+inline bool IsValidProfilerLabel(const ProfilerLabel& aLabel) {
+  return Get<0>(aLabel) != nullptr;  // Valid iff it carries a context.
+}
+
+#endif
+
+}  // namespace mozilla
+
+#endif  // mozilla_AutoProfilerLabel_h
diff --git a/mozglue/misc/ConditionVariable_posix.cpp b/mozglue/misc/ConditionVariable_posix.cpp
new file mode 100644
index 0000000000..d15c87f4e8
--- /dev/null
+++ b/mozglue/misc/ConditionVariable_posix.cpp
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "mozilla/PlatformConditionVariable.h"
+#include "mozilla/PlatformMutex.h"
+#include "MutexPlatformData_posix.h"
+
+using mozilla::CheckedInt;
+using mozilla::TimeDuration;
+
+static const long NanoSecPerSec = 1000000000;
+
+// Android 32-bit & macOS 10.12 has the clock functions, but not
+// pthread_condattr_setclock.
+#if defined(HAVE_CLOCK_MONOTONIC) && \
+    !(defined(__ANDROID__) && !defined(__LP64__)) && !defined(__APPLE__)
+#  define CV_USE_CLOCK_API
+#endif
+
+#ifdef CV_USE_CLOCK_API
+// The C++ specification defines std::condition_variable::wait_for in terms of
+// std::chrono::steady_clock, which is closest to CLOCK_MONOTONIC.
+static const clockid_t WhichClock = CLOCK_MONOTONIC;
+
+// Computes *result = *lhs + *rhs. timevals have timevaladd, but timespec has
+// no such helper, so this is built on MFBT's CheckedInt. Release-asserts that
+// both inputs have tv_nsec below one second and that the sum fits in time_t.
+static void moz_timespecadd(struct timespec* lhs, struct timespec* rhs,
+                            struct timespec* result) {
+  // Add nanoseconds. This may wrap, but not above 2 billion.
+  MOZ_RELEASE_ASSERT(lhs->tv_nsec < NanoSecPerSec);
+  MOZ_RELEASE_ASSERT(rhs->tv_nsec < NanoSecPerSec);
+  result->tv_nsec = lhs->tv_nsec + rhs->tv_nsec;
+
+  // Add seconds, checking for overflow in the platform specific time_t type.
+  CheckedInt<time_t> sec = CheckedInt<time_t>(lhs->tv_sec) + rhs->tv_sec;
+
+  // If nanoseconds overflowed, carry the result over into seconds.
+  if (result->tv_nsec >= NanoSecPerSec) {
+    MOZ_RELEASE_ASSERT(result->tv_nsec < 2 * NanoSecPerSec);
+    result->tv_nsec -= NanoSecPerSec;
+    sec += 1;
+  }
+
+  // Extracting the value asserts that there was no overflow.
+  MOZ_RELEASE_ASSERT(sec.isValid());
+  result->tv_sec = sec.value();
+}
+#endif
+
+struct mozilla::detail::ConditionVariableImpl::PlatformData {
+  pthread_cond_t ptCond;  // Lives inside the owner's platformData_ buffer.
+};
+
+mozilla::detail::ConditionVariableImpl::ConditionVariableImpl() {
+  pthread_cond_t* ptCond = &platformData()->ptCond;
+  // Where the clock API is usable, bind the condvar to WhichClock.
+#ifdef CV_USE_CLOCK_API
+  pthread_condattr_t attr;
+  int r0 = pthread_condattr_init(&attr);
+  MOZ_RELEASE_ASSERT(!r0);
+
+  int r1 = pthread_condattr_setclock(&attr, WhichClock);
+  MOZ_RELEASE_ASSERT(!r1);
+
+  int r2 = pthread_cond_init(ptCond, &attr);
+  MOZ_RELEASE_ASSERT(!r2);
+
+  int r3 = pthread_condattr_destroy(&attr);
+  MOZ_RELEASE_ASSERT(!r3);
+#else
+  int r = pthread_cond_init(ptCond, NULL);
+  MOZ_RELEASE_ASSERT(!r);
+#endif
+}
+
+mozilla::detail::ConditionVariableImpl::~ConditionVariableImpl() {
+  int r = pthread_cond_destroy(&platformData()->ptCond);
+  MOZ_RELEASE_ASSERT(r == 0);  // Any pthread error here is fatal.
+}
+
+void mozilla::detail::ConditionVariableImpl::notify_one() {
+  int r = pthread_cond_signal(&platformData()->ptCond);  // Signal, not all.
+  MOZ_RELEASE_ASSERT(r == 0);
+}
+
+void mozilla::detail::ConditionVariableImpl::notify_all() {
+  int r = pthread_cond_broadcast(&platformData()->ptCond);  // Every waiter.
+  MOZ_RELEASE_ASSERT(r == 0);
+}
+
+void mozilla::detail::ConditionVariableImpl::wait(MutexImpl& lock) {
+  pthread_cond_t* ptCond = &platformData()->ptCond;
+  pthread_mutex_t* ptMutex = &lock.platformData()->ptMutex;
+  // NOTE(review): pthread_cond_wait expects the caller to hold ptMutex.
+  int r = pthread_cond_wait(ptCond, ptMutex);
+  MOZ_RELEASE_ASSERT(r == 0);
+}
+
+mozilla::CVStatus mozilla::detail::ConditionVariableImpl::wait_for(
+    MutexImpl& lock, const TimeDuration& a_rel_time) {
+  if (a_rel_time == TimeDuration::Forever()) {
+    wait(lock);
+    return CVStatus::NoTimeout;
+  }
+
+  pthread_cond_t* ptCond = &platformData()->ptCond;
+  pthread_mutex_t* ptMutex = &lock.platformData()->ptMutex;
+  int r;
+
+  // Clamp negative durations to 0; the uint64_t cast below needs a value >= 0.
+  TimeDuration rel_time = a_rel_time < TimeDuration::FromSeconds(0)
+                              ? TimeDuration::FromSeconds(0)
+                              : a_rel_time;
+
+  // Convert the duration to a timespec.
+  struct timespec rel_ts;
+  rel_ts.tv_sec = static_cast<time_t>(rel_time.ToSeconds());
+  rel_ts.tv_nsec =
+      static_cast<uint64_t>(rel_time.ToMicroseconds() * 1000.0) % NanoSecPerSec;
+
+#ifdef CV_USE_CLOCK_API
+  struct timespec now_ts;
+  r = clock_gettime(WhichClock, &now_ts);
+  MOZ_RELEASE_ASSERT(!r);
+
+  struct timespec abs_ts;
+  moz_timespecadd(&now_ts, &rel_ts, &abs_ts);
+
+  r = pthread_cond_timedwait(ptCond, ptMutex, &abs_ts);
+#else
+  // Our non-clock-supporting platforms, OS X and Android, do support waiting
+  // on a condition variable with a relative timeout.
+  r = pthread_cond_timedwait_relative_np(ptCond, ptMutex, &rel_ts);
+#endif
+
+  if (r == 0) {
+    return CVStatus::NoTimeout;
+  }
+  MOZ_RELEASE_ASSERT(r == ETIMEDOUT);
+  return CVStatus::Timeout;
+}
+
+mozilla::detail::ConditionVariableImpl::PlatformData*
+mozilla::detail::ConditionVariableImpl::platformData() {
+  static_assert(sizeof platformData_ >= sizeof(PlatformData),
+                "platformData_ is too small");
+  return reinterpret_cast<PlatformData*>(platformData_);  // View raw storage.
+}
diff --git a/mozglue/misc/ConditionVariable_windows.cpp b/mozglue/misc/ConditionVariable_windows.cpp
new file mode 100644
index 0000000000..0c0151f1d3
--- /dev/null
+++ b/mozglue/misc/ConditionVariable_windows.cpp
@@ -0,0 +1,98 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+
+#include <float.h>
+#include <intrin.h>
+#include <stdlib.h>
+#include <windows.h>
+
+#include "mozilla/PlatformConditionVariable.h"
+#include "mozilla/PlatformMutex.h"
+#include "MutexPlatformData_windows.h"
+
+// Some versions of the Windows SDK have a bug where some interlocked functions
+// are not redefined as compiler intrinsics. Fix that for the interlocked
+// functions that are used in this file.
+#if defined(_MSC_VER) && !defined(InterlockedExchangeAdd)
+#  define InterlockedExchangeAdd(addend, value) \
+    _InterlockedExchangeAdd((volatile long*)(addend), (long)(value))
+#endif
+
+#if defined(_MSC_VER) && !defined(InterlockedIncrement)
+#  define InterlockedIncrement(addend) \
+    _InterlockedIncrement((volatile long*)(addend))
+#endif
+
+// Wrapper for native condition variable APIs.
+struct mozilla::detail::ConditionVariableImpl::PlatformData {
+  CONDITION_VARIABLE cv_;
+};
+
+mozilla::detail::ConditionVariableImpl::ConditionVariableImpl() {
+  InitializeConditionVariable(&platformData()->cv_);  // In-place init.
+}
+
+void mozilla::detail::ConditionVariableImpl::notify_one() {
+  WakeConditionVariable(&platformData()->cv_);  // Single-waiter wake.
+}
+
+void mozilla::detail::ConditionVariableImpl::notify_all() {
+  WakeAllConditionVariable(&platformData()->cv_);  // All-waiters wake.
+}
+
+void mozilla::detail::ConditionVariableImpl::wait(MutexImpl& lock) {
+  // Blocks with no timeout; any failure of the native wait is fatal.
+  BOOL r = SleepConditionVariableSRW(&platformData()->cv_,
+                                     &lock.platformData()->lock, INFINITE, 0);
+  MOZ_RELEASE_ASSERT(r);
+}
+
+mozilla::CVStatus mozilla::detail::ConditionVariableImpl::wait_for(
+    MutexImpl& lock, const mozilla::TimeDuration& rel_time) {
+  if (rel_time == mozilla::TimeDuration::Forever()) {
+    wait(lock);
+    return CVStatus::NoTimeout;
+  }
+
+  SRWLOCK* srwlock = &lock.platformData()->lock;
+
+  // Note that DWORD is unsigned, so negative durations are clamped to 0.
+  // Forever was handled above; finite durations longer than UINT32_MAX ms
+  // become an INFINITE wait. We also don't want to round sub-millisecond
+  // waits to 0, as that wastes energy (see bug 1437167 comment 6), so we
+  // instead round submillisecond waits to 1ms.
+  double msecd = rel_time.ToMilliseconds();
+  DWORD msec;
+  if (msecd < 0.0) {
+    msec = 0;
+  } else if (msecd > UINT32_MAX) {
+    msec = INFINITE;
+  } else {
+    msec = static_cast<DWORD>(msecd);
+    // Round submillisecond waits to 1ms.
+    if (msec == 0 && !rel_time.IsZero()) {
+      msec = 1;
+    }
+  }
+
+  BOOL r = SleepConditionVariableSRW(&platformData()->cv_, srwlock, msec, 0);
+  if (r) return CVStatus::NoTimeout;
+  MOZ_RELEASE_ASSERT(GetLastError() == ERROR_TIMEOUT);
+  return CVStatus::Timeout;
+}
+
+mozilla::detail::ConditionVariableImpl::~ConditionVariableImpl() {
+  // Native condition variables don't require cleanup.
+}
+
+inline mozilla::detail::ConditionVariableImpl::PlatformData*
+mozilla::detail::ConditionVariableImpl::platformData() {
+  static_assert(sizeof platformData_ >= sizeof(PlatformData),
+                "platformData_ is too small");
+  return reinterpret_cast<PlatformData*>(platformData_);  // View raw storage.
+}
diff --git a/mozglue/misc/Debug.h b/mozglue/misc/Debug.h
new file mode 100644
index 0000000000..adc8d02c7b
--- /dev/null
+++ b/mozglue/misc/Debug.h
@@ -0,0 +1,63 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_Debug_h
+#define mozilla_glue_Debug_h
+
+/* This header file intends to supply debugging utilities for use in code
+ * that cannot use XPCOM debugging facilities like nsDebug.h.
+ * e.g. mozglue, browser/app
+ *
+ * NB: printf_stderr() is in the global namespace, so include this file with
+ * care; avoid including from header files.
+ */
+
+#include <io.h>
+#if defined(XP_WIN)
+#  include <windows.h>
+#endif  // defined(XP_WIN)
+#include "mozilla/Attributes.h"
+#include "mozilla/Sprintf.h"
+
+#if defined(MOZILLA_INTERNAL_API)
+#  error Do not include this file from XUL sources.
+#endif
+
+// Though this is a separate implementation than nsDebug's, we want to make the
+// declarations compatible to avoid confusing the linker if both headers are
+// included.
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+inline void printf_stderr(const char* fmt, ...) MOZ_FORMAT_PRINTF(1, 2) {
+#if defined(XP_WIN)  // Mirror the message to an attached debugger, if any.
+  if (IsDebuggerPresent()) {
+    char buf[2048];
+    va_list args;
+    va_start(args, fmt);
+    VsprintfLiteral(buf, fmt, args);
+    va_end(args);
+    OutputDebugStringA(buf);
+  }
+#endif  // defined(XP_WIN)
+
+  const int fd = _dup(2);  // Duplicate so fclose() leaves fd 2 itself open.
+  FILE* fp = (fd < 0) ? NULL : _fdopen(fd, "a");  // Never pass a bad fd.
+  if (!fp && fd >= 0) _close(fd);  // _fdopen failed: don't leak the duplicate.
+  if (!fp) return;
+  va_list args;
+  va_start(args, fmt);
+  vfprintf(fp, fmt, args);
+  va_end(args);
+  fclose(fp);
+}
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+
+#endif  // mozilla_glue_Debug_h
diff --git a/mozglue/misc/DynamicallyLinkedFunctionPtr.h b/mozglue/misc/DynamicallyLinkedFunctionPtr.h
new file mode 100644
index 0000000000..4313974ec5
--- /dev/null
+++ b/mozglue/misc/DynamicallyLinkedFunctionPtr.h
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_DynamicallyLinkedFunctionPtr_h
+#define mozilla_DynamicallyLinkedFunctionPtr_h
+
+#include <windows.h>
+
+#include <utility>
+
+#include "mozilla/Attributes.h"
+
+namespace mozilla {
+namespace detail {
+
+template <typename T>  // Declared only; the specializations do the work.
+struct FunctionPtrCracker;
+
+template <typename R, typename... Args>
+struct FunctionPtrCracker<R (*)(Args...)> {
+  using ReturnT = R;  // Return type pulled out of the pointer type.
+  using FunctionPtrT = R (*)(Args...);
+};
+
+#if defined(_M_IX86)
+template <typename R, typename... Args>
+struct FunctionPtrCracker<R(__stdcall*)(Args...)> {
+  using ReturnT = R;
+  using FunctionPtrT = R(__stdcall*)(Args...);
+};
+
+template <typename R, typename... Args>
+struct FunctionPtrCracker<R(__fastcall*)(Args...)> {
+  using ReturnT = R;
+  using FunctionPtrT = R(__fastcall*)(Args...);
+};
+#endif  // defined(_M_IX86)
+
+template <typename T>
+class DynamicallyLinkedFunctionPtrBase {  // Loads a DLL, resolves one export.
+ public:
+  using ReturnT = typename FunctionPtrCracker<T>::ReturnT;
+  using FunctionPtrT = typename FunctionPtrCracker<T>::FunctionPtrT;
+
+  DynamicallyLinkedFunctionPtrBase(const wchar_t* aLibName,
+                                   const char* aFuncName)
+      : mModule(::LoadLibraryW(aLibName)), mFunction(nullptr) {
+    if (!mModule) {
+      return;
+    }
+
+    mFunction =
+        reinterpret_cast<FunctionPtrT>(::GetProcAddress(mModule, aFuncName));
+
+    if (!mFunction) {
+      // Since the function doesn't exist, there is no point in holding a
+      // reference to mModule anymore.
+      ::FreeLibrary(mModule);
+      mModule = nullptr;
+    }
+  }
+
+  DynamicallyLinkedFunctionPtrBase(const DynamicallyLinkedFunctionPtrBase&) =
+      delete;
+  DynamicallyLinkedFunctionPtrBase& operator=(
+      const DynamicallyLinkedFunctionPtrBase&) = delete;
+
+  DynamicallyLinkedFunctionPtrBase(DynamicallyLinkedFunctionPtrBase&&) = delete;
+  DynamicallyLinkedFunctionPtrBase& operator=(
+      DynamicallyLinkedFunctionPtrBase&&) = delete;
+  // Forwards to mFunction with no null check; test operator bool first.
+  template <typename... Args>
+  ReturnT operator()(Args&&... args) const {
+    return mFunction(std::forward<Args>(args)...);
+  }
+
+  explicit operator bool() const { return !!mFunction; }
+
+ protected:
+  HMODULE mModule;  // Null if loading failed or the export was not found.
+  FunctionPtrT mFunction;  // Null unless both load and lookup succeeded.
+};
+
+}  // namespace detail
+
+/**
+ * In most cases, this class is the one that you want to use for resolving a
+ * dynamically-linked function pointer. It should be instantiated as a static
+ * local variable.
+ *
+ * NB: It has a trivial destructor, so the DLL that is loaded is never freed.
+ * Assuming that this function is called fairly often, this is the most
+ * sensible option. OTOH, if the function you are calling is a one-off, or the
+ * static local requirement is too restrictive, use DynamicallyLinkedFunctionPtr
+ * instead.
+ */
+template <typename T>
+class MOZ_STATIC_LOCAL_CLASS StaticDynamicallyLinkedFunctionPtr final
+    : public detail::DynamicallyLinkedFunctionPtrBase<T> {
+ public:
+  StaticDynamicallyLinkedFunctionPtr(const wchar_t* aLibName,
+                                     const char* aFuncName)
+      : detail::DynamicallyLinkedFunctionPtrBase<T>(aLibName, aFuncName) {}
+
+  /**
+   * We only offer this operator for the static local case, as it is not
+   * possible for this object to be destroyed while the returned pointer is
+   * being held.
+   */
+  operator typename detail::DynamicallyLinkedFunctionPtrBase<T>::FunctionPtrT()
+      const {
+    return this->mFunction;
+  }
+};
+
+// Variant that frees the loaded module when the object is destroyed.
+template <typename T>
+class MOZ_NON_PARAM MOZ_NON_TEMPORARY_CLASS DynamicallyLinkedFunctionPtr final
+    : public detail::DynamicallyLinkedFunctionPtrBase<T> {
+ public:
+  DynamicallyLinkedFunctionPtr(const wchar_t* aLibName, const char* aFuncName)
+      : detail::DynamicallyLinkedFunctionPtrBase<T>(aLibName, aFuncName) {}
+
+  // Drops the module reference taken by the base-class constructor, if any.
+  ~DynamicallyLinkedFunctionPtr() {
+    if (this->mModule) {
+      ::FreeLibrary(this->mModule);
+    }
+  }
+};
+
+}  // namespace mozilla
+
+#endif  // mozilla_DynamicallyLinkedFunctionPtr_h
diff --git a/mozglue/misc/ImportDir.h b/mozglue/misc/ImportDir.h
new file mode 100644
index 0000000000..6f7721d966
--- /dev/null
+++ b/mozglue/misc/ImportDir.h
@@ -0,0 +1,94 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/NativeNt.h"
+#include "mozilla/WinHeaderOnlyUtils.h"
+
+namespace mozilla {
+namespace detail {
+
+inline LauncherResult<nt::DataDirectoryEntry> GetImageDirectoryViaFileIo(
+    const nsAutoHandle& aImageFile, const uint32_t aOurImportDirectoryRva) {
+  OVERLAPPED ov = {};
+  ov.Offset = aOurImportDirectoryRva;  // The RVA is used as the file offset.
+
+  DWORD bytesRead;
+  nt::DataDirectoryEntry result;
+  if (!::ReadFile(aImageFile, &result, sizeof(result), &bytesRead, &ov) ||
+      bytesRead != sizeof(result)) {
+    return LAUNCHER_ERROR_FROM_LAST();  // A short read is treated as failure.
+  }
+
+  return result;
+}
+
+}  // namespace detail
+
+/**
+ * This function ensures that the import directory of a loaded binary image
+ * matches the version that is found in the original file on disk. We do this
+ * to prevent tampering by third-party code.
+ *
+ * Yes, this function may perform file I/O on the critical path during
+ * startup. A mitigating factor here is that this function must be called
+ * immediately after creating a process using the image specified by
+ * |aFullImagePath|; by this point, the system has already paid the price of
+ * pulling the image file's contents into the page cache.
+ *
+ * @param aFullImagePath Wide-character string containing the absolute path
+ *                       to the binary whose import directory we are touching.
+ * @param aTransferMgr   Encapsulating the transfer from the current process to
+ *                       the child process whose import table we are touching.
+ */
+inline LauncherVoidResult RestoreImportDirectory(
+    const wchar_t* aFullImagePath, nt::CrossExecTransferManager& aTransferMgr) {
+  uint32_t importDirEntryRva;
+  PIMAGE_DATA_DIRECTORY importDirEntry =
+      aTransferMgr.LocalPEHeaders().GetImageDirectoryEntryPtr(
+          IMAGE_DIRECTORY_ENTRY_IMPORT, &importDirEntryRva);
+  if (!importDirEntry) {
+    return LAUNCHER_ERROR_FROM_WIN32(ERROR_BAD_EXE_FORMAT);
+  }
+
+  nsAutoHandle file(::CreateFileW(aFullImagePath, GENERIC_READ, FILE_SHARE_READ,
+                                  nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
+                                  nullptr));
+  if (file.get() == INVALID_HANDLE_VALUE) {
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+
+  // Why do we use file I/O here instead of a memory mapping? The simple reason
+  // is that we do not want any kernel-mode drivers to start tampering with file
+  // contents under the belief that the file is being mapped for execution.
+  // Windows 8 supports creation of file mappings using the SEC_IMAGE_NO_EXECUTE
+  // flag, which may help to mitigate this, but we might as well just support
+  // a single implementation that works everywhere.
+  LauncherResult<nt::DataDirectoryEntry> realImportDirectory =
+      detail::GetImageDirectoryViaFileIo(file, importDirEntryRva);
+  if (realImportDirectory.isErr()) {
+    return realImportDirectory.propagateErr();
+  }
+  // The on-disk entry is the trusted copy that gets written over the live one.
+  nt::DataDirectoryEntry toWrite = realImportDirectory.unwrap();
+
+  {  // Scope for prot; presumably the old protection returns on scope exit.
+    AutoVirtualProtect prot = aTransferMgr.Protect(
+        importDirEntry, sizeof(IMAGE_DATA_DIRECTORY), PAGE_READWRITE);
+    if (!prot) {
+      return LAUNCHER_ERROR_FROM_MOZ_WINDOWS_ERROR(prot.GetError());
+    }
+
+    LauncherVoidResult writeResult = aTransferMgr.Transfer(
+        importDirEntry, &toWrite, sizeof(IMAGE_DATA_DIRECTORY));
+    if (writeResult.isErr()) {
+      return writeResult.propagateErr();
+    }
+  }
+
+  return Ok();
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/MmapFaultHandler.cpp b/mozglue/misc/MmapFaultHandler.cpp
new file mode 100644
index 0000000000..1702a34243
--- /dev/null
+++ b/mozglue/misc/MmapFaultHandler.cpp
@@ -0,0 +1,131 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MmapFaultHandler.h"
+
+#if defined(XP_UNIX) && !defined(XP_DARWIN)
+
+#  include "PlatformMutex.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/MemoryChecking.h"
+#  include "mozilla/ThreadLocal.h"
+#  include <signal.h>
+#  include <cstring>
+
+static MOZ_THREAD_LOCAL(MmapAccessScope*) sMmapAccessScope;
+
+static struct sigaction sPrevSIGBUSHandler;
+
+static void MmapSIGBUSHandler(int signum, siginfo_t* info, void* context) {
+  MOZ_RELEASE_ASSERT(signum == SIGBUS);
+
+  MmapAccessScope* mas = sMmapAccessScope.get();  // Scope of this thread.
+
+  if (mas && mas->IsInsideBuffer(info->si_addr)) {
+    // Temporarily instead of handling the signal, we crash intentionally and
+    // send some diagnostic information to find out why the signal is received.
+    mas->CrashWithInfo(info->si_addr);
+
+    // The address is inside the buffer, handle the failure.
+    siglongjmp(mas->mJmpBuf, signum);  // Presumably unreachable for now.
+  }
+
+  // This signal is not caused by accessing region protected by MmapAccessScope.
+  // Forward the signal to the next handler.
+  if (sPrevSIGBUSHandler.sa_flags & SA_SIGINFO) {
+    sPrevSIGBUSHandler.sa_sigaction(signum, info, context);
+  } else if (sPrevSIGBUSHandler.sa_handler == SIG_DFL ||
+             sPrevSIGBUSHandler.sa_handler == SIG_IGN) {
+    // There is no next handler. Uninstalling our handler and returning will
+    // cause a crash.
+    sigaction(signum, &sPrevSIGBUSHandler, nullptr);
+  } else {
+    sPrevSIGBUSHandler.sa_handler(signum);
+  }
+}
+
+mozilla::Atomic<bool> gSIGBUSHandlerInstalled(false);
+mozilla::Atomic<bool> gSIGBUSHandlerInstalling(false);
+
+void InstallMmapFaultHandler() {
+  // This function is called from MmapAccessScope's constructor because there is
+  // no single point where we could install the handler during startup. This
+  // means that it's called quite often, so the common case is a single read of
+  // the atomic "installed" flag before any attempt to claim the installation.
+  if (gSIGBUSHandlerInstalled) {
+    return;
+  }
+
+  if (gSIGBUSHandlerInstalling.compareExchange(false, true)) {
+    sMmapAccessScope.infallibleInit();
+
+    struct sigaction busHandler;
+    busHandler.sa_flags = SA_SIGINFO | SA_NODEFER | SA_ONSTACK;
+    busHandler.sa_sigaction = MmapSIGBUSHandler;
+    sigemptyset(&busHandler.sa_mask);
+    if (sigaction(SIGBUS, &busHandler, &sPrevSIGBUSHandler)) {
+      MOZ_CRASH("Unable to install SIGBUS handler");
+    }
+
+    MOZ_ASSERT(!gSIGBUSHandlerInstalled);
+    gSIGBUSHandlerInstalled = true;
+  } else {
+    // Just spin lock here. It should not take a substantial amount
+    // of time, so a mutex would likely be a spin lock anyway, and
+    // this avoids the need to new up a static mutex from within
+    // mozglue/misc, which complicates things with
+    // check_vanilla_allocations.py
+    while (!gSIGBUSHandlerInstalled) {
+    }
+  }
+}
+
+MmapAccessScope::MmapAccessScope(void* aBuf, uint32_t aBufLen,
+                                 const char* aFilename) {
+  // Install signal handler if it wasn't installed yet.
+  InstallMmapFaultHandler();
+
+  // We'll handle the signal only if the crashing address is inside this buffer.
+  mBuf = aBuf;
+  mBufLen = aBufLen;
+  mFilename = aFilename;  // Pointer is stored, not copied.
+
+  SetThreadLocalScope();
+}
+
+MmapAccessScope::~MmapAccessScope() {
+  MOZ_RELEASE_ASSERT(sMmapAccessScope.get() == this);
+  sMmapAccessScope.set(mPreviousScope);  // Re-expose the enclosing scope.
+}
+
+void MmapAccessScope::SetThreadLocalScope() {
+  // mJmpBuf is set outside of this class for reasons mentioned in the header
+  // file, but we need to initialize the member here too to make Coverity happy.
+  memset(mJmpBuf, 0, sizeof(sigjmp_buf));
+
+  // If MmapAccessScopes are nested, save the previous one and restore it in
+  // the destructor.
+  mPreviousScope = sMmapAccessScope.get();
+
+  // MmapAccessScope is now set up (except mJmpBuf for reasons mentioned in the
+  // header file). Store the pointer in a thread-local variable sMmapAccessScope
+  // so we can use it in the handler if the signal is triggered.
+  sMmapAccessScope.set(this);
+}
+
+bool MmapAccessScope::IsInsideBuffer(void* aPtr) {  // [mBuf, mBuf+mBufLen)
+  return aPtr >= mBuf && aPtr < (void*)((char*)mBuf + mBufLen);
+}
+
+void MmapAccessScope::CrashWithInfo(void* aPtr) {
+  // mFilename is optional (null by default); never hand null to "%s".
+  MOZ_CRASH_UNSAFE_PRINTF(
+      "SIGBUS received when accessing mmaped file [buffer=%p, "
+      "buflen=%u, address=%p, filename=%s]",
+      mBuf, mBufLen, aPtr, mFilename ? mFilename : "(null)");
+}
+
+#endif
diff --git a/mozglue/misc/MmapFaultHandler.h b/mozglue/misc/MmapFaultHandler.h
new file mode 100644
index 0000000000..5fb6cdb142
--- /dev/null
+++ b/mozglue/misc/MmapFaultHandler.h
@@ -0,0 +1,105 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MmapFaultHandler_h_
+#define MmapFaultHandler_h_
+
+#if defined(XP_WIN)
+// Windows
+
+#  ifdef HAVE_SEH_EXCEPTIONS
+#    define MMAP_FAULT_HANDLER_BEGIN_HANDLE(fd) __try {
+#    define MMAP_FAULT_HANDLER_BEGIN_BUFFER(buf, bufLen) __try {
+#    define MMAP_FAULT_HANDLER_CATCH(retval)                  \
+      }                                                       \
+      __except (GetExceptionCode() == EXCEPTION_IN_PAGE_ERROR \
+                    ? EXCEPTION_EXECUTE_HANDLER               \
+                    : EXCEPTION_CONTINUE_SEARCH) {            \
+        NS_WARNING("unexpected EXCEPTION_IN_PAGE_ERROR");     \
+        return retval;                                        \
+      }
+#  else
+#    define MMAP_FAULT_HANDLER_BEGIN_HANDLE(fd) {
+#    define MMAP_FAULT_HANDLER_BEGIN_BUFFER(buf, bufLen) {
+#    define MMAP_FAULT_HANDLER_CATCH(retval) }
+#  endif
+
+#elif defined(XP_DARWIN)
+// MacOS
+
+#  define MMAP_FAULT_HANDLER_BEGIN_HANDLE(fd) {
+#  define MMAP_FAULT_HANDLER_BEGIN_BUFFER(buf, bufLen) {
+#  define MMAP_FAULT_HANDLER_CATCH(retval) }
+
+#else
+// Linux
+
+#  include "mozilla/Attributes.h"
+#  include "mozilla/Types.h"
+#  include <stdint.h>
+#  include <setjmp.h>
+
+class MOZ_RAII MmapAccessScope {
+ public:
+  MFBT_API MmapAccessScope(void* aBuf, uint32_t aBufLen,
+                           const char* aFilename = nullptr);
+  MFBT_API ~MmapAccessScope();
+
+  MmapAccessScope(const MmapAccessScope&) = delete;
+  MmapAccessScope& operator=(const MmapAccessScope&) = delete;
+
+  void SetThreadLocalScope();
+  bool IsInsideBuffer(void* aPtr);
+  void CrashWithInfo(void* aPtr);
+
+  // sigsetjmp cannot be called from a method that returns before calling
+  // siglongjmp, so the macro must call sigsetjmp directly and mJmpBuf must be
+  // public.
+  sigjmp_buf mJmpBuf;
+
+ private:
+  void* mBuf;
+  const char* mFilename;
+  uint32_t mBufLen;
+  MmapAccessScope* mPreviousScope;
+};
+
+// Gets around warnings for null-checking in a macro.
+template <typename T>
+inline bool ValidFD(T fd) {
+  return !!fd;
+}
+
+#  define MMAP_FAULT_HANDLER_BEGIN_HANDLE(fd)                  \
+    {                                                          \
+      void* mmapScopeBuf = nullptr;                            \
+      nsCString mmapScopeFilename;                             \
+      uint32_t mmapScopeBufLen = 0;                            \
+      if (ValidFD(fd) && fd->mMap) {                           \
+        mmapScopeBuf = (void*)fd->mFileStart;                  \
+        mmapScopeBufLen = fd->mTotalLen;                       \
+      }                                                        \
+      if (ValidFD(fd) && fd->mFile) {                          \
+        nsCOMPtr<nsIFile> file = fd->mFile.GetBaseFile();      \
+        file->GetNativeLeafName(mmapScopeFilename);            \
+      }                                                        \
+      MmapAccessScope mmapScope(mmapScopeBuf, mmapScopeBufLen, \
+                                mmapScopeFilename.get());      \
+      if (sigsetjmp(mmapScope.mJmpBuf, 0) == 0) {
+#  define MMAP_FAULT_HANDLER_BEGIN_BUFFER(buf, bufLen)   \
+    {                                                    \
+      MmapAccessScope mmapScope((void*)(buf), (bufLen)); \
+      if (sigsetjmp(mmapScope.mJmpBuf, 0) == 0) {
+#  define MMAP_FAULT_HANDLER_CATCH(retval)                       \
+    }                                                            \
+    else {                                                       \
+      NS_WARNING("SIGBUS received when accessing mmapped file"); \
+      return retval;                                             \
+    }                                                            \
+    }
+
+#endif
+
+#endif
diff --git a/mozglue/misc/MutexPlatformData_posix.h b/mozglue/misc/MutexPlatformData_posix.h
new file mode 100644
index 0000000000..d1659d8d7b
--- /dev/null
+++ b/mozglue/misc/MutexPlatformData_posix.h
@@ -0,0 +1,18 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MutexPlatformData_posix_h
+#define MutexPlatformData_posix_h
+
+#include <pthread.h>
+
+#include "mozilla/PlatformMutex.h"
+
+struct mozilla::detail::MutexImpl::PlatformData {
+  pthread_mutex_t ptMutex;
+};
+
+#endif  // MutexPlatformData_posix_h
diff --git a/mozglue/misc/MutexPlatformData_windows.h b/mozglue/misc/MutexPlatformData_windows.h
new file mode 100644
index 0000000000..489f921115
--- /dev/null
+++ b/mozglue/misc/MutexPlatformData_windows.h
@@ -0,0 +1,18 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MutexPlatformData_windows_h
+#define MutexPlatformData_windows_h
+
+#include <windows.h>
+
+#include "mozilla/PlatformMutex.h"
+
+struct mozilla::detail::MutexImpl::PlatformData {
+  SRWLOCK lock;
+};
+
+#endif  // MutexPlatformData_windows_h
diff --git a/mozglue/misc/Mutex_posix.cpp b/mozglue/misc/Mutex_posix.cpp
new file mode 100644
index 0000000000..7378a544f2
--- /dev/null
+++ b/mozglue/misc/Mutex_posix.cpp
@@ -0,0 +1,133 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+
+#if defined(XP_DARWIN)
+#  include <pthread_spis.h>
+#endif
+
+#include "mozilla/PlatformMutex.h"
+#include "MutexPlatformData_posix.h"
+
+#define REPORT_PTHREADS_ERROR(result, msg) \
+  {                                        \
+    errno = result;                        \
+    perror(msg);                           \
+    MOZ_CRASH(msg);                        \
+  }
+
+#define TRY_CALL_PTHREADS(call, msg)      \
+  {                                       \
+    int result = (call);                  \
+    if (result != 0) {                    \
+      REPORT_PTHREADS_ERROR(result, msg); \
+    }                                     \
+  }
+
+mozilla::detail::MutexImpl::MutexImpl() {
+  pthread_mutexattr_t* attrp = nullptr;
+
+#if defined(DEBUG)
+#  define MUTEX_KIND PTHREAD_MUTEX_ERRORCHECK
+// Linux with glibc, FreeBSD and macOS 10.14+ support adaptive mutexes that
+// spin for a short number of tries before sleeping.  NSPR's locks did this,
+// too, and it seems like a reasonable thing to do.
+#elif (defined(__linux__) && defined(__GLIBC__)) || defined(__FreeBSD__)
+#  define MUTEX_KIND PTHREAD_MUTEX_ADAPTIVE_NP
+#elif defined(XP_DARWIN)
+#  if defined(PTHREAD_MUTEX_POLICY_FIRSTFIT_NP)
+#    define POLICY_KIND PTHREAD_MUTEX_POLICY_FIRSTFIT_NP
+#  else
+#    define POLICY_KIND (3)  // The definition is missing in old SDKs
+#  endif
+#endif
+
+#if defined(MUTEX_KIND) || defined(POLICY_KIND)
+#  define ATTR_REQUIRED
+#endif
+
+#if defined(ATTR_REQUIRED)
+  pthread_mutexattr_t attr;
+
+  TRY_CALL_PTHREADS(
+      pthread_mutexattr_init(&attr),
+      "mozilla::detail::MutexImpl::MutexImpl: pthread_mutexattr_init failed");
+
+#  if defined(MUTEX_KIND)
+  TRY_CALL_PTHREADS(pthread_mutexattr_settype(&attr, MUTEX_KIND),
+                    "mozilla::detail::MutexImpl::MutexImpl: "
+                    "pthread_mutexattr_settype failed");
+#  elif defined(POLICY_KIND)
+  if (__builtin_available(macOS 10.14, *)) {
+    TRY_CALL_PTHREADS(pthread_mutexattr_setpolicy_np(&attr, POLICY_KIND),
+                      "mozilla::detail::MutexImpl::MutexImpl: "
+                      "pthread_mutexattr_setpolicy_np failed");
+  }
+#  endif
+  attrp = &attr;
+#endif
+
+  TRY_CALL_PTHREADS(
+      pthread_mutex_init(&platformData()->ptMutex, attrp),
+      "mozilla::detail::MutexImpl::MutexImpl: pthread_mutex_init failed");
+
+#if defined(ATTR_REQUIRED)
+  TRY_CALL_PTHREADS(pthread_mutexattr_destroy(&attr),
+                    "mozilla::detail::MutexImpl::MutexImpl: "
+                    "pthread_mutexattr_destroy failed");
+#endif
+}
+
+mozilla::detail::MutexImpl::~MutexImpl() {
+  TRY_CALL_PTHREADS(
+      pthread_mutex_destroy(&platformData()->ptMutex),
+      "mozilla::detail::MutexImpl::~MutexImpl: pthread_mutex_destroy failed");
+}
+
+inline void mozilla::detail::MutexImpl::mutexLock() {
+  TRY_CALL_PTHREADS(
+      pthread_mutex_lock(&platformData()->ptMutex),
+      "mozilla::detail::MutexImpl::mutexLock: pthread_mutex_lock failed");
+}
+
+bool mozilla::detail::MutexImpl::tryLock() { return mutexTryLock(); }
+
+bool mozilla::detail::MutexImpl::mutexTryLock() {
+  // pthread_mutex_trylock reports 0 when the lock was acquired and EBUSY when
+  // it is already held; any other code is reported and crashes the process.
+  switch (int rc = pthread_mutex_trylock(&platformData()->ptMutex)) {
+    case 0:
+      return true;
+    case EBUSY:
+      return false;
+    default:
+      REPORT_PTHREADS_ERROR(
+          rc,
+          "mozilla::detail::MutexImpl::mutexTryLock: pthread_mutex_trylock failed");
+  }
+}
+
+void mozilla::detail::MutexImpl::lock() { mutexLock(); }
+
+void mozilla::detail::MutexImpl::unlock() {
+  TRY_CALL_PTHREADS(
+      pthread_mutex_unlock(&platformData()->ptMutex),
+      "mozilla::detail::MutexImpl::unlock: pthread_mutex_unlock failed");
+}
+
+#undef TRY_CALL_PTHREADS
+
+mozilla::detail::MutexImpl::PlatformData*
+mozilla::detail::MutexImpl::platformData() {
+  static_assert(sizeof(platformData_) >= sizeof(PlatformData),
+                "platformData_ is too small");
+  return reinterpret_cast<PlatformData*>(platformData_);
+}
diff --git a/mozglue/misc/Mutex_windows.cpp b/mozglue/misc/Mutex_windows.cpp
new file mode 100644
index 0000000000..c4c78fd4f6
--- /dev/null
+++ b/mozglue/misc/Mutex_windows.cpp
@@ -0,0 +1,40 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/PlatformMutex.h"
+
+#include <windows.h>
+
+#include "MutexPlatformData_windows.h"
+
+mozilla::detail::MutexImpl::MutexImpl() {
+  InitializeSRWLock(&platformData()->lock);
+}
+
+mozilla::detail::MutexImpl::~MutexImpl() {}
+
+void mozilla::detail::MutexImpl::lock() {
+  AcquireSRWLockExclusive(&platformData()->lock);
+}
+
+bool mozilla::detail::MutexImpl::tryLock() { return mutexTryLock(); }
+
+bool mozilla::detail::MutexImpl::mutexTryLock() {
+  return !!TryAcquireSRWLockExclusive(&platformData()->lock);
+}
+
+void mozilla::detail::MutexImpl::unlock() {
+  ReleaseSRWLockExclusive(&platformData()->lock);
+}
+
+mozilla::detail::MutexImpl::PlatformData*
+mozilla::detail::MutexImpl::platformData() {
+  static_assert(sizeof(platformData_) >= sizeof(PlatformData),
+                "platformData_ is too small");
+  return reinterpret_cast<PlatformData*>(platformData_);
+}
diff --git a/mozglue/misc/NativeNt.h b/mozglue/misc/NativeNt.h
new file mode 100644
index 0000000000..ca37fc7bec
--- /dev/null
+++ b/mozglue/misc/NativeNt.h
@@ -0,0 +1,1680 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_NativeNt_h
+#define mozilla_NativeNt_h
+
+#include <stdint.h>
+#include <windows.h>
+#include <winnt.h>
+#include <winternl.h>
+
+#include <algorithm>
+#include <utility>
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Range.h"
+#include "mozilla/Span.h"
+#include "mozilla/WinHeaderOnlyUtils.h"
+#include "mozilla/interceptor/MMPolicies.h"
+#include "mozilla/interceptor/TargetFunction.h"
+
+#if defined(MOZILLA_INTERNAL_API)
+#  include "nsHashKeys.h"
+#  include "nsString.h"
+#  include "nsTHashtable.h"
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+// The declarations within this #if block are intended to be used for initial
+// process initialization ONLY. You probably don't want to be using these in
+// normal Gecko code!
+#if !defined(MOZILLA_INTERNAL_API)
+
+extern "C" {
+
+#  if !defined(STATUS_ACCESS_DENIED)
+#    define STATUS_ACCESS_DENIED ((NTSTATUS)0xC0000022L)
+#  endif  // !defined(STATUS_ACCESS_DENIED)
+
+#  if !defined(STATUS_DLL_NOT_FOUND)
+#    define STATUS_DLL_NOT_FOUND ((NTSTATUS)0xC0000135L)
+#  endif  // !defined(STATUS_DLL_NOT_FOUND)
+
+#  if !defined(STATUS_UNSUCCESSFUL)
+#    define STATUS_UNSUCCESSFUL ((NTSTATUS)0xC0000001L)
+#  endif  // !defined(STATUS_UNSUCCESSFUL)
+
+#  if !defined(STATUS_INFO_LENGTH_MISMATCH)
+#    define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L)
+#  endif
+
+enum SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 };
+
+NTSTATUS NTAPI NtMapViewOfSection(
+    HANDLE aSection, HANDLE aProcess, PVOID* aBaseAddress, ULONG_PTR aZeroBits,
+    SIZE_T aCommitSize, PLARGE_INTEGER aSectionOffset, PSIZE_T aViewSize,
+    SECTION_INHERIT aInheritDisposition, ULONG aAllocationType,
+    ULONG aProtectionFlags);
+
+NTSTATUS NTAPI NtUnmapViewOfSection(HANDLE aProcess, PVOID aBaseAddress);
+
+enum MEMORY_INFORMATION_CLASS {
+  MemoryBasicInformation = 0,
+  MemorySectionName = 2
+};
+
+// NB: When allocating, space for the buffer must also be included
+typedef struct _MEMORY_SECTION_NAME {
+  UNICODE_STRING mSectionFileName;
+} MEMORY_SECTION_NAME, *PMEMORY_SECTION_NAME;
+
+NTSTATUS NTAPI NtQueryVirtualMemory(HANDLE aProcess, PVOID aBaseAddress,
+                                    MEMORY_INFORMATION_CLASS aMemInfoClass,
+                                    PVOID aMemInfo, SIZE_T aMemInfoLen,
+                                    PSIZE_T aReturnLen);
+
+LONG NTAPI RtlCompareUnicodeString(PCUNICODE_STRING aStr1,
+                                   PCUNICODE_STRING aStr2,
+                                   BOOLEAN aCaseInsensitive);
+
+BOOLEAN NTAPI RtlEqualUnicodeString(PCUNICODE_STRING aStr1,
+                                    PCUNICODE_STRING aStr2,
+                                    BOOLEAN aCaseInsensitive);
+
+NTSTATUS NTAPI RtlGetVersion(PRTL_OSVERSIONINFOW aOutVersionInformation);
+
+VOID NTAPI RtlAcquireSRWLockExclusive(PSRWLOCK aLock);
+VOID NTAPI RtlAcquireSRWLockShared(PSRWLOCK aLock);
+
+VOID NTAPI RtlReleaseSRWLockExclusive(PSRWLOCK aLock);
+VOID NTAPI RtlReleaseSRWLockShared(PSRWLOCK aLock);
+
+ULONG NTAPI RtlNtStatusToDosError(NTSTATUS aStatus);
+VOID NTAPI RtlSetLastWin32Error(DWORD aError);
+DWORD NTAPI RtlGetLastWin32Error();
+
+VOID NTAPI RtlRunOnceInitialize(PRTL_RUN_ONCE aRunOnce);
+
+NTSTATUS NTAPI NtReadVirtualMemory(HANDLE aProcessHandle, PVOID aBaseAddress,
+                                   PVOID aBuffer, SIZE_T aNumBytesToRead,
+                                   PSIZE_T aNumBytesRead);
+
+NTSTATUS NTAPI LdrLoadDll(PWCHAR aDllPath, PULONG aFlags,
+                          PUNICODE_STRING aDllName, PHANDLE aOutHandle);
+
+typedef ULONG(NTAPI* PRTL_RUN_ONCE_INIT_FN)(PRTL_RUN_ONCE, PVOID, PVOID*);
+NTSTATUS NTAPI RtlRunOnceExecuteOnce(PRTL_RUN_ONCE aRunOnce,
+                                     PRTL_RUN_ONCE_INIT_FN aInitFn,
+                                     PVOID aContext, PVOID* aParameter);
+
+}  // extern "C"
+
+#endif  // !defined(MOZILLA_INTERNAL_API)
+
+extern "C" {
+PVOID NTAPI RtlAllocateHeap(PVOID aHeapHandle, ULONG aFlags, SIZE_T aSize);
+
+PVOID NTAPI RtlReAllocateHeap(PVOID aHeapHandle, ULONG aFlags, LPVOID aMem,
+                              SIZE_T aNewSize);
+
+BOOLEAN NTAPI RtlFreeHeap(PVOID aHeapHandle, ULONG aFlags, PVOID aHeapBase);
+
+BOOLEAN NTAPI RtlQueryPerformanceCounter(LARGE_INTEGER* aPerfCount);
+
+#define RTL_DUPLICATE_UNICODE_STRING_NULL_TERMINATE 1
+#define RTL_DUPLICATE_UNICODE_STRING_ALLOCATE_NULL_STRING 2
+NTSTATUS NTAPI RtlDuplicateUnicodeString(ULONG aFlags, PCUNICODE_STRING aSrc,
+                                         PUNICODE_STRING aDest);
+
+VOID NTAPI RtlFreeUnicodeString(PUNICODE_STRING aUnicodeString);
+}  // extern "C"
+
+namespace mozilla {
+namespace nt {
+
+/**
+ * This class encapsulates a UNICODE_STRING that owns its own buffer. The
+ * buffer is always NULL terminated, thus allowing us to cast to a wide C-string
+ * without requiring any mutation.
+ *
+ * We only allow creation of this owned buffer from outside XUL.
+ */
+class AllocatedUnicodeString final {
+ public:
+  AllocatedUnicodeString() : mUnicodeString() {}
+
+#if defined(MOZILLA_INTERNAL_API)
+  AllocatedUnicodeString(const AllocatedUnicodeString& aOther) = delete;
+
+  AllocatedUnicodeString& operator=(const AllocatedUnicodeString& aOther) =
+      delete;
+#else
+  explicit AllocatedUnicodeString(PCUNICODE_STRING aSrc) {
+    if (!aSrc) {
+      mUnicodeString = {};
+      return;
+    }
+
+    Duplicate(aSrc);
+  }
+
+  explicit AllocatedUnicodeString(const char* aSrc) {
+    if (!aSrc) {
+      mUnicodeString = {};
+      return;
+    }
+
+    Duplicate(aSrc);
+  }
+
+  AllocatedUnicodeString(const AllocatedUnicodeString& aOther) {
+    Duplicate(&aOther.mUnicodeString);
+  }
+
+  AllocatedUnicodeString& operator=(const AllocatedUnicodeString& aOther) {
+    // Duplicate into a temporary before releasing our own buffer, so that
+    // self-assignment keeps the string instead of silently emptying it.
+    return *this = AllocatedUnicodeString(aOther);
+  }
+
+  AllocatedUnicodeString& operator=(PCUNICODE_STRING aSrc) {
+    MOZ_ASSERT(aSrc);
+    // Duplicate before releasing: aSrc may point at our own mUnicodeString.
+    return *this = AllocatedUnicodeString(aSrc);
+  }
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+  AllocatedUnicodeString(AllocatedUnicodeString&& aOther)
+      : mUnicodeString(aOther.mUnicodeString) {
+    aOther.mUnicodeString = {};
+  }
+
+  AllocatedUnicodeString& operator=(AllocatedUnicodeString&& aOther) {
+    Clear();
+    mUnicodeString = aOther.mUnicodeString;
+    aOther.mUnicodeString = {};
+    return *this;
+  }
+
+  ~AllocatedUnicodeString() { Clear(); }
+
+  bool IsEmpty() const {
+    return !mUnicodeString.Buffer || !mUnicodeString.Length;
+  }
+
+  operator PCUNICODE_STRING() const { return &mUnicodeString; }
+
+  operator const WCHAR*() const { return mUnicodeString.Buffer; }
+
+  USHORT CharLen() const { return mUnicodeString.Length / sizeof(WCHAR); }
+
+#if defined(MOZILLA_INTERNAL_API)
+  nsDependentString AsString() const {
+    if (!mUnicodeString.Buffer) {
+      return nsDependentString();
+    }
+
+    // We can use nsDependentString here as we guaranteed null termination
+    // when we allocated the string.
+    return nsDependentString(mUnicodeString.Buffer, CharLen());
+  }
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+ private:
+#if !defined(MOZILLA_INTERNAL_API)
+  void Duplicate(PCUNICODE_STRING aSrc) {
+    MOZ_ASSERT(aSrc);
+
+    // We duplicate with null termination so that this string may be used
+    // as a wide C-string without any further manipulation.
+    NTSTATUS ntStatus = ::RtlDuplicateUnicodeString(
+        RTL_DUPLICATE_UNICODE_STRING_NULL_TERMINATE, aSrc, &mUnicodeString);
+    MOZ_ASSERT(NT_SUCCESS(ntStatus));
+    if (!NT_SUCCESS(ntStatus)) {
+      // Make sure that mUnicodeString does not contain bogus data
+      // (since not all callers zero it out before invoking)
+      mUnicodeString = {};
+    }
+  }
+
+  void Duplicate(const char* aSrc) {
+    MOZ_ASSERT(aSrc);
+
+    ANSI_STRING ansiStr;
+    RtlInitAnsiString(&ansiStr, aSrc);
+    NTSTATUS ntStatus =
+        ::RtlAnsiStringToUnicodeString(&mUnicodeString, &ansiStr, TRUE);
+    MOZ_ASSERT(NT_SUCCESS(ntStatus));
+    if (!NT_SUCCESS(ntStatus)) {
+      mUnicodeString = {};
+    }
+  }
+#endif  // !defined(MOZILLA_INTERNAL_API)
+
+  void Clear() {
+    if (!mUnicodeString.Buffer) {
+      return;
+    }
+
+    ::RtlFreeUnicodeString(&mUnicodeString);
+    mUnicodeString = {};
+  }
+
+  UNICODE_STRING mUnicodeString;
+};
+
+#if !defined(MOZILLA_INTERNAL_API)
+
+struct MemorySectionNameBuf : public _MEMORY_SECTION_NAME {
+  MemorySectionNameBuf() {
+    mSectionFileName.Length = 0;
+    mSectionFileName.MaximumLength = sizeof(mBuf);
+    mSectionFileName.Buffer = mBuf;
+  }
+
+  MemorySectionNameBuf(const MemorySectionNameBuf& aOther) { *this = aOther; }
+
+  MemorySectionNameBuf(MemorySectionNameBuf&& aOther) {
+    *this = std::move(aOther);
+  }
+
+  // A defaulted copy would leave Buffer pointing at |aOther.mBuf|, so copy by
+  // hand. memmove (not memcpy) keeps self-assignment well-defined.
+  MemorySectionNameBuf& operator=(const MemorySectionNameBuf& aOther) {
+    mSectionFileName.Length = aOther.mSectionFileName.Length;
+    mSectionFileName.MaximumLength = sizeof(mBuf);
+    MOZ_ASSERT(mSectionFileName.Length <= mSectionFileName.MaximumLength);
+    mSectionFileName.Buffer = mBuf;
+    memmove(mBuf, aOther.mBuf, aOther.mSectionFileName.Length);
+    return *this;
+  }
+
+  MemorySectionNameBuf& operator=(MemorySectionNameBuf&& aOther) {
+    mSectionFileName.Length = aOther.mSectionFileName.Length;
+    aOther.mSectionFileName.Length = 0;
+    mSectionFileName.MaximumLength = sizeof(mBuf);
+    MOZ_ASSERT(mSectionFileName.Length <= mSectionFileName.MaximumLength);
+    aOther.mSectionFileName.MaximumLength = sizeof(aOther.mBuf);
+    mSectionFileName.Buffer = mBuf;
+    memmove(mBuf, aOther.mBuf, mSectionFileName.Length);
+    return *this;
+  }
+
+  // Native NT paths, so we can't assume MAX_PATH. Use a larger buffer.
+  WCHAR mBuf[2 * MAX_PATH];
+
+  bool IsEmpty() const {
+    return !mSectionFileName.Buffer || !mSectionFileName.Length;
+  }
+
+  operator PCUNICODE_STRING() const { return &mSectionFileName; }
+};
+
+class MemorySectionNameOnHeap {
+  UniquePtr<uint8_t[]> mBuffer;
+
+  MemorySectionNameOnHeap() = default;
+  explicit MemorySectionNameOnHeap(size_t aBufferLen)
+      : mBuffer(MakeUnique<uint8_t[]>(aBufferLen)) {}
+
+ public:
+  static MemorySectionNameOnHeap GetBackingFilePath(HANDLE aProcess,
+                                                    void* aSectionAddr) {
+    // Start with a 2 * MAX_PATH byte buffer and retry with the size reported
+    // back by NtQueryVirtualMemory whenever it says the length mismatched.
+    for (SIZE_T capacity = MAX_PATH * 2;;) {
+      MemorySectionNameOnHeap result(capacity);
+      SIZE_T needed;
+      const NTSTATUS status = ::NtQueryVirtualMemory(
+          aProcess, aSectionAddr, MemorySectionName, result.mBuffer.get(),
+          capacity, &needed);
+      if (NT_SUCCESS(status)) {
+        return result;
+      }
+
+      // Give up on any other failure, or if the reported size would not grow.
+      const bool shouldRetry =
+          status == STATUS_INFO_LENGTH_MISMATCH && capacity < needed;
+      if (!shouldRetry) {
+        return MemorySectionNameOnHeap();
+      }
+
+      capacity = needed;
+    }
+  }
+
+  // Movable, but not copyable.
+  MemorySectionNameOnHeap(MemorySectionNameOnHeap&&) = default;
+  MemorySectionNameOnHeap& operator=(MemorySectionNameOnHeap&&) = default;
+  MemorySectionNameOnHeap(const MemorySectionNameOnHeap&) = delete;
+  MemorySectionNameOnHeap& operator=(const MemorySectionNameOnHeap&) = delete;
+
+  PCUNICODE_STRING AsUnicodeString() const {
+    return reinterpret_cast<PCUNICODE_STRING>(mBuffer.get());
+  }
+};
+
+inline bool FindCharInUnicodeString(const UNICODE_STRING& aStr, WCHAR aChar,
+                                    uint16_t& aPos, uint16_t aStartIndex = 0) {
+  // Length is a byte count; scan [aStartIndex, charCount) for aChar.
+  const uint16_t charCount = aStr.Length / sizeof(WCHAR);
+  uint16_t idx = aStartIndex;
+  while (idx < charCount && aStr.Buffer[idx] != aChar) {
+    ++idx;
+  }
+  if (idx >= charCount) {
+    return false;
+  }
+  aPos = idx;  // Only written on success.
+  return true;
+}
+
+inline bool IsHexDigit(WCHAR aChar) {
+  return (aChar >= L'0' && aChar <= L'9') || (aChar >= L'A' && aChar <= L'F') ||
+         (aChar >= L'a' && aChar <= L'f');
+}
+
+inline bool MatchUnicodeString(const UNICODE_STRING& aStr,
+                               bool (*aPredicate)(WCHAR)) {
+  // Walk every WCHAR in the counted string (Length is in bytes) and stop at
+  // the first one the predicate rejects. An empty string matches trivially.
+  const size_t charCount = aStr.Length / sizeof(WCHAR);
+
+  for (size_t i = 0; i < charCount; ++i) {
+    if (!aPredicate(aStr.Buffer[i])) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+inline bool Contains12DigitHexString(const UNICODE_STRING& aLeafName) {
+  // Quick check: If the string is too short, don't bother
+  // (Lower bound only: 12 hex digits, one char for '.', and 3 for extension)
+  const USHORT kMinLen = (12 + 1 + 3) * sizeof(wchar_t);
+  if (aLeafName.Length < kMinLen) {
+    return false;
+  }
+
+  uint16_t start, end;
+  if (!FindCharInUnicodeString(aLeafName, L'.', start)) {
+    return false;
+  }
+
+  ++start;
+  if (!FindCharInUnicodeString(aLeafName, L'.', end, start)) {
+    return false;
+  }
+
+  if (end - start != 12) {
+    return false;
+  }
+
+  UNICODE_STRING test;
+  test.Buffer = &aLeafName.Buffer[start];
+  test.Length = (end - start) * sizeof(WCHAR);
+  test.MaximumLength = test.Length;
+
+  return MatchUnicodeString(test, &IsHexDigit);
+}
+
+inline bool IsFileNameAtLeast16HexDigits(const UNICODE_STRING& aLeafName) {
+  // Quick check: If the string is too short, don't bother
+  // (We need 16 hex digits, one char for '.', and 3 for extension)
+  const USHORT kMinLen = (16 + 1 + 3) * sizeof(wchar_t);
+  if (aLeafName.Length < kMinLen) {
+    return false;
+  }
+
+  uint16_t dotIndex;
+  if (!FindCharInUnicodeString(aLeafName, L'.', dotIndex)) {
+    return false;
+  }
+
+  if (dotIndex < 16) {
+    return false;
+  }
+
+  UNICODE_STRING test;
+  test.Buffer = aLeafName.Buffer;
+  test.Length = dotIndex * sizeof(WCHAR);
+  test.MaximumLength = aLeafName.MaximumLength;
+
+  return MatchUnicodeString(test, &IsHexDigit);
+}
+
+inline void GetLeafName(PUNICODE_STRING aDestString,
+                        PCUNICODE_STRING aSrcString) {
+  // Length is a byte count; work in WCHAR units.
+  const size_t charCount = aSrcString->Length / sizeof(WCHAR);
+
+  // Scan backwards for the last backslash. Using an index (rather than a
+  // pointer to the last character) keeps us from forming a pointer before the
+  // start of the buffer when the source is empty or has no backslash.
+  size_t leafStart = charCount;
+  while (leafStart > 0 && aSrcString->Buffer[leafStart - 1] != L'\\') {
+    --leafStart;
+  }
+
+  // leafStart is now the index just past the final backslash, or 0 if the
+  // source contains none, in which case the whole string is the leaf.
+  aDestString->Buffer = aSrcString->Buffer + leafStart;
+  aDestString->Length = (charCount - leafStart) * sizeof(WCHAR);
+  aDestString->MaximumLength = aDestString->Length;
+}
+
+#endif  // !defined(MOZILLA_INTERNAL_API)
+
+#if defined(MOZILLA_INTERNAL_API)
+
+inline const nsDependentSubstring GetLeafName(const nsString& aString) {
+  int32_t lastBackslashPos = aString.RFindChar(L'\\');
+  int32_t leafStartPos =
+      (lastBackslashPos == kNotFound) ? 0 : (lastBackslashPos + 1);
+  return Substring(aString, leafStartPos);
+}
+
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+inline char EnsureLowerCaseASCII(char aChar) {
+  // Only 'A'..'Z' are remapped, by adding the fixed distance to 'a'..'z';
+  // every other value is returned untouched.
+  const bool isUpper = aChar >= 'A' && aChar <= 'Z';
+
+  return isUpper ? static_cast<char>(aChar + ('a' - 'A')) : aChar;
+}
+
+inline int StricmpASCII(const char* aLeft, const char* aRight) {
+  // Compare case-folded chars pairwise until they differ or aLeft ends.
+  for (;; ++aLeft, ++aRight) {
+    const char lhs = EnsureLowerCaseASCII(*aLeft);
+    const char rhs = EnsureLowerCaseASCII(*aRight);
+    if (!lhs || lhs != rhs) {
+      return lhs - rhs;
+    }
+  }
+}
+
+inline int StrcmpASCII(const char* aLeft, const char* aRight) {
+  // Compare chars pairwise until they differ or aLeft reaches its terminator.
+  for (;; ++aLeft, ++aRight) {
+    const char lhs = *aLeft;
+    const char rhs = *aRight;
+    if (!lhs || lhs != rhs) {
+      return lhs - rhs;
+    }
+  }
+}
+
+inline size_t StrlenASCII(const char* aStr) {
+  // Advance a cursor to the terminating NUL; the distance walked is the
+  // length in chars, terminator excluded.
+  const char* cursor = aStr;
+  while (*cursor != '\0') {
+    ++cursor;
+  }
+  return static_cast<size_t>(cursor - aStr);
+}
+
+class MOZ_RAII PEHeaders final {
+  /**
+   * This structure is documented on MSDN as VS_VERSIONINFO, but is not present
+   * in SDK headers because it cannot be specified as a C struct. The following
+   * structure contains the fixed-length fields at the beginning of
+   * VS_VERSIONINFO.
+   */
+  struct VS_VERSIONINFO_HEADER {
+    WORD wLength;
+    WORD wValueLength;
+    WORD wType;
+    WCHAR szKey[16];  // ArrayLength(L"VS_VERSION_INFO")
+    // Additional data goes here, aligned on a 4-byte boundary
+  };
+
+ public:
+  // The lowest two bits of an HMODULE are used as flags. Stripping those bits
+  // from the HMODULE yields the base address of the binary's memory mapping.
+  // (See LoadLibraryEx docs on MSDN)
+  template <typename T>
+  static T HModuleToBaseAddr(HMODULE aModule) {
+    return reinterpret_cast<T>(reinterpret_cast<uintptr_t>(aModule) &
+                               ~uintptr_t(3));
+  }
+
+  explicit PEHeaders(void* aBaseAddress)
+      : PEHeaders(reinterpret_cast<PIMAGE_DOS_HEADER>(aBaseAddress)) {}
+
+  explicit PEHeaders(HMODULE aModule)
+      : PEHeaders(HModuleToBaseAddr<PIMAGE_DOS_HEADER>(aModule)) {}
+
+  explicit PEHeaders(PIMAGE_DOS_HEADER aMzHeader)
+      : mMzHeader(aMzHeader),
+        mPeHeader(nullptr),
+        mImageLimit(nullptr),
+        mIsImportDirectoryTampered(false) {
+    if (!mMzHeader || mMzHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+      return;
+    }
+
+    mPeHeader = RVAToPtrUnchecked<PIMAGE_NT_HEADERS>(mMzHeader->e_lfanew);
+    if (!mPeHeader || mPeHeader->Signature != IMAGE_NT_SIGNATURE) {
+      return;
+    }
+
+    if (mPeHeader->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC) {
+      return;
+    }
+
+    DWORD imageSize = mPeHeader->OptionalHeader.SizeOfImage;
+    // This is a coarse-grained check to ensure that the image size is
+    // reasonable. If we aren't big enough to contain headers, we have a
+    // problem!
+    if (imageSize < sizeof(IMAGE_DOS_HEADER) + sizeof(IMAGE_NT_HEADERS)) {
+      return;
+    }
+
+    mImageLimit = RVAToPtrUnchecked<void*>(imageSize - 1UL);
+
+    PIMAGE_DATA_DIRECTORY importDirEntry =
+        GetImageDirectoryEntryPtr(IMAGE_DIRECTORY_ENTRY_IMPORT);
+    if (!importDirEntry) {
+      return;
+    }
+
+    mIsImportDirectoryTampered = (importDirEntry->VirtualAddress >= imageSize);
+  }
+
+  explicit operator bool() const { return !!mImageLimit; }
+
+  /**
+   * This overload computes absolute virtual addresses relative to the base
+   * address of the binary.
+   */
+  template <typename T, typename R>
+  T RVAToPtr(R aRva) const {
+    return RVAToPtr<T>(mMzHeader, aRva);
+  }
+
+  /**
+   * This overload computes a result by adding aRva to aBase, but also ensures
+   * that the resulting pointer falls within the bounds of this binary's memory
+   * mapping.
+   */
+  template <typename T, typename R>
+  T RVAToPtr(void* aBase, R aRva) const {
+    if (!mImageLimit) {
+      return nullptr;
+    }
+
+    char* absAddress = reinterpret_cast<char*>(aBase) + aRva;
+    if (absAddress < reinterpret_cast<char*>(mMzHeader) ||
+        absAddress > reinterpret_cast<char*>(mImageLimit)) {
+      return nullptr;
+    }
+
+    return reinterpret_cast<T>(absAddress);
+  }
+
+  Maybe<Range<const uint8_t>> GetBounds() const {
+    if (!mImageLimit) {
+      return Nothing();
+    }
+
+    auto base = reinterpret_cast<const uint8_t*>(mMzHeader);
+    DWORD imageSize = mPeHeader->OptionalHeader.SizeOfImage;
+    return Some(Range(base, imageSize));
+  }
+
+  bool IsWithinImage(const void* aAddress) const {
+    uintptr_t addr = reinterpret_cast<uintptr_t>(aAddress);
+    uintptr_t imageBase = reinterpret_cast<uintptr_t>(mMzHeader);
+    uintptr_t imageLimit = reinterpret_cast<uintptr_t>(mImageLimit);
+    return addr >= imageBase && addr <= imageLimit;
+  }
+
+  // Resolves the import directory (IMAGE_DIRECTORY_ENTRY_IMPORT) to a pointer.
+  PIMAGE_IMPORT_DESCRIPTOR GetImportDirectory() const {
+    // A tampered import directory could be located outside the mapped image,
+    // so in that case the lookup is done without the bounds check.
+    if (mIsImportDirectoryTampered) {
+      return GetImageDirectoryEntry<PIMAGE_IMPORT_DESCRIPTOR,
+                                    BoundsCheckPolicy::Skip>(
+          IMAGE_DIRECTORY_ENTRY_IMPORT);
+    }
+
+    return GetImageDirectoryEntry<PIMAGE_IMPORT_DESCRIPTOR>(
+        IMAGE_DIRECTORY_ENTRY_IMPORT);
+  }
+
+  // Resolves the resource directory (IMAGE_DIRECTORY_ENTRY_RESOURCE) with the
+  // default, bounds-checked policy; nullptr if it cannot be resolved.
+  PIMAGE_RESOURCE_DIRECTORY GetResourceTable() const {
+    const uint32_t resourceIndex = IMAGE_DIRECTORY_ENTRY_RESOURCE;
+    return GetImageDirectoryEntry<PIMAGE_RESOURCE_DIRECTORY>(resourceIndex);
+  }
+
+  /**
+   * Returns a pointer to the data directory slot |aDirectoryIndex| inside the
+   * optional header, or nullptr if the index is not below both
+   * NumberOfRvaAndSizes and IMAGE_NUMBEROF_DIRECTORY_ENTRIES.
+   *
+   * If |aOutRva| is supplied, it receives the offset of the returned slot from
+   * the image base, or 0 when nullptr is returned.
+   */
+  PIMAGE_DATA_DIRECTORY GetImageDirectoryEntryPtr(
+      const uint32_t aDirectoryIndex, uint32_t* aOutRva = nullptr) const {
+    const bool wantsRva = aOutRva != nullptr;
+    if (wantsRva) {
+      *aOutRva = 0;
+    }
+
+    IMAGE_OPTIONAL_HEADER& optHdr = mPeHeader->OptionalHeader;
+    const uint32_t entryCount = std::min(
+        optHdr.NumberOfRvaAndSizes, DWORD(IMAGE_NUMBEROF_DIRECTORY_ENTRIES));
+    if (aDirectoryIndex >= entryCount) {
+      return nullptr;
+    }
+
+    PIMAGE_DATA_DIRECTORY slot = optHdr.DataDirectory + aDirectoryIndex;
+    if (wantsRva) {
+      *aOutRva =
+          reinterpret_cast<char*>(slot) - reinterpret_cast<char*>(mMzHeader);
+      MOZ_ASSERT(*aOutRva);
+    }
+
+    return slot;
+  }
+
+  /**
+   * Reads the file version from the binary's version resource. On success,
+   * |aOutVersion| holds dwFileVersionMS in its upper 32 bits and
+   * dwFileVersionLS in its lower 32 bits, and the function returns true.
+   * Returns false (leaving |aOutVersion| untouched) if the resource or its
+   * fixed file info cannot be found.
+   */
+  bool GetVersionInfo(uint64_t& aOutVersion) const {
+    // RT_VERSION == 16
+    constexpr WORD kRtVersion = 16;
+    // Version resources require an id of 1
+    constexpr WORD kVersionResId = 1;
+
+    auto header =
+        FindResourceLeaf<VS_VERSIONINFO_HEADER*>(kRtVersion, kVersionResId);
+    VS_FIXEDFILEINFO* info = header ? GetFixedFileInfo(header) : nullptr;
+    if (!info) {
+      return false;
+    }
+
+    const uint64_t upper = static_cast<uint64_t>(info->dwFileVersionMS);
+    const uint64_t lower = static_cast<uint64_t>(info->dwFileVersionLS);
+    aOutVersion = (upper << 32) | lower;
+    return true;
+  }
+
+  // Copies the file header's TimeDateStamp into |aResult| and returns true.
+  // If this object is not usable, returns false and leaves |aResult| alone.
+  bool GetTimeStamp(DWORD& aResult) const {
+    const bool usable = static_cast<bool>(*this);
+    if (usable) {
+      aResult = mPeHeader->FileHeader.TimeDateStamp;
+    }
+    return usable;
+  }
+
+  /**
+   * Walks the import directory looking for the descriptor whose module name
+   * matches |aModuleNameASCII| (compared with StricmpASCII). Returns nullptr
+   * if no descriptor matches, or as soon as a descriptor's name cannot be
+   * resolved.
+   */
+  PIMAGE_IMPORT_DESCRIPTOR
+  GetImportDescriptor(const char* aModuleNameASCII) const {
+    PIMAGE_IMPORT_DESCRIPTOR desc = GetImportDirectory();
+    while (IsValid(desc)) {
+      // When the directory is tampered the name may live outside the mapped
+      // image, so it is resolved without the bounds check.
+      const char* importName;
+      if (mIsImportDirectoryTampered) {
+        importName = RVAToPtrUnchecked<const char*>(desc->Name);
+      } else {
+        importName = RVAToPtr<const char*>(desc->Name);
+      }
+
+      if (!importName) {
+        return nullptr;
+      }
+
+      if (!StricmpASCII(aModuleNameASCII, importName)) {
+        // This is the descriptor for the module we're interested in
+        return desc;
+      }
+
+      ++desc;
+    }
+
+    return nullptr;
+  }
+
+  /**
+   * Invokes |aCallback| with the module name of every import descriptor whose
+   * name can be resolved. Descriptors with an unresolvable name are skipped.
+   */
+  template <typename CallbackT>
+  void EnumImportChunks(const CallbackT& aCallback) const {
+    for (auto desc = GetImportDirectory(); IsValid(desc); ++desc) {
+      const char* importName =
+          mIsImportDirectoryTampered
+              ? RVAToPtrUnchecked<const char*>(desc->Name)
+              : RVAToPtr<const char*>(desc->Name);
+      if (importName) {
+        aCallback(importName);
+      }
+    }
+  }
+
+#if defined(MOZILLA_INTERNAL_API)
+  // Builds a case-insensitive set holding the leaf name of every module
+  // reported by EnumImportChunks().
+  nsTHashtable<nsStringCaseInsensitiveHashKey> GenerateDependentModuleSet()
+      const {
+    nsTHashtable<nsStringCaseInsensitiveHashKey> modules;
+    auto addModule = [&modules](const char* aModule) {
+      modules.PutEntry(GetLeafName(NS_ConvertASCIItoUTF16(aModule)));
+    };
+    EnumImportChunks(addModule);
+    return modules;
+  }
+#endif  // defined(MOZILLA_INTERNAL_API)
+
+  /**
+   * Locates the Import Address Table (IAT) thunks that this binary uses for
+   * the imported module named |aModuleNameASCII|.
+   *
+   * The module is looked up with GetImportDescriptor(), and the table is the
+   * one referenced by the descriptor's FirstThunk field.
+   *
+   * If |aBoundaries| is given, this method checks whether each IAT entry is
+   * within the given range, and if any entry is out of the range, we return
+   * Nothing().
+   *
+   * Returns a span running from the first thunk up to (but not including) the
+   * terminating null entry, or Nothing() if the module is not imported, if
+   * FirstThunk cannot be resolved within the image, or if the boundary check
+   * fails.
+   */
+  Maybe<Span<IMAGE_THUNK_DATA>> GetIATThunksForModule(
+      const char* aModuleNameASCII,
+      const Range<const uint8_t>* aBoundaries = nullptr) const {
+    PIMAGE_IMPORT_DESCRIPTOR impDesc = GetImportDescriptor(aModuleNameASCII);
+    if (!impDesc) {
+      return Nothing();
+    }
+
+    auto firstIatThunk =
+        this->template RVAToPtr<PIMAGE_THUNK_DATA>(impDesc->FirstThunk);
+    if (!firstIatThunk) {
+      return Nothing();
+    }
+
+    // Find the length by iterating through the table until we find a null entry
+    // NOTE(review): only the first thunk is bounds-checked against the image;
+    // the walk relies on the table being null-terminated.
+    PIMAGE_THUNK_DATA curIatThunk = firstIatThunk;
+    while (IsValid(curIatThunk)) {
+      if (aBoundaries) {
+        // The entry's value (u1.Function) is what gets range-checked, not the
+        // location of the thunk itself. The range is half-open: begin() is
+        // accepted, end() is rejected.
+        auto iatEntry =
+            reinterpret_cast<const uint8_t*>(curIatThunk->u1.Function);
+        if (iatEntry < aBoundaries->begin().get() ||
+            iatEntry >= aBoundaries->end().get()) {
+          return Nothing();
+        }
+      }
+
+      ++curIatThunk;
+    }
+
+    return Some(Span(firstIatThunk, curIatThunk));
+  }
+
+  /**
+   * Resources are stored in a three-level tree. To locate a particular entry,
+   * you must supply a resource type, the resource id, and then the language id.
+   * If aLangId == 0, we just resolve the first entry regardless of language.
+   *
+   * Returns a pointer to the resource data, or nullptr if the resource table
+   * is missing, if any level of the tree has no matching entry or has an
+   * entry of the wrong kind, or if any offset found along the way resolves to
+   * an address outside of the image.
+   */
+  template <typename T>
+  T FindResourceLeaf(WORD aType, WORD aResId, WORD aLangId = 0) const {
+    PIMAGE_RESOURCE_DIRECTORY topLevel = GetResourceTable();
+    if (!topLevel) {
+      return nullptr;
+    }
+
+    // Level 1: resource type
+    PIMAGE_RESOURCE_DIRECTORY_ENTRY typeEntry =
+        FindResourceEntry(topLevel, aType);
+    if (!typeEntry || !typeEntry->DataIsDirectory) {
+      return nullptr;
+    }
+
+    // Level 2: resource id. Offsets inside the resource tree are relative to
+    // the top-level directory. RVAToPtr returns nullptr for an out-of-bounds
+    // offset, so the result must be checked before it is dereferenced.
+    auto idDir = RVAToPtr<PIMAGE_RESOURCE_DIRECTORY>(
+        topLevel, typeEntry->OffsetToDirectory);
+    if (!idDir) {
+      return nullptr;
+    }
+
+    PIMAGE_RESOURCE_DIRECTORY_ENTRY idEntry = FindResourceEntry(idDir, aResId);
+    if (!idEntry || !idEntry->DataIsDirectory) {
+      return nullptr;
+    }
+
+    // Level 3: language
+    auto langDir = RVAToPtr<PIMAGE_RESOURCE_DIRECTORY>(
+        topLevel, idEntry->OffsetToDirectory);
+    if (!langDir) {
+      return nullptr;
+    }
+
+    PIMAGE_RESOURCE_DIRECTORY_ENTRY langEntry;
+    if (aLangId) {
+      langEntry = FindResourceEntry(langDir, aLangId);
+    } else {
+      langEntry = FindFirstResourceEntry(langDir);
+    }
+
+    // The language level must point at data, not at yet another directory
+    if (!langEntry || langEntry->DataIsDirectory) {
+      return nullptr;
+    }
+
+    auto dataEntry =
+        RVAToPtr<PIMAGE_RESOURCE_DATA_ENTRY>(topLevel, langEntry->OffsetToData);
+    if (!dataEntry) {
+      return nullptr;
+    }
+
+    // Unlike the offsets above, the data entry's OffsetToData is resolved
+    // relative to the image base.
+    return RVAToPtr<T>(dataEntry->OffsetToData);
+  }
+
+  /**
+   * Searches the section table for the first section named |aSecName| and
+   * returns the span of bytes that it occupies in memory (VirtualAddress,
+   * Misc.VirtualSize).
+   *
+   * Returns Nothing() if this object is not usable, if no section has that
+   * name, if the section has none of the bits in |aCharacteristicsMask| set,
+   * if its address or size is zero, or if its address does not resolve to a
+   * location inside of the image.
+   */
+  template <size_t N>
+  Maybe<Span<const uint8_t>> FindSection(const char (&aSecName)[N],
+                                         DWORD aCharacteristicsMask) const {
+    static_assert((N - 1) <= IMAGE_SIZEOF_SHORT_NAME,
+                  "Section names must be at most 8 characters excluding null "
+                  "terminator");
+
+    if (!(*this)) {
+      return Nothing();
+    }
+
+    Span<IMAGE_SECTION_HEADER> sectionTable = GetSectionTable();
+    for (auto&& sectionHeader : sectionTable) {
+      if (strncmp(reinterpret_cast<const char*>(sectionHeader.Name), aSecName,
+                  IMAGE_SIZEOF_SHORT_NAME)) {
+        continue;
+      }
+
+      if (!(sectionHeader.Characteristics & aCharacteristicsMask)) {
+        // We found the section but it does not have the expected
+        // characteristics
+        return Nothing();
+      }
+
+      DWORD rva = sectionHeader.VirtualAddress;
+      if (!rva) {
+        return Nothing();
+      }
+
+      DWORD size = sectionHeader.Misc.VirtualSize;
+      if (!size) {
+        return Nothing();
+      }
+
+      // RVAToPtr yields nullptr when the address is outside of the image. Do
+      // not build a span from a null pointer and a non-zero size.
+      auto base = RVAToPtr<const uint8_t*>(rva);
+      if (!base) {
+        return Nothing();
+      }
+
+      return Some(Span(base, size));
+    }
+
+    return Nothing();
+  }
+
+  // Looks up the section named ".text". Note that a binary may contain other
+  // code sections besides this one.
+  Maybe<Span<const uint8_t>> GetTextSectionInfo() const {
+    const DWORD textSectionFlags =
+        IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ;
+    return FindSection(".text", textSectionFlags);
+  }
+
+  // An import descriptor is usable when the pointer is non-null and its
+  // OriginalFirstThunk field is non-zero.
+  static bool IsValid(PIMAGE_IMPORT_DESCRIPTOR aImpDesc) {
+    if (!aImpDesc) {
+      return false;
+    }
+    return aImpDesc->OriginalFirstThunk != 0;
+  }
+
+  // A thunk is usable when the pointer is non-null and its u1.Ordinal field
+  // is non-zero.
+  static bool IsValid(PIMAGE_THUNK_DATA aImgThunk) {
+    if (!aImgThunk) {
+      return false;
+    }
+    return aImgThunk->u1.Ordinal != 0;
+  }
+
+  // Reports the flag computed by the constructor: true when the import
+  // directory's VirtualAddress is at or beyond the image size.
+  bool IsImportDirectoryTampered() const { return mIsImportDirectoryTampered; }
+
+  // Converts the optional header's AddressOfEntryPoint into a pointer.
+  FARPROC GetEntryPoint() const {
+    const auto entryRva = mPeHeader->OptionalHeader.AddressOfEntryPoint;
+    // Use the unchecked version because the entrypoint may be tampered.
+    return RVAToPtrUnchecked<FARPROC>(entryRva);
+  }
+
+ private:
+  // Selects whether GetImageDirectoryEntry() validates the resolved address
+  // against the image bounds (Default) or not (Skip).
+  enum class BoundsCheckPolicy { Default, Skip };
+
+  /**
+   * Resolves the VirtualAddress of data directory |aDirectoryIndex| to a
+   * pointer of type T. Returns nullptr if the directory slot does not exist,
+   * or, under the Default policy, if the address is outside of the image.
+   */
+  template <typename T, BoundsCheckPolicy Policy = BoundsCheckPolicy::Default>
+  T GetImageDirectoryEntry(const uint32_t aDirectoryIndex) const {
+    PIMAGE_DATA_DIRECTORY slot = GetImageDirectoryEntryPtr(aDirectoryIndex);
+    if (!slot) {
+      return nullptr;
+    }
+
+    if constexpr (Policy == BoundsCheckPolicy::Skip) {
+      return RVAToPtrUnchecked<T>(slot->VirtualAddress);
+    } else {
+      return RVAToPtr<T>(slot->VirtualAddress);
+    }
+  }
+
+  // Adds |aRva| to the image base without any validation. This private
+  // variant exists because we need to be able to resolve the bounds
+  // themselves.
+  template <typename T, typename R>
+  T RVAToPtrUnchecked(R aRva) const {
+    char* const imageBase = reinterpret_cast<char*>(mMzHeader);
+    return reinterpret_cast<T>(imageBase + aRva);
+  }
+
+  // Returns the section headers, which start SizeOfOptionalHeader bytes past
+  // the beginning of the optional header.
+  Span<IMAGE_SECTION_HEADER> GetSectionTable() const {
+    MOZ_ASSERT(*this);
+    // The Windows loader has an internal limit of 96 sections (per PE spec)
+    const WORD kMaxSections = 96;
+    const auto& fileHeader = mPeHeader->FileHeader;
+
+    auto firstSection = RVAToPtr<PIMAGE_SECTION_HEADER>(
+        &mPeHeader->OptionalHeader, fileHeader.SizeOfOptionalHeader);
+    auto sectionCount = std::min(fileHeader.NumberOfSections, kMaxSections);
+    return Span{firstSection, sectionCount};
+  }
+
+  // Returns the id-indexed entry of |aCurLevel| whose Id equals |aId|, or
+  // nullptr if there is none.
+  PIMAGE_RESOURCE_DIRECTORY_ENTRY
+  FindResourceEntry(PIMAGE_RESOURCE_DIRECTORY aCurLevel, WORD aId) const {
+    // The array of IMAGE_RESOURCE_DIRECTORY_ENTRY structures begins right
+    // after the IMAGE_RESOURCE_DIRECTORY structure. Named entries come first;
+    // since we search by ID, step over them before iterating.
+    auto firstIdEntry =
+        reinterpret_cast<PIMAGE_RESOURCE_DIRECTORY_ENTRY>(aCurLevel + 1) +
+        aCurLevel->NumberOfNamedEntries;
+    auto endOfIdEntries = firstIdEntry + aCurLevel->NumberOfIdEntries;
+
+    for (auto entry = firstIdEntry; entry != endOfIdEntries; ++entry) {
+      if (entry->Id == aId) {
+        return entry;
+      }
+    }
+
+    return nullptr;
+  }
+
+  // Returns the first entry of |aCurLevel|, regardless of whether it is
+  // indexed by name or by id, or nullptr if the directory has no entries.
+  PIMAGE_RESOURCE_DIRECTORY_ENTRY
+  FindFirstResourceEntry(PIMAGE_RESOURCE_DIRECTORY aCurLevel) const {
+    const WORD totalEntries = static_cast<WORD>(
+        aCurLevel->NumberOfNamedEntries + aCurLevel->NumberOfIdEntries);
+    if (totalEntries == 0) {
+      return nullptr;
+    }
+
+    // The array of IMAGE_RESOURCE_DIRECTORY_ENTRY structures begins right
+    // after the IMAGE_RESOURCE_DIRECTORY structure.
+    return reinterpret_cast<PIMAGE_RESOURCE_DIRECTORY_ENTRY>(aCurLevel + 1);
+  }
+
+  /**
+   * Given the header of a version resource, returns a pointer to the
+   * VS_FIXEDFILEINFO that follows it (after alignment to a 4-byte boundary).
+   *
+   * Returns nullptr if the resource is too short to hold the header, if its
+   * key is not "VS_VERSION_INFO", if its value length is not that of a
+   * VS_FIXEDFILEINFO, if the aligned offset is not within the resource's
+   * length, or if the fixed info does not carry the expected signature.
+   */
+  VS_FIXEDFILEINFO* GetFixedFileInfo(VS_VERSIONINFO_HEADER* aVerInfo) const {
+    WORD length = aVerInfo->wLength;
+    if (length < sizeof(VS_VERSIONINFO_HEADER)) {
+      return nullptr;
+    }
+
+    // RtlCompareMemory takes a length in bytes and returns the number of
+    // bytes that match. The key is a wide string, so use sizeof (which also
+    // covers the null terminator) rather than the element count; the element
+    // count would only compare the first half of the key.
+    const wchar_t kVersionInfoKey[] = L"VS_VERSION_INFO";
+    if (::RtlCompareMemory(aVerInfo->szKey, kVersionInfoKey,
+                           sizeof(kVersionInfoKey)) !=
+        sizeof(kVersionInfoKey)) {
+      return nullptr;
+    }
+
+    if (aVerInfo->wValueLength != sizeof(VS_FIXEDFILEINFO)) {
+      // Fixed file info does not exist
+      return nullptr;
+    }
+
+    WORD offset = sizeof(VS_VERSIONINFO_HEADER);
+
+    uintptr_t base = reinterpret_cast<uintptr_t>(aVerInfo);
+    // Align up to 4-byte boundary
+#pragma warning(suppress : 4146)
+    offset += (-(base + offset) & 3);
+
+    if (offset >= length) {
+      return nullptr;
+    }
+
+    auto result = reinterpret_cast<VS_FIXEDFILEINFO*>(base + offset);
+    // 0xFEEF04BD is the signature expected in a VS_FIXEDFILEINFO
+    if (result->dwSignature != 0xFEEF04BD) {
+      return nullptr;
+    }
+
+    return result;
+  }
+
+ private:
+  PIMAGE_DOS_HEADER mMzHeader;
+  PIMAGE_NT_HEADERS mPeHeader;
+  void* mImageLimit;
+  bool mIsImportDirectoryTampered;
+};
+
+// This class represents an export section of a local/remote process.
+// All memory of the target image is accessed through |MMPolicy| via the
+// mozilla::interceptor::TargetObject / TargetObjectArray wrappers.
+// Instances are obtained from the public static Get(HMODULE, ...) method; the
+// constructors are private.
+template <typename MMPolicy>
+class MOZ_RAII PEExportSection {
+  const MMPolicy& mMMPolicy;
+  // Base address of the image in the target process
+  uintptr_t mImageBase;
+  // IMAGE_EXPORT_DIRECTORY::Base, i.e. the ordinal of the first table entry
+  DWORD mOrdinalBase;
+  // RVA range [mRvaDirStart, mRvaDirEnd) occupied by the export directory
+  DWORD mRvaDirStart;
+  DWORD mRvaDirEnd;
+  mozilla::interceptor::TargetObjectArray<MMPolicy, DWORD> mExportAddressTable;
+  mozilla::interceptor::TargetObjectArray<MMPolicy, DWORD> mExportNameTable;
+  mozilla::interceptor::TargetObjectArray<MMPolicy, WORD> mExportOrdinalTable;
+
+  // Constructs an empty instance; Get() returns this whenever the export
+  // section cannot be located.
+  explicit PEExportSection(const MMPolicy& aMMPolicy)
+      : mMMPolicy(aMMPolicy),
+        mImageBase(0),
+        mOrdinalBase(0),
+        mRvaDirStart(0),
+        mRvaDirEnd(0),
+        mExportAddressTable(mMMPolicy),
+        mExportNameTable(mMMPolicy),
+        mExportOrdinalTable(mMMPolicy) {}
+
+  // Constructs an instance whose three tables are located by adding the RVAs
+  // stored in |exportDir| to |aImageBase|. The name table and the ordinal
+  // table both have NumberOfNames elements.
+  PEExportSection(const MMPolicy& aMMPolicy, uintptr_t aImageBase,
+                  DWORD aRvaDirStart, DWORD aRvaDirEnd,
+                  const IMAGE_EXPORT_DIRECTORY& exportDir)
+      : mMMPolicy(aMMPolicy),
+        mImageBase(aImageBase),
+        mOrdinalBase(exportDir.Base),
+        mRvaDirStart(aRvaDirStart),
+        mRvaDirEnd(aRvaDirEnd),
+        mExportAddressTable(mMMPolicy,
+                            mImageBase + exportDir.AddressOfFunctions,
+                            exportDir.NumberOfFunctions),
+        mExportNameTable(mMMPolicy, mImageBase + exportDir.AddressOfNames,
+                         exportDir.NumberOfNames),
+        mExportOrdinalTable(mMMPolicy,
+                            mImageBase + exportDir.AddressOfNameOrdinals,
+                            exportDir.NumberOfNames) {}
+
+  // Validates the DOS header, the NT headers and the export data directory of
+  // the image at |aImageBase|, and builds a PEExportSection from them. Any
+  // validation failure produces an empty instance.
+  static const PEExportSection Get(uintptr_t aImageBase,
+                                   const MMPolicy& aMMPolicy) {
+    mozilla::interceptor::TargetObject<MMPolicy, IMAGE_DOS_HEADER> mzHeader(
+        aMMPolicy, aImageBase);
+    if (!mzHeader || mzHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    mozilla::interceptor::TargetObject<MMPolicy, IMAGE_NT_HEADERS> peHeader(
+        aMMPolicy, aImageBase + mzHeader->e_lfanew);
+    if (!peHeader || peHeader->Signature != IMAGE_NT_SIGNATURE) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    if (peHeader->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    const IMAGE_OPTIONAL_HEADER& optionalHeader = peHeader->OptionalHeader;
+
+    DWORD imageSize = optionalHeader.SizeOfImage;
+    // This is a coarse-grained check to ensure that the image size is
+    // reasonable. If we aren't big enough to contain headers, we have a
+    // problem!
+    if (imageSize < sizeof(IMAGE_DOS_HEADER) + sizeof(IMAGE_NT_HEADERS)) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    // The export directory slot must exist in the data directory array
+    if (optionalHeader.NumberOfRvaAndSizes <= IMAGE_DIRECTORY_ENTRY_EXPORT) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    const IMAGE_DATA_DIRECTORY& exportDirectoryEntry =
+        optionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
+    if (!exportDirectoryEntry.VirtualAddress || !exportDirectoryEntry.Size) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    mozilla::interceptor::TargetObject<MMPolicy, IMAGE_EXPORT_DIRECTORY>
+        exportDirectory(aMMPolicy,
+                        aImageBase + exportDirectoryEntry.VirtualAddress);
+    if (!exportDirectory || !exportDirectory->NumberOfFunctions) {
+      return PEExportSection(aMMPolicy);
+    }
+
+    return PEExportSection(
+        aMMPolicy, aImageBase, exportDirectoryEntry.VirtualAddress,
+        exportDirectoryEntry.VirtualAddress + exportDirectoryEntry.Size,
+        *exportDirectory.GetLocalBase());
+  }
+
+  // Looks up the export address table by ordinal. Returns nullptr if
+  // |aOrdinal| is below the ordinal base or if the table lookup yields
+  // nothing.
+  // NOTE(review): unlike FindExportAddressTableEntry(), this path does not
+  // test whether the entry is a forwarder RVA - confirm that is intended.
+  FARPROC GetProcAddressByOrdinal(WORD aOrdinal) const {
+    if (aOrdinal < mOrdinalBase) {
+      return nullptr;
+    }
+
+    auto rvaToFunction = mExportAddressTable[aOrdinal - mOrdinalBase];
+    if (!rvaToFunction) {
+      return nullptr;
+    }
+    return reinterpret_cast<FARPROC>(mImageBase + *rvaToFunction);
+  }
+
+ public:
+  // Builds a PEExportSection for the image that |aModule| refers to.
+  static const PEExportSection Get(HMODULE aModule, const MMPolicy& aMMPolicy) {
+    return Get(PEHeaders::HModuleToBaseAddr<uintptr_t>(aModule), aMMPolicy);
+  }
+
+  explicit operator bool() const {
+    // Because PEExportSection doesn't use MMPolicy::Reserve(), a boolified
+    // mMMPolicy is expected to be false.  We don't check mMMPolicy here.
+    return mImageBase && mRvaDirStart && mRvaDirEnd && mExportAddressTable &&
+           mExportNameTable && mExportOrdinalTable;
+  }
+
+  // Adds |aRva| to the image base. No bounds check is performed.
+  template <typename T>
+  T RVAToPtr(uint32_t aRva) const {
+    return reinterpret_cast<T>(mImageBase + aRva);
+  }
+
+  // Returns the address of the export directory (image base + mRvaDirStart),
+  // or nullptr if this instance is empty.
+  PIMAGE_EXPORT_DIRECTORY GetExportDirectory() const {
+    if (!*this) {
+      return nullptr;
+    }
+
+    return RVAToPtr<PIMAGE_EXPORT_DIRECTORY>(mRvaDirStart);
+  }
+
+  /**
+   * This function searches the export table for a given string as
+   * GetProcAddress does, but this returns a matched entry of the Export
+   * Address Table i.e. a pointer to an RVA of a matched function instead
+   * of a function address.  If the entry is forwarded, this function
+   * returns nullptr.
+   */
+  const DWORD* FindExportAddressTableEntry(
+      const char* aFunctionNameASCII) const {
+    if (!*this || !aFunctionNameASCII) {
+      return nullptr;
+    }
+
+    // Comparator handed to BinarySearchIf(): compares the target name with
+    // the string that a name table element (an RVA) points to.
+    struct NameTableComparator {
+      NameTableComparator(const PEExportSection<MMPolicy>& aExportSection,
+                          const char* aTarget)
+          : mExportSection(aExportSection),
+            mTargetName(aTarget),
+            mTargetNamelength(StrlenASCII(aTarget)) {}
+
+      int operator()(DWORD aRVAToString) const {
+        // Map only as many characters as the target name has, plus one for
+        // the terminator.
+        mozilla::interceptor::TargetObjectArray<MMPolicy, char> itemString(
+            mExportSection.mMMPolicy, mExportSection.mImageBase + aRVAToString,
+            mTargetNamelength + 1);
+        return StrcmpASCII(mTargetName, itemString[0]);
+      }
+
+      const PEExportSection<MMPolicy>& mExportSection;
+      const char* mTargetName;
+      size_t mTargetNamelength;
+    };
+
+    const NameTableComparator comp(*this, aFunctionNameASCII);
+
+    size_t match;
+    if (!mExportNameTable.BinarySearchIf(comp, &match)) {
+      return nullptr;
+    }
+
+    // The ordinal table is parallel to the name table: the element at the
+    // matched position is the index into the export address table.
+    const WORD* index = mExportOrdinalTable[match];
+    if (!index) {
+      return nullptr;
+    }
+
+    const DWORD* rvaToFunction = mExportAddressTable[*index];
+    if (!rvaToFunction) {
+      return nullptr;
+    }
+
+    if (*rvaToFunction >= mRvaDirStart && *rvaToFunction < mRvaDirEnd) {
+      // If an entry points to an address within the export section, the
+      // field is a forwarder RVA.  We return nullptr because the entry is
+      // not a function address but a null-terminated string used for export
+      // forwarding.
+      return nullptr;
+    }
+
+    return rvaToFunction;
+  }
+
+  /**
+   * This function behaves the same as the native ::GetProcAddress except
+   * the following cases:
+   * - Returns nullptr if a target entry is forwarded to another dll.
+   */
+  FARPROC GetProcAddress(const char* aFunctionNameASCII) const {
+    uintptr_t maybeOdrinal = reinterpret_cast<uintptr_t>(aFunctionNameASCII);
+    // When the high-order word of |aFunctionNameASCII| is zero, it's not
+    // a string but an ordinal value.
+    if (maybeOdrinal < 0x10000) {
+      return GetProcAddressByOrdinal(static_cast<WORD>(maybeOdrinal));
+    }
+
+    auto rvaToFunction = FindExportAddressTableEntry(aFunctionNameASCII);
+    if (!rvaToFunction) {
+      return nullptr;
+    }
+    return reinterpret_cast<FARPROC>(mImageBase + *rvaToFunction);
+  }
+};
+
+// Returns the handle stored in slot 1 of the PEB's Reserved4 array for the
+// current thread's process. NOTE(review): presumably this slot is the
+// undocumented ProcessHeap field - confirm against the PEB layout.
+inline HANDLE RtlGetProcessHeap() {
+  return ::NtCurrentTeb()->ProcessEnvironmentBlock->Reserved4[1];
+}
+
+// Reads slot 11 of the current TEB's Reserved1 array, which this header
+// treats as the thread local storage pointer.
+inline PVOID RtlGetThreadLocalStoragePointer() {
+  PTEB currentTeb = ::NtCurrentTeb();
+  return currentTeb->Reserved1[11];
+}
+
+// Overwrites slot 11 of the current TEB's Reserved1 array, i.e. the value
+// that RtlGetThreadLocalStoragePointer() reads. For tests only.
+inline void RtlSetThreadLocalStoragePointerForTestingOnly(PVOID aNewValue) {
+  PTEB currentTeb = ::NtCurrentTeb();
+  currentTeb->Reserved1[11] = aNewValue;
+}
+
+// Returns the low 32 bits of the UniqueThread member of the CLIENT_ID that
+// is read from slot 8 of the current TEB's Reserved1 array.
+inline DWORD RtlGetCurrentThreadId() {
+  auto clientId =
+      reinterpret_cast<CLIENT_ID*>(&::NtCurrentTeb()->Reserved1[8]);
+  const uintptr_t rawThreadId =
+      reinterpret_cast<uintptr_t>(clientId->UniqueThread);
+  return static_cast<DWORD>(rawThreadId & 0xFFFFFFFFUL);
+}
+
+// Handle value -1, used in this header wherever an API needs a handle that
+// refers to the calling process (presumably the same pseudo-handle that
+// ::GetCurrentProcess() returns - confirm).
+const HANDLE kCurrentProcess = reinterpret_cast<HANDLE>(-1);
+
+// Queries the basic information of the current process and returns the low
+// 32 bits of its InheritedFromUniqueProcessId, or the failing NTSTATUS as a
+// launcher error.
+inline LauncherResult<DWORD> GetParentProcessId() {
+  // Local definition of the structure that names every field, including
+  // InheritedFromUniqueProcessId.
+  struct PROCESS_BASIC_INFORMATION {
+    NTSTATUS ExitStatus;
+    PPEB PebBaseAddress;
+    ULONG_PTR AffinityMask;
+    LONG BasePriority;
+    ULONG_PTR UniqueProcessId;
+    ULONG_PTR InheritedFromUniqueProcessId;
+  };
+
+  PROCESS_BASIC_INFORMATION basicInfo = {};
+  ULONG bytesReturned;
+  const NTSTATUS queryStatus = ::NtQueryInformationProcess(
+      kCurrentProcess, ProcessBasicInformation, &basicInfo, sizeof(basicInfo),
+      &bytesReturned);
+  if (NT_SUCCESS(queryStatus)) {
+    const ULONG_PTR parentId = basicInfo.InheritedFromUniqueProcessId;
+    return static_cast<DWORD>(parentId & 0xFFFFFFFF);
+  }
+
+  return LAUNCHER_ERROR_FROM_NTSTATUS(queryStatus);
+}
+
+// Counterpart of ::VirtualQueryEx. With MOZILLA_INTERNAL_API the Win32 API is
+// called directly; otherwise the query goes through NtQueryVirtualMemory, and
+// on failure the last Win32 error is set from the NTSTATUS and 0 is returned.
+inline SIZE_T WINAPI VirtualQueryEx(HANDLE aProcess, LPCVOID aAddress,
+                                    PMEMORY_BASIC_INFORMATION aMemInfo,
+                                    SIZE_T aMemInfoLen) {
+#if defined(MOZILLA_INTERNAL_API)
+  return ::VirtualQueryEx(aProcess, aAddress, aMemInfo, aMemInfoLen);
+#else
+  SIZE_T bytesWritten;
+  const NTSTATUS queryStatus = ::NtQueryVirtualMemory(
+      aProcess, const_cast<PVOID>(aAddress), MemoryBasicInformation, aMemInfo,
+      aMemInfoLen, &bytesWritten);
+  if (NT_SUCCESS(queryStatus)) {
+    return bytesWritten;
+  }
+
+  ::RtlSetLastWin32Error(::RtlNtStatusToDosError(queryStatus));
+  return 0;
+#endif  // defined(MOZILLA_INTERNAL_API)
+}
+
+// Same as nt::VirtualQueryEx(), applied to the current process
+// (kCurrentProcess).
+inline SIZE_T WINAPI VirtualQuery(LPCVOID aAddress,
+                                  PMEMORY_BASIC_INFORMATION aMemInfo,
+                                  SIZE_T aMemInfoLen) {
+  return nt::VirtualQueryEx(kCurrentProcess, aAddress, aMemInfo, aMemInfoLen);
+}
+
+// An _IMAGE_DATA_DIRECTORY that is value-initialized by default and that can
+// be compared for equality (both VirtualAddress and Size must match).
+struct DataDirectoryEntry : public _IMAGE_DATA_DIRECTORY {
+  DataDirectoryEntry() : _IMAGE_DATA_DIRECTORY() {}
+
+  MOZ_IMPLICIT DataDirectoryEntry(const _IMAGE_DATA_DIRECTORY& aOther)
+      : _IMAGE_DATA_DIRECTORY(aOther) {}
+
+  DataDirectoryEntry(const DataDirectoryEntry& aOther) = default;
+
+  bool operator==(const DataDirectoryEntry& aOther) const {
+    if (VirtualAddress != aOther.VirtualAddress) {
+      return false;
+    }
+    return Size == aOther.Size;
+  }
+
+  bool operator!=(const DataDirectoryEntry& aOther) const {
+    return !operator==(aOther);
+  }
+};
+
+// Queries the basic information of |aProcess| and returns its PebBaseAddress,
+// or the failing NTSTATUS as a launcher error. The returned address belongs
+// to |aProcess|.
+inline LauncherResult<void*> GetProcessPebPtr(HANDLE aProcess) {
+  PROCESS_BASIC_INFORMATION basicInfo;
+  ULONG bytesReturned;
+  const NTSTATUS queryStatus = ::NtQueryInformationProcess(
+      aProcess, ProcessBasicInformation, &basicInfo, sizeof(basicInfo),
+      &bytesReturned);
+  if (NT_SUCCESS(queryStatus)) {
+    return basicInfo.PebBaseAddress;
+  }
+
+  return LAUNCHER_ERROR_FROM_NTSTATUS(queryStatus);
+}
+
+/**
+ * Returns the base address of the executable image of |aProcess| as an
+ * HMODULE, by reading the PEB of that process.
+ *
+ * This function relies on a specific offset into the mostly-undocumented PEB
+ * structure. The risk is reduced thanks to the fact that the Chromium sandbox
+ * relies on the location of this field. It is unlikely to change at this point.
+ * To further reduce the risk, we also check for the magic 'MZ' signature that
+ * should indicate the beginning of a PE image.
+ *
+ * Returns an error if the PEB address cannot be obtained, if either read of
+ * the target process's memory fails or comes up short, or
+ * (ERROR_BAD_EXE_FORMAT) if the 'MZ' signature is absent.
+ */
+inline LauncherResult<HMODULE> GetProcessExeModule(HANDLE aProcess) {
+  LauncherResult<void*> ppeb = GetProcessPebPtr(aProcess);
+  if (ppeb.isErr()) {
+    return ppeb.propagateErr();
+  }
+
+  PEB peb;
+  SIZE_T bytesRead;
+
+  // Step 1: copy the whole PEB out of the target process
+#if defined(MOZILLA_INTERNAL_API)
+  if (!::ReadProcessMemory(aProcess, ppeb.unwrap(), &peb, sizeof(peb),
+                           &bytesRead) ||
+      bytesRead != sizeof(peb)) {
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+#else
+  NTSTATUS ntStatus = ::NtReadVirtualMemory(aProcess, ppeb.unwrap(), &peb,
+                                            sizeof(peb), &bytesRead);
+  if (!NT_SUCCESS(ntStatus) || bytesRead != sizeof(peb)) {
+    return LAUNCHER_ERROR_FROM_NTSTATUS(ntStatus);
+  }
+#endif
+
+  // peb.ImageBaseAddress
+  void* baseAddress = peb.Reserved3[1];
+
+  // Step 2: read the first two bytes of the image to verify the signature
+  char mzMagic[2];
+#if defined(MOZILLA_INTERNAL_API)
+  if (!::ReadProcessMemory(aProcess, baseAddress, mzMagic, sizeof(mzMagic),
+                           &bytesRead) ||
+      bytesRead != sizeof(mzMagic)) {
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+#else
+  ntStatus = ::NtReadVirtualMemory(aProcess, baseAddress, mzMagic,
+                                   sizeof(mzMagic), &bytesRead);
+  if (!NT_SUCCESS(ntStatus) || bytesRead != sizeof(mzMagic)) {
+    return LAUNCHER_ERROR_FROM_NTSTATUS(ntStatus);
+  }
+#endif
+
+  // Assert in debug builds, but still fail gracefully in release builds
+  MOZ_ASSERT(mzMagic[0] == 'M' && mzMagic[1] == 'Z');
+  if (mzMagic[0] != 'M' || mzMagic[1] != 'Z') {
+    return LAUNCHER_ERROR_FROM_WIN32(ERROR_BAD_EXE_FORMAT);
+  }
+
+  return static_cast<HMODULE>(baseAddress);
+}
+
+#if defined(_MSC_VER)
+// Declared so that CrossExecTransferManager::GetLocalExecModule() can cast
+// its address to an HMODULE instead of calling ::GetModuleHandleW().
+extern "C" IMAGE_DOS_HEADER __ImageBase;
+#endif
+
+// This class manages data transfer from the local process's executable
+// to another process's executable via WriteProcessMemory.
+// Bug 1662560 told us the same executable may be mapped onto a different
+// address in a different process.  This means when we transfer data within
+// the mapped executable such as a global variable or IAT from the current
+// process to another process, we need to shift its address by the difference
+// between the two executables' mapped image bases.
+class CrossExecTransferManager final {
+  HANDLE mRemoteProcess;
+  uint8_t* mLocalImagebase;
+  PEHeaders mLocalExec;
+  // Resolved lazily by EnsureRemoteImagebase(); nullptr until then.
+  uint8_t* mRemoteImagebase;
+
+  // Returns the module handle used as the local executable when the caller
+  // does not supply one.
+  static HMODULE GetLocalExecModule() {
+#if defined(_MSC_VER)
+    return reinterpret_cast<HMODULE>(&__ImageBase);
+#else
+    return ::GetModuleHandleW(nullptr);
+#endif
+  }
+
+  // Looks up the remote process's executable image base on first use and
+  // caches it in mRemoteImagebase. Propagates the error of
+  // GetProcessExeModule() on failure.
+  LauncherVoidResult EnsureRemoteImagebase() {
+    if (!mRemoteImagebase) {
+      LauncherResult<HMODULE> remoteImageBaseResult =
+          GetProcessExeModule(mRemoteProcess);
+      if (remoteImageBaseResult.isErr()) {
+        return remoteImageBaseResult.propagateErr();
+      }
+
+      mRemoteImagebase =
+          reinterpret_cast<uint8_t*>(remoteImageBaseResult.unwrap());
+    }
+    return Ok();
+  }
+
+  // Translates an address inside the local executable image into the
+  // corresponding address inside the remote image. If the remote image base
+  // is unknown or |aLocalAddress| is outside of the local image, the address
+  // is returned unchanged (and debug builds assert).
+  template <typename T>
+  T* LocalExecToRemoteExec(T* aLocalAddress) const {
+    MOZ_ASSERT(mRemoteImagebase);
+    MOZ_ASSERT(mLocalExec.IsWithinImage(aLocalAddress));
+
+    if (!mRemoteImagebase || !mLocalExec.IsWithinImage(aLocalAddress)) {
+      return aLocalAddress;
+    }
+
+    uintptr_t offset = reinterpret_cast<uintptr_t>(aLocalAddress) -
+                       reinterpret_cast<uintptr_t>(mLocalImagebase);
+    return reinterpret_cast<T*>(mRemoteImagebase + offset);
+  }
+
+ public:
+  // Uses the module returned by GetLocalExecModule() as the local executable.
+  explicit CrossExecTransferManager(HANDLE aRemoteProcess)
+      : mRemoteProcess(aRemoteProcess),
+        mLocalImagebase(
+            PEHeaders::HModuleToBaseAddr<uint8_t*>(GetLocalExecModule())),
+        mLocalExec(mLocalImagebase),
+        mRemoteImagebase(nullptr) {}
+
+  // Uses |aLocalImagebase| as the local executable.
+  CrossExecTransferManager(HANDLE aRemoteProcess, HMODULE aLocalImagebase)
+      : mRemoteProcess(aRemoteProcess),
+        mLocalImagebase(
+            PEHeaders::HModuleToBaseAddr<uint8_t*>(aLocalImagebase)),
+        mLocalExec(mLocalImagebase),
+        mRemoteImagebase(nullptr) {}
+
+  // True when the local executable's PE headers are usable.
+  explicit operator bool() const { return !!mLocalExec; }
+  HANDLE RemoteProcess() const { return mRemoteProcess; }
+  const PEHeaders& LocalPEHeaders() const { return mLocalExec; }
+
+  // Creates an AutoVirtualProtect for the remote counterpart of
+  // |aLocalAddress| in the remote process.
+  AutoVirtualProtect Protect(void* aLocalAddress, size_t aLength,
+                             DWORD aProtFlags) {
+    // If EnsureRemoteImagebase() fails, a subsequent operation will fail.
+    Unused << EnsureRemoteImagebase();
+    return AutoVirtualProtect(LocalExecToRemoteExec(aLocalAddress), aLength,
+                              aProtFlags, mRemoteProcess);
+  }
+
+  // Writes |aBufferSize| bytes from |aBufferToWrite| into the remote process,
+  // at the remote counterpart of |aDestinationAddress| (an address in the
+  // local executable image). Returns an error if the remote image base cannot
+  // be resolved or if WriteProcessMemory fails.
+  LauncherVoidResult Transfer(LPVOID aDestinationAddress,
+                              LPCVOID aBufferToWrite, SIZE_T aBufferSize) {
+    LauncherVoidResult result = EnsureRemoteImagebase();
+    if (result.isErr()) {
+      return result.propagateErr();
+    }
+
+    if (!::WriteProcessMemory(mRemoteProcess,
+                              LocalExecToRemoteExec(aDestinationAddress),
+                              aBufferToWrite, aBufferSize, nullptr)) {
+      return LAUNCHER_ERROR_FROM_LAST();
+    }
+
+    return Ok();
+  }
+};
+
+#if !defined(MOZILLA_INTERNAL_API)
+
+// Walks the current process's in-memory-order module list and returns the
+// base address of the first module whose leaf name equals |aTarget|
+// (case-insensitive). Fails with ERROR_BAD_EXE_FORMAT if the PEB has no
+// loader data, or with ERROR_MOD_NOT_FOUND if no module matches.
+inline LauncherResult<HMODULE> GetModuleHandleFromLeafName(
+    const UNICODE_STRING& aTarget) {
+  auto pebResult = nt::GetProcessPebPtr(kCurrentProcess);
+  if (pebResult.isErr()) {
+    return pebResult.propagateErr();
+  }
+
+  const PPEB peb = reinterpret_cast<PPEB>(pebResult.unwrap());
+  if (!peb->Ldr) {
+    return LAUNCHER_ERROR_FROM_WIN32(ERROR_BAD_EXE_FORMAT);
+  }
+
+  // The list is circular; the walk ends when it gets back to the list head.
+  auto listHead = &peb->Ldr->InMemoryOrderModuleList;
+  auto link = listHead->Flink;
+  while (link != listHead) {
+    const auto tableEntry =
+        CONTAINING_RECORD(link, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks);
+
+    UNICODE_STRING leaf;
+    nt::GetLeafName(&leaf, &tableEntry->FullDllName);
+
+    if (::RtlCompareUnicodeString(&leaf, &aTarget, TRUE) == 0) {
+      return reinterpret_cast<HMODULE>(tableEntry->DllBase);
+    }
+
+    link = link->Flink;
+  }
+
+  return LAUNCHER_ERROR_FROM_WIN32(ERROR_MOD_NOT_FOUND);
+}
+
+// Thin wrapper around an SRWLOCK that uses the Rtl* entry points. The
+// constructor is constexpr and initializes the lock with SRWLOCK_INIT. The
+// class can be neither copied nor moved.
+class MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS SRWLock final {
+ public:
+  constexpr SRWLock() : mLock(SRWLOCK_INIT) {}
+
+  void LockShared() { ::RtlAcquireSRWLockShared(&mLock); }
+
+  void LockExclusive() { ::RtlAcquireSRWLockExclusive(&mLock); }
+
+  void UnlockShared() { ::RtlReleaseSRWLockShared(&mLock); }
+
+  void UnlockExclusive() { ::RtlReleaseSRWLockExclusive(&mLock); }
+
+  SRWLock(const SRWLock&) = delete;
+  SRWLock(SRWLock&&) = delete;
+  SRWLock& operator=(const SRWLock&) = delete;
+  SRWLock& operator=(SRWLock&&) = delete;
+
+  // Taking the address of an SRWLock yields the underlying SRWLOCK, so that
+  // it can be handed to APIs that expect an SRWLOCK*.
+  SRWLOCK* operator&() { return &mLock; }
+
+ private:
+  SRWLOCK mLock;
+};
+
+// Scoped guard: acquires |aLock| exclusively on construction and releases it
+// on destruction. Can be neither copied nor moved.
+class MOZ_RAII AutoExclusiveLock final {
+ public:
+  explicit AutoExclusiveLock(SRWLock& aLock) : mLock(aLock) {
+    mLock.LockExclusive();
+  }
+
+  ~AutoExclusiveLock() { mLock.UnlockExclusive(); }
+
+  AutoExclusiveLock(const AutoExclusiveLock&) = delete;
+  AutoExclusiveLock& operator=(const AutoExclusiveLock&) = delete;
+  AutoExclusiveLock(AutoExclusiveLock&&) = delete;
+  AutoExclusiveLock& operator=(AutoExclusiveLock&&) = delete;
+
+ private:
+  SRWLock& mLock;
+};
+
+// Scoped guard: acquires |aLock| in shared mode on construction and releases
+// it on destruction. Can be neither copied nor moved.
+class MOZ_RAII AutoSharedLock final {
+ public:
+  explicit AutoSharedLock(SRWLock& aLock) : mLock(aLock) {
+    mLock.LockShared();
+  }
+
+  ~AutoSharedLock() { mLock.UnlockShared(); }
+
+  AutoSharedLock(const AutoSharedLock&) = delete;
+  AutoSharedLock& operator=(const AutoSharedLock&) = delete;
+  AutoSharedLock(AutoSharedLock&&) = delete;
+  AutoSharedLock& operator=(AutoSharedLock&&) = delete;
+
+ private:
+  SRWLock& mLock;
+};
+
+#endif  // !defined(MOZILLA_INTERNAL_API)
+
+// Allocation policy whose memory comes from the process heap
+// (RtlGetProcessHeap()) via RtlAllocateHeap / RtlReAllocateHeap /
+// RtlFreeHeap. Each maybe_pod_* method returns nullptr when the element
+// count fails the MulOverflowMask test; otherwise it returns whatever the
+// underlying heap call returns.
+class RtlAllocPolicy {
+ public:
+  // Allocates uninitialized storage for |aNumElems| objects of type T.
+  template <typename T>
+  T* maybe_pod_malloc(size_t aNumElems) {
+    // Reject counts for which aNumElems * sizeof(T) would overflow
+    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+
+    return static_cast<T*>(
+        ::RtlAllocateHeap(RtlGetProcessHeap(), 0, aNumElems * sizeof(T)));
+  }
+
+  // Same as maybe_pod_malloc, but requests zeroed memory (HEAP_ZERO_MEMORY).
+  template <typename T>
+  T* maybe_pod_calloc(size_t aNumElems) {
+    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+
+    return static_cast<T*>(::RtlAllocateHeap(
+        RtlGetProcessHeap(), HEAP_ZERO_MEMORY, aNumElems * sizeof(T)));
+  }
+
+  // Resizes |aPtr| to hold |aNewSize| objects of type T. |aOldSize| is not
+  // used.
+  template <typename T>
+  T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+
+    return static_cast<T*>(::RtlReAllocateHeap(RtlGetProcessHeap(), 0, aPtr,
+                                               aNewSize * sizeof(T)));
+  }
+
+  // The pod_* variants simply forward to their maybe_pod_* counterparts.
+  template <typename T>
+  T* pod_malloc(size_t aNumElems) {
+    return maybe_pod_malloc<T>(aNumElems);
+  }
+
+  template <typename T>
+  T* pod_calloc(size_t aNumElems) {
+    return maybe_pod_calloc<T>(aNumElems);
+  }
+
+  template <typename T>
+  T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    return maybe_pod_realloc<T>(aPtr, aOldSize, aNewSize);
+  }
+
+  // Returns |aPtr| to the process heap. |aNumElems| is not used.
+  template <typename T>
+  void free_(T* aPtr, size_t aNumElems = 0) {
+    ::RtlFreeHeap(RtlGetProcessHeap(), 0, aPtr);
+  }
+
+  // Does nothing.
+  void reportAllocOverflow() const {}
+
+  // Always returns true.
+  [[nodiscard]] bool checkSimulatedOOM() const { return true; }
+};
+
+// Owns a mapped view of a section and unmaps it on destruction. Movable, not
+// copyable.
+class AutoMappedView final {
+  // Address of the mapped view, or nullptr when nothing is owned.
+  void* mView;
+
+  // Unmaps the view, if there is one, and resets mView to nullptr. Outside of
+  // MOZILLA_INTERNAL_API, a failure to unmap is recorded as the last Win32
+  // error.
+  void Unmap() {
+    if (!mView) {
+      return;
+    }
+
+#if defined(MOZILLA_INTERNAL_API)
+    ::UnmapViewOfFile(mView);
+#else
+    NTSTATUS status = ::NtUnmapViewOfSection(nt::kCurrentProcess, mView);
+    if (!NT_SUCCESS(status)) {
+      ::RtlSetLastWin32Error(::RtlNtStatusToDosError(status));
+    }
+#endif
+    mView = nullptr;
+  }
+
+ public:
+  // Takes ownership of a view that has already been mapped.
+  explicit AutoMappedView(void* aView) : mView(aView) {}
+
+  // Maps a view of |aSection| into the current process with
+  // |aProtectionFlags|. The offset and size arguments passed below are all
+  // zero. Use operator bool to find out whether the mapping succeeded.
+  AutoMappedView(HANDLE aSection, ULONG aProtectionFlags) : mView(nullptr) {
+#if defined(MOZILLA_INTERNAL_API)
+    mView = ::MapViewOfFile(aSection, aProtectionFlags, 0, 0, 0);
+#else
+    SIZE_T viewSize = 0;
+    NTSTATUS status = ::NtMapViewOfSection(aSection, nt::kCurrentProcess,
+                                           &mView, 0, 0, nullptr, &viewSize,
+                                           ViewUnmap, 0, aProtectionFlags);
+    if (!NT_SUCCESS(status)) {
+      ::RtlSetLastWin32Error(::RtlNtStatusToDosError(status));
+    }
+#endif
+  }
+  ~AutoMappedView() { Unmap(); }
+
+  // Allow move & Disallow copy
+  AutoMappedView(AutoMappedView&& aOther) : mView(aOther.mView) {
+    aOther.mView = nullptr;
+  }
+  AutoMappedView& operator=(AutoMappedView&& aOther) {
+    if (this != &aOther) {
+      // Release the view that is currently owned before taking over the
+      // other one
+      Unmap();
+      mView = aOther.mView;
+      aOther.mView = nullptr;
+    }
+    return *this;
+  }
+  AutoMappedView(const AutoMappedView&) = delete;
+  AutoMappedView& operator=(const AutoMappedView&) = delete;
+
+  // True when a view is currently owned.
+  explicit operator bool() const { return !!mView; }
+  // Reinterprets the address of the view as a T*.
+  template <typename T>
+  T* as() {
+    return reinterpret_cast<T*>(mView);
+  }
+
+  // Gives up ownership: returns the view's address without unmapping it.
+  void* release() {
+    void* p = mView;
+    mView = nullptr;
+    return p;
+  }
+};
+
+}  // namespace nt
+}  // namespace mozilla
+
+#endif  // mozilla_NativeNt_h
diff --git a/mozglue/misc/PlatformConditionVariable.h b/mozglue/misc/PlatformConditionVariable.h
new file mode 100644
index 0000000000..2b006918cb
--- /dev/null
+++ b/mozglue/misc/PlatformConditionVariable.h
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_ConditionVariable_h
+#define mozilla_ConditionVariable_h
+
+#include <stdint.h>
+
+#include <utility>
+
+#include "mozilla/Attributes.h"
+#include "mozilla/PlatformMutex.h"
+#include "mozilla/TimeStamp.h"
+#ifndef XP_WIN
+#  include <pthread.h>
+#endif
+
+namespace mozilla {
+
+enum class CVStatus { NoTimeout, Timeout };  // Result type of wait_for().
+
+namespace detail {
+
+class ConditionVariableImpl {
+ public:
+  struct PlatformData;
+
+  MFBT_API ConditionVariableImpl();
+  MFBT_API ~ConditionVariableImpl();
+
+  // Wake one thread that is waiting on this condition.
+  MFBT_API void notify_one();
+
+  // Wake all threads that are waiting on this condition.
+  MFBT_API void notify_all();
+
+  // Atomically release |lock| and sleep the current thread of execution on
+  // this condition variable. |lock| will be re-acquired before this function
+  // returns. The thread may be woken by another thread via notify_one() or
+  // notify_all(), but may also wake spuriously. The caller should recheck its
+  // predicate after this function returns, typically in a while loop.
+  MFBT_API void wait(MutexImpl& lock);
+
+  // NOTE(review): presumably wait() bounded by the duration |rel_time| - confirm.
+  MFBT_API CVStatus wait_for(MutexImpl& lock,
+                             const mozilla::TimeDuration& rel_time);
+
+ private:
+  ConditionVariableImpl(const ConditionVariableImpl&) = delete;
+  ConditionVariableImpl& operator=(const ConditionVariableImpl&) = delete;
+
+  PlatformData* platformData();
+
+#ifndef XP_WIN  // pthread_cond_t-sized; the assert below tests its size
+  void* platformData_[sizeof(pthread_cond_t) / sizeof(void*)];
+  static_assert(sizeof(pthread_cond_t) / sizeof(void*) != 0 &&
+                    sizeof(pthread_cond_t) % sizeof(void*) == 0,
+                "pthread_cond_t must have pointer alignment");
+#else  // Windows: four pointers of raw storage
+  void* platformData_[4];
+#endif
+};
+
+}  // namespace detail
+
+}  // namespace mozilla
+
+#endif  // mozilla_ConditionVariable_h
diff --git a/mozglue/misc/PlatformMutex.h b/mozglue/misc/PlatformMutex.h
new file mode 100644
index 0000000000..704bf60e41
--- /dev/null
+++ b/mozglue/misc/PlatformMutex.h
@@ -0,0 +1,66 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_PlatformMutex_h
+#define mozilla_PlatformMutex_h
+
+#include <utility>
+
+#include "mozilla/Attributes.h"
+
+#if !defined(XP_WIN)
+#  include <pthread.h>
+#endif
+
+namespace mozilla {
+
+namespace detail {
+
+class ConditionVariableImpl;
+
+class MutexImpl {
+ public:
+  struct PlatformData;
+
+  explicit MFBT_API MutexImpl();
+  MFBT_API ~MutexImpl();
+
+ protected:  // Locking is only reachable from derived classes and friends.
+  MFBT_API void lock();
+  MFBT_API void unlock();
+  // We have a separate, forwarding API so internal uses don't have to go
+  // through the PLT.
+  MFBT_API bool tryLock();
+
+ private:
+  MutexImpl(const MutexImpl&) = delete;
+  void operator=(const MutexImpl&) = delete;
+  MutexImpl(MutexImpl&&) = delete;
+  void operator=(MutexImpl&&) = delete;
+  bool operator==(const MutexImpl& rhs) = delete;
+
+  void mutexLock();  // NOTE(review): presumably the non-PLT internal path
+  bool mutexTryLock();
+
+  PlatformData* platformData();
+
+#if !defined(XP_WIN)  // pthread_mutex_t-sized; the assert below tests size
+  void* platformData_[sizeof(pthread_mutex_t) / sizeof(void*)];
+  static_assert(sizeof(pthread_mutex_t) / sizeof(void*) != 0 &&
+                    sizeof(pthread_mutex_t) % sizeof(void*) == 0,
+                "pthread_mutex_t must have pointer alignment");
+#else  // Windows: six pointers of raw storage
+  void* platformData_[6];
+#endif
+
+  friend class mozilla::detail::ConditionVariableImpl;
+};
+
+}  // namespace detail
+
+}  // namespace mozilla
+
+#endif  // mozilla_PlatformMutex_h
diff --git a/mozglue/misc/PreXULSkeletonUI.cpp b/mozglue/misc/PreXULSkeletonUI.cpp
new file mode 100644
index 0000000000..22abee4456
--- /dev/null
+++ b/mozglue/misc/PreXULSkeletonUI.cpp
@@ -0,0 +1,2222 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "PreXULSkeletonUI.h"
+
+#include <algorithm>
+#include <math.h>
+#include <limits.h>
+#include <cmath>
+#include <locale>
+#include <string>
+#include <objbase.h>
+#include <shlobj.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/BaseProfilerMarkers.h"
+#include "mozilla/FStream.h"
+#include "mozilla/HelperMacros.h"
+#include "mozilla/glue/Debug.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Unused.h"
+#include "mozilla/WindowsDpiAwareness.h"
+#include "mozilla/WindowsVersion.h"
+
+namespace mozilla {
+
+// ColorRect defines an optionally-rounded, optionally-bordered rectangle of a
+// particular color that we will draw.
+struct ColorRect {
+  uint32_t color;        // Fill color.
+  uint32_t borderColor;  // Border color; only used when borderWidth != 0.
+  int x;                 // Left edge, as a pixel column in sPixelBuffer.
+  int y;                 // Top edge, as a pixel row in sPixelBuffer.
+  int width;             // Extent in pixels along x.
+  int height;            // Extent in pixels along y.
+  int borderWidth;       // Border thickness in pixels; 0 means no border.
+  int borderRadius;      // Corner radius in pixels; 0 means square corners.
+  bool flipIfRTL;  // NOTE(review): presumably mirrored for RTL - confirm.
+};
+
+// DrawRect is mostly the same as ColorRect, but exists as an implementation
+// detail to simplify drawing borders. We draw borders as a strokeOnly rect
+// underneath an inner rect of a particular color. We also need to keep
+// track of the backgroundColor for rounding rects, in order to correctly
+// anti-alias.
+struct DrawRect {
+  uint32_t color;            // Color the rect is painted with.
+  uint32_t backgroundColor;  // Color rounded corners are blended against.
+  int x;                     // Left edge (pixel column).
+  int y;                     // Top edge (pixel row).
+  int width;
+  int height;
+  int borderRadius;  // Corner radius in pixels; 0 draws square corners.
+  int borderWidth;   // Stroke thickness; 0 for filled (non-stroke) rects.
+  bool strokeOnly;   // Border pass: middle rows get only left/right edges.
+};
+
+struct NormalizedRGB {
+  double r, g, b;
+};
+
+// Unpacks 0xRRGGBB into channels scaled to [0, 1]; bits 24-31 are ignored.
+NormalizedRGB UintToRGB(uint32_t color) {
+  const uint32_t red = (color >> 16) & 0xff;
+  const uint32_t green = (color >> 8) & 0xff;
+  const uint32_t blue = color & 0xff;
+  return NormalizedRGB{red / 255.0, green / 255.0, blue / 255.0};
+}
+
+// Packs the channels back into 0xRRGGBB; each scaled channel is truncated.
+uint32_t RGBToUint(const NormalizedRGB& rgb) {
+  const uint32_t red = static_cast<uint32_t>(rgb.r * 255.0);
+  const uint32_t green = static_cast<uint32_t>(rgb.g * 255.0);
+  const uint32_t blue = static_cast<uint32_t>(rgb.b * 255.0);
+  return (red << 16) | (green << 8) | blue;
+}
+
+double Lerp(double a, double b, double x) { return a + x * (b - a); }
+NormalizedRGB Lerp(const NormalizedRGB& a, const NormalizedRGB& b, double x) {
+  return NormalizedRGB{Lerp(a.r, b.r, x), Lerp(a.g, b.g, x), Lerp(a.b, b.b, x)};
+}
+
+// Cubic smoothstep: maps a linear input in [0,1] to a smooth curve in [0,1]
+double SmoothStep3(double x) { return x * x * (3.0 - 2.0 * x); }
+
+static const wchar_t kPreXULSkeletonUIKeyPath[] =
+    L"SOFTWARE"
+    L"\\" MOZ_APP_VENDOR L"\\" MOZ_APP_BASENAME L"\\PreXULSkeletonUISettings";
+
+static bool sPreXULSkeletonUIEnabled = false;
+// sPreXULSkeletonUIDisallowed means that we don't even have the capacity to
+// enable the skeleton UI, whether because we're on a platform that doesn't
+// support it or because we launched with command line arguments that we don't
+// support. Some of these situations are transient, so we want to make sure we
+// don't mess with registry values in these scenarios that we may use in
+// other scenarios in which the skeleton UI is actually enabled.
+static bool sPreXULSkeletonUIDisallowed = false;
+static HWND sPreXULSkeletonUIWindow;
+static LPWSTR const gStockApplicationIcon = MAKEINTRESOURCEW(32512);
+static LPWSTR const gIDCWait = MAKEINTRESOURCEW(32514);
+static HANDLE sPreXULSKeletonUIAnimationThread;  // (sic: "SKeleton")
+
+static uint32_t* sPixelBuffer = nullptr;  // pixel (x, y): [y * sWindowWidth + x]
+static Vector<ColorRect>* sAnimatedRects = nullptr;
+static int sTotalChromeHeight = 0;  // row limit used when filling flat rects
+static volatile LONG sAnimationControlFlag = 0;
+static bool sMaximized = false;
+static int sNonClientVerticalMargins = 0;
+static int sNonClientHorizontalMargins = 0;
+static uint32_t sDpi = 0;
+
+// Color values needed by the animation loop; DrawSkeletonUI copies them from
+static uint32_t sAnimationColor;          // the current theme.
+static uint32_t sToolbarForegroundColor;
+
+static ThemeMode sTheme = ThemeMode::Invalid;
+
+typedef BOOL(WINAPI* EnableNonClientDpiScalingProc)(HWND);
+static EnableNonClientDpiScalingProc sEnableNonClientDpiScaling = NULL;
+typedef int(WINAPI* GetSystemMetricsForDpiProc)(int, UINT);
+GetSystemMetricsForDpiProc sGetSystemMetricsForDpi = NULL;
+typedef UINT(WINAPI* GetDpiForWindowProc)(HWND);
+GetDpiForWindowProc sGetDpiForWindow = NULL;
+typedef ATOM(WINAPI* RegisterClassWProc)(const WNDCLASSW*);
+RegisterClassWProc sRegisterClassW = NULL;
+typedef HICON(WINAPI* LoadIconWProc)(HINSTANCE, LPCWSTR);
+LoadIconWProc sLoadIconW = NULL;
+typedef HICON(WINAPI* LoadCursorWProc)(HINSTANCE, LPCWSTR);
+LoadCursorWProc sLoadCursorW = NULL;
+typedef HWND(WINAPI* CreateWindowExWProc)(DWORD, LPCWSTR, LPCWSTR, DWORD, int,
+                                          int, int, int, HWND, HMENU, HINSTANCE,
+                                          LPVOID);
+CreateWindowExWProc sCreateWindowExW = NULL;
+typedef BOOL(WINAPI* ShowWindowProc)(HWND, int);
+ShowWindowProc sShowWindow = NULL;
+typedef BOOL(WINAPI* SetWindowPosProc)(HWND, HWND, int, int, int, int, UINT);
+SetWindowPosProc sSetWindowPos = NULL;
+typedef HDC(WINAPI* GetWindowDCProc)(HWND);
+GetWindowDCProc sGetWindowDC = NULL;
+typedef int(WINAPI* FillRectProc)(HDC, const RECT*, HBRUSH);
+FillRectProc sFillRect = NULL;
+typedef BOOL(WINAPI* DeleteObjectProc)(HGDIOBJ);
+DeleteObjectProc sDeleteObject = NULL;
+typedef int(WINAPI* ReleaseDCProc)(HWND, HDC);
+ReleaseDCProc sReleaseDC = NULL;
+typedef HMONITOR(WINAPI* MonitorFromWindowProc)(HWND, DWORD);
+MonitorFromWindowProc sMonitorFromWindow = NULL;
+typedef BOOL(WINAPI* GetMonitorInfoWProc)(HMONITOR, LPMONITORINFO);
+GetMonitorInfoWProc sGetMonitorInfoW = NULL;
+typedef LONG_PTR(WINAPI* SetWindowLongPtrWProc)(HWND, int, LONG_PTR);
+SetWindowLongPtrWProc sSetWindowLongPtrW = NULL;
+typedef int(WINAPI* StretchDIBitsProc)(HDC, int, int, int, int, int, int, int,
+                                       int, const VOID*, const BITMAPINFO*,
+                                       UINT, DWORD);
+StretchDIBitsProc sStretchDIBits = NULL;
+typedef HBRUSH(WINAPI* CreateSolidBrushProc)(COLORREF);
+CreateSolidBrushProc sCreateSolidBrush = NULL;
+
+static int sWindowWidth;
+static int sWindowHeight;
+static double sCSSToDevPixelScaling;
+
+static const int kAnimationCSSPixelsPerFrame = 21;
+static const int kAnimationCSSExtraWindowSize = 300;
+
+// NOTE: these values were pulled out of thin air as round numbers that are
+// likely to be too big to be seen in practice. If we legitimately see windows
+// this big, we probably don't want to be drawing them on the CPU anyway.
+static const uint32_t kMaxWindowWidth = 1 << 16;
+static const uint32_t kMaxWindowHeight = 1 << 16;
+
+static const wchar_t* sEnabledRegSuffix = L"|Enabled";
+static const wchar_t* sScreenXRegSuffix = L"|ScreenX";
+static const wchar_t* sScreenYRegSuffix = L"|ScreenY";
+static const wchar_t* sWidthRegSuffix = L"|Width";
+static const wchar_t* sHeightRegSuffix = L"|Height";
+static const wchar_t* sMaximizedRegSuffix = L"|Maximized";
+static const wchar_t* sUrlbarCSSRegSuffix = L"|UrlbarCSSSpan";
+static const wchar_t* sCssToDevPixelScalingRegSuffix = L"|CssToDevPixelScaling";
+static const wchar_t* sSearchbarRegSuffix = L"|SearchbarCSSSpan";
+static const wchar_t* sSpringsCSSRegSuffix = L"|SpringsCSSSpan";
+static const wchar_t* sThemeRegSuffix = L"|Theme";
+static const wchar_t* sFlagsRegSuffix = L"|Flags";
+
+struct LoadedCoTaskMemFreeDeleter {
+  // Frees |ptr| with CoTaskMemFree, resolved once from ole32 and then cached.
+  void operator()(void* ptr) {
+    static decltype(CoTaskMemFree)* sFree = nullptr;
+    if (sFree) {
+      sFree(ptr);
+      return;
+    }
+    HMODULE ole32 = ::LoadLibraryW(L"ole32");  // never freed, on purpose
+    sFree = ole32 ? reinterpret_cast<decltype(sFree)>(
+                        ::GetProcAddress(ole32, "CoTaskMemFree"))
+                  : nullptr;
+    if (!ole32) {
+      printf_stderr(
+          "Could not load ole32 - will not free with CoTaskMemFree");
+    } else if (!sFree) {
+      printf_stderr("Could not find CoTaskMemFree");
+    } else {
+      sFree(ptr);
+    }
+  }
+};
+
+// Concatenates |prefix| and |suffix| into a single value name. Both arguments
+// must be non-null, null-terminated wide strings.
+std::wstring GetRegValueName(const wchar_t* prefix, const wchar_t* suffix) {
+  return std::wstring(prefix) + suffix;
+}
+
+// This is paraphrased from WinHeaderOnlyUtils.h. The fact that this file is
+// included in standalone SpiderMonkey builds prohibits us from including that
+// file directly, and it hardly warrants its own header. Bug 1674920 tracks
+// only including this file for gecko-related builds.
+UniquePtr<wchar_t[]> GetBinaryPath() {
+  // Returns the path GetModuleFileNameW reports for a null module handle, or
+  // nullptr if that call fails. The buffer starts at MAX_PATH characters and
+  // is doubled whenever the call reports that the path did not fit.
+
+  for (DWORD capacity = MAX_PATH;; capacity *= 2) {
+    UniquePtr<wchar_t[]> path = MakeUnique<wchar_t[]>(capacity);
+    DWORD written = ::GetModuleFileNameW(nullptr, path.get(), capacity);
+    if (!written) {
+      return nullptr;
+    }
+
+    // A full buffer plus ERROR_INSUFFICIENT_BUFFER means truncation: retry.
+    const bool truncated =
+        written == capacity && ::GetLastError() == ERROR_INSUFFICIENT_BUFFER;
+    if (!truncated) {
+      return path;
+    }
+  }
+}
+
+static UniquePtr<wchar_t, LoadedCoTaskMemFreeDeleter> GetKnownFolderPath(
+    REFKNOWNFOLDERID folderId) {
+  // SHGetKnownFolderPath is resolved from shell32 on first use and cached.
+  // shell32 is deliberately never released with `FreeLibrary`: doing so would
+  // be effectively a no-op or counterproductive, since we load it elsewhere
+  // anyway. It gets cleaned up when the process is terminated.
+  static decltype(SHGetKnownFolderPath)* sGetPath = nullptr;
+  if (!sGetPath) {
+    if (HMODULE shell32 = ::LoadLibraryW(L"shell32")) {
+      sGetPath = reinterpret_cast<decltype(sGetPath)>(
+          ::GetProcAddress(shell32, "SHGetKnownFolderPath"));
+    }
+    if (!sGetPath) {
+      return nullptr;
+    }
+  }
+
+  // The call's result code is not checked; whatever it leaves in
+  // |folderPath| (initially null) is what gets wrapped and returned.
+  PWSTR folderPath = nullptr;
+  sGetPath(folderId, 0, nullptr, &folderPath);
+  return UniquePtr<wchar_t, LoadedCoTaskMemFreeDeleter>(folderPath);
+}
+
+// Note: this is specifically *not* a robust, multi-locale lowercasing
+// operation. It is not intended to be such. It is simply intended to match the
+// way in which we look for other instances of firefox to remote into.
+// See
+// https://searchfox.org/mozilla-central/rev/71621bfa47a371f2b1ccfd33c704913124afb933/toolkit/components/remote/nsRemoteService.cpp#56
+static void MutateStringToLowercase(wchar_t* ptr) {
+  // In place and ASCII-only: L'A'..L'Z' become L'a'..L'z', the rest is kept.
+  for (wchar_t* cursor = ptr; *cursor; ++cursor) {
+    const bool isAsciiUpper = *cursor >= L'A' && *cursor <= L'Z';
+    if (isAsciiUpper) {
+      *cursor += L'a' - L'A';
+    }
+  }
+}
+
+static bool TryGetSkeletonUILock() {
+  // Tries to create, with exclusive access, a lock file named after a hash of
+  // this executable's path under the local app data folder. Returns true if
+  // the file was created, false if it was not or if a path lookup failed.
+
+  auto localAppData = GetKnownFolderPath(FOLDERID_LocalAppData);
+  if (!localAppData) {
+    return false;
+  }
+
+  // Note: mozglue cannot easily reach toolkit helpers like `GetInstallHash`,
+  // and moving it here would mean ripping out its use of toolkit types.
+  // Hashing the binary path ourselves is not quite as robust, but our
+  // registry values are keyed with the same weakness, so it makes sense to
+  // simply match them.
+  UniquePtr<wchar_t[]> exePath = GetBinaryPath();
+  if (!exePath) {
+    return false;
+  }
+
+  // Lowercase the path to match how we look for remote instances.
+  MutateStringToLowercase(exePath.get());
+
+  // This isn't perfect: a 32-bit hash of the executable path could collide,
+  // and casing could affect it, but that should be uncommon enough and the
+  // failure case mild enough to be acceptable.
+  const uint32_t exePathHash = HashString(exePath.get());
+
+  // Two hex characters per byte of the hash, plus the null terminator.
+  const uint32_t kHexHashLength = sizeof(uint32_t) * 2 + 1;
+  UniquePtr<wchar_t[]> hexHash = MakeUnique<wchar_t[]>(kHexHashLength);
+  swprintf(hexHash.get(), kHexHashLength, L"%08x", exePathHash);
+
+  std::wstring lockPath(localAppData.get());
+  lockPath +=
+      L"\\" MOZ_APP_VENDOR L"\\" MOZ_APP_BASENAME L"\\SkeletonUILock-";
+  lockPath += hexHash.get();
+
+  // The handle is leaked intentionally - that is (kind of) the point. It is
+  // held, with no sharing, until the application exits. If creating the file
+  // fails, we assume another instance of the executable is holding it.
+  const DWORD kNoSharing = 0;  // this is how the lock works
+  HANDLE lockFile =
+      ::CreateFileW(lockPath.c_str(), GENERIC_READ | GENERIC_WRITE,
+                    kNoSharing, nullptr, CREATE_ALWAYS,
+                    FILE_FLAG_DELETE_ON_CLOSE,  // Don't leave this lying around
+                    nullptr);
+
+  return lockFile != INVALID_HANDLE_VALUE;
+}
+
+const char kGeneralSection[] = "[General]";
+const char kStartWithLastProfile[] = "StartWithLastProfile=";
+
+static bool ProfileDbHasStartWithLastProfile(IFStream& iniContents) {
+  // Looks for "StartWithLastProfile=" inside the [General] section and
+  // returns false for a value starting with '0', true for one starting with
+  // '1'. Entries with any other value are skipped.
+
+  const size_t kKeyLength = sizeof(kStartWithLastProfile) - 1;
+  bool inGeneral = false;
+  std::string line;
+
+  while (std::getline(iniContents, line)) {
+    // Strip leading spaces and tabs. If the line holds nothing else (or is
+    // empty), find_first_not_of yields npos and the whole line is erased.
+    line.erase(0, line.find_first_not_of(" \t"));
+
+    if (line.compare(kGeneralSection) == 0) {
+      inGeneral = true;
+    } else if (inGeneral && line[0] == '[') {
+      // Any other section header ends the [General] section.
+      inGeneral = false;
+    } else if (inGeneral && line.rfind(kStartWithLastProfile, 0) == 0) {
+      // rfind(key, 0) only matches at position 0, i.e. a prefix test. The
+      // character right after the key decides; it is '\0' if nothing follows.
+      const char value = line.c_str()[kKeyLength];
+      if (value == '0' || value == '1') {
+        return value == '1';
+      }
+    }
+  }
+
+  // If we don't find it in the .ini file, we interpret that as true
+  return true;
+}
+
+static bool CheckForStartWithLastProfile() {
+  // Opens profiles.ini under the roaming app data folder and reports what
+  // ProfileDbHasStartWithLastProfile finds in it. Returns false if the folder
+  // path cannot be obtained or the file cannot be opened.
+  auto roamingDir = GetKnownFolderPath(FOLDERID_RoamingAppData);
+  if (!roamingDir) {
+    return false;
+  }
+
+  std::wstring iniPath(roamingDir.get());
+  iniPath += L"\\" MOZ_APP_VENDOR L"\\" MOZ_APP_BASENAME L"\\profiles.ini";
+  IFStream iniStream(iniPath.c_str());
+
+  return !iniStream.fail() && ProfileDbHasStartWithLastProfile(iniStream);
+}
+
+// We could use nsAutoRegKey, but including nsWindowsHelpers.h causes build
+// failures in random places because we're in mozglue. Overall it should be
+// simpler and cleaner to just step around that issue with this class:
+class MOZ_RAII AutoCloseRegKey {
+  HKEY mKey;  // Closed with RegCloseKey when this object is destroyed.
+ public:
+  explicit AutoCloseRegKey(HKEY key) : mKey(key) {}
+  ~AutoCloseRegKey() { ::RegCloseKey(mKey); }
+  AutoCloseRegKey(const AutoCloseRegKey&) = delete;  // avoids a double close
+  AutoCloseRegKey& operator=(const AutoCloseRegKey&) = delete;
+};
+
+// Scales CSS pixels and rounds to the nearest device pixel (halves go up).
+int CSSToDevPixels(double cssPixels, double scaling) {
+  return static_cast<int>(floor(cssPixels * scaling + 0.5));
+}
+
+int CSSToDevPixels(int cssPixels, double scaling) {
+  return CSSToDevPixels(static_cast<double>(cssPixels), scaling);
+}
+
+// Floors instead of rounding; border widths, for example, appear to do this.
+int CSSToDevPixelsFloor(double cssPixels, double scaling) {
+  return static_cast<int>(floor(cssPixels * scaling));
+}
+
+int CSSToDevPixelsFloor(int cssPixels, double scaling) {
+  return CSSToDevPixelsFloor(static_cast<double>(cssPixels), scaling);
+}
+
+// Signed distance from (x, y) to the origin-centered circle of |radius|.
+double SignedDistanceToCircle(double x, double y, double radius) {
+  return sqrt(x * x + y * y) - radius;
+}
+
+// Maps a signed distance in device pixels to an alpha value. Modeled on
+// https://searchfox.org/mozilla-central/rev/a5d9abfda1e26b1207db9549549ab0bdd73f735d/gfx/wr/webrender/res/shared.glsl#141-187
+double DistanceAntiAlias(double signedDistance) {
+  // The aa range of 0.5 is explained in the code linked above.
+  const double kAaRange = 0.5;
+  const double kEpsilon = std::numeric_limits<double>::epsilon();
+  const double scaled = 0.5 * signedDistance / kAaRange;
+  if (scaled <= -0.5 + kEpsilon) return 1.0;
+  if (scaled >= 0.5 - kEpsilon) return 0.0;
+  return 0.5 + scaled * (0.8431027 * scaled * scaled - 1.14453603);
+}
+
+void RasterizeRoundedRectTopAndBottom(const DrawRect& rect) {
+  // Paints the top and bottom |borderRadius| rows of |rect| into
+  // sPixelBuffer: anti-aliased corners with a solid run between them.
+  const int radius = rect.borderRadius;
+  if (rect.height <= 2 * radius) {
+    MOZ_ASSERT(false, "Skeleton UI rect height too small for border radius.");
+    return;
+  }
+  if (rect.width <= 2 * radius) {
+    MOZ_ASSERT(false, "Skeleton UI rect width too small for border radius.");
+    return;
+  }
+
+  const NormalizedRGB background = UintToRGB(rect.backgroundColor);
+  const NormalizedRGB foreground = UintToRGB(rect.color);
+
+  // Columns of the corner pixels closest to the rect's horizontal center.
+  const int innerLeftX = rect.x + radius - 1;
+  const int innerRightX = rect.x + rect.width - radius;
+
+  // Row 0 is the innermost row of each band; higher rows move outwards.
+  for (int row = 0; row < radius; ++row) {
+    const int topY = rect.y + radius - 1 - row;
+    const int bottomY = rect.y + rect.height - radius + row;
+    uint32_t* topLeft = &sPixelBuffer[topY * sWindowWidth] + innerLeftX;
+    uint32_t* topRight = &sPixelBuffer[topY * sWindowWidth] + innerRightX;
+    uint32_t* bottomLeft = &sPixelBuffer[bottomY * sWindowWidth] + innerLeftX;
+    uint32_t* bottomRight =
+        &sPixelBuffer[bottomY * sWindowWidth] + innerRightX;
+
+    // Distances are measured from pixel centers, hence the 0.5 offsets.
+    const double centerY = row + 0.5;
+    for (int col = 0; col < radius; ++col) {
+      const double centerX = col + 0.5;
+      const double alpha = DistanceAntiAlias(SignedDistanceToCircle(
+          centerX, centerY, static_cast<double>(radius)));
+      const uint32_t blended = RGBToUint(Lerp(background, foreground, alpha));
+
+      topLeft[-col] = blended;
+      topRight[col] = blended;
+      bottomLeft[-col] = blended;
+      bottomRight[col] = blended;
+    }
+
+    // Solid fill between the two innermost corner pixels of each row.
+    std::fill(topLeft + 1, topRight, rect.color);
+    std::fill(bottomLeft + 1, bottomRight, rect.color);
+  }
+}
+
+void RasterizeAnimatedRoundedRectTopAndBottom(
+    const ColorRect& colorRect, const uint32_t* animationLookup,
+    int priorUpdateAreaMin, int priorUpdateAreaMax, int currentUpdateAreaMin,
+    int currentUpdateAreaMax, int animationMin) {
+  // Repaints the rounded top and bottom bands of |colorRect| for one frame of
+  // the animation. Rows are walked from the inside out (bottom to top for the
+  // top band, top to bottom for the bottom band), painting both bands at once
+  // over columns [priorUpdateAreaMin, currentUpdateAreaMax).
+  // |currentUpdateAreaMin| is not read. Callers skip this function entirely
+  // when the animation window does not overlap any rounded corner.
+  const int radius = colorRect.borderRadius;
+  const double kFullCoverage = 1.0 - std::numeric_limits<double>::epsilon();
+  // Columns of the innermost pixels of the left and right corners.
+  const int innerLeftX = (int)colorRect.x + radius - 1;
+  const int innerRightX = (int)colorRect.x + (int)colorRect.width - radius;
+
+  for (int row = 0; row < radius; ++row) {
+    const int topY = colorRect.y + radius - 1 - row;
+    const int bottomY = colorRect.y + colorRect.height - radius + row;
+    uint32_t* topLine = &sPixelBuffer[topY * sWindowWidth];
+    uint32_t* bottomLine = &sPixelBuffer[bottomY * sWindowWidth];
+
+    // Distances are measured from pixel centers, hence the 0.5 offsets.
+    const double centerY = row + 0.5;
+    for (int x = priorUpdateAreaMin; x < currentUpdateAreaMax; ++x) {
+      // Distance in columns from the innermost corner pixel on whichever side
+      // x lies; taking the max picks that side. A negative value means x is
+      // outside the rounded area.
+      const int col = std::max(innerLeftX - x, x - innerRightX);
+
+      double alpha = 1.0;
+      if (col >= 0) {
+        alpha = DistanceAntiAlias(SignedDistanceToCircle(
+            col + 0.5, centerY, static_cast<double>(radius)));
+      }
+      // No alpha blending for the anti-aliased pixels on the shape's border;
+      // it is not noticeable in the animation, so they are left untouched.
+      if (!(alpha > kFullCoverage)) {
+        continue;
+      }
+
+      // Columns before priorUpdateAreaMax belong to the tail of the previous
+      // frame and are reset to the rect's normal, unanimated color.
+      const uint32_t color = x < priorUpdateAreaMax
+                                 ? colorRect.color
+                                 : animationLookup[x - animationMin];
+      topLine[x] = color;
+      bottomLine[x] = color;
+    }
+  }
+}
+
+void RasterizeColorRect(const ColorRect& colorRect) {
+  // Paints |colorRect| into sPixelBuffer. A rect without a border is drawn
+  // as a single filled rect. A rect with a border is split in two to keep
+  // the drawing simple: first a stroke-only rect in the border color, then a
+  // smaller inner rect in the fill color on top of it. Rects with a
+  // non-positive width or height are skipped.
+  Vector<DrawRect, 2> drawRects;
+  Unused << drawRects.reserve(2);
+
+  // Geometry shared by the borderless rect and the border stroke. The pixel
+  // currently under the rect's top-left corner is sampled as the background
+  // that rounded corners are anti-aliased against.
+  DrawRect outer = {};
+  outer.backgroundColor =
+      sPixelBuffer[colorRect.y * sWindowWidth + colorRect.x];
+  outer.x = colorRect.x;
+  outer.y = colorRect.y;
+  outer.width = colorRect.width;
+  outer.height = colorRect.height;
+  outer.borderRadius = colorRect.borderRadius;
+
+  if (colorRect.borderWidth == 0) {
+    outer.color = colorRect.color;
+    outer.strokeOnly = false;
+    drawRects.infallibleAppend(outer);
+  } else {
+    outer.color = colorRect.borderColor;
+    outer.borderWidth = colorRect.borderWidth;
+    outer.strokeOnly = true;
+    drawRects.infallibleAppend(outer);
+
+    // The fill is inset by the border width on every side, with its corner
+    // radius reduced to match (but never below zero), and is anti-aliased
+    // against the border color instead of the sampled background.
+    DrawRect inner = {};
+    inner.color = colorRect.color;
+    inner.backgroundColor = outer.color;
+    inner.x = colorRect.x + colorRect.borderWidth;
+    inner.y = colorRect.y + colorRect.borderWidth;
+    inner.width = colorRect.width - 2 * colorRect.borderWidth;
+    inner.height = colorRect.height - 2 * colorRect.borderWidth;
+    inner.borderRadius =
+        std::max(0, (int)colorRect.borderRadius - (int)colorRect.borderWidth);
+    inner.borderWidth = 0;
+    inner.strokeOnly = false;
+    drawRects.infallibleAppend(inner);
+  }
+
+  for (const DrawRect& rect : drawRects) {
+    if (rect.height <= 0 || rect.width <= 0) {
+      continue;
+    }
+
+    // Fills columns [fromX, toX) of |line| with the rect's color, after
+    // clamping both ends to the window width.
+    auto fillColumns = [&rect](uint32_t* line, int fromX, int toX) {
+      uint32_t* first = line + std::clamp(fromX, 0, sWindowWidth);
+      uint32_t* last = line + std::clamp(toX, 0, sWindowWidth);
+      std::fill(first, last, rect.color);
+    };
+
+    // The rounded top and bottom bands come first; that call handles the
+    // more complicated corner logic and paints nothing when borderRadius
+    // is 0.
+    RasterizeRoundedRectTopAndBottom(rect);
+
+    // What remains is the flat, central portion of the rect (for
+    // non-rounded rects, the entire thing), clamped to the chrome height.
+    const int firstRow =
+        std::clamp(rect.y + rect.borderRadius, 0, sTotalChromeHeight);
+    const int endRow = std::clamp(rect.y + rect.height - rect.borderRadius, 0,
+                                  sTotalChromeHeight);
+    const int leftX = rect.x;
+    const int rightX = rect.x + rect.width;
+
+    for (int y = firstRow; y < endRow; ++y) {
+      uint32_t* line = &sPixelBuffer[y * sWindowWidth];
+
+      // Stroke-only rects (borders) only get their left and right edges
+      // here. Walking down a column of pixels is not the most
+      // cache-friendly thing, but it shouldn't matter given the height of
+      // the urlbar. When borderRadius is less than borderWidth, part of
+      // the top and bottom border lines falls in this row range and must
+      // be drawn in full, so only rows further than borderWidth from both
+      // the top and the bottom edge are treated as sides-only.
+      const bool sidesOnly = rect.strokeOnly &&
+                             y - rect.y > rect.borderWidth &&
+                             rect.y + rect.height - y > rect.borderWidth;
+      if (sidesOnly) {
+        fillColumns(line, leftX, leftX + rect.borderWidth);
+        fillColumns(line, rightX - rect.borderWidth, rightX);
+      } else {
+        fillColumns(line, leftX, rightX);
+      }
+    }
+  }
+}
+
+// Paints the pixels to sPixelBuffer for the skeleton UI animation (a light
+// gradient which moves from left to right across the grey placeholder rects).
+// Takes in the rect to draw, together with a lookup table for the gradient,
+// and the bounds of the previous and current frame of the animation.
+bool RasterizeAnimatedRect(const ColorRect& colorRect,
+                           const uint32_t* animationLookup,
+                           int priorAnimationMin, int animationMin,
+                           int animationMax) {
+  // Returns false, without painting, when |colorRect| lies entirely outside
+  // the columns from the start of the previous frame's window to the end of
+  // the current one; otherwise repaints those columns and returns true.
+  const int rectMin = colorRect.x;
+  const int rectMax = colorRect.x + colorRect.width;
+  if (rectMax < priorAnimationMin || rectMin >= animationMax) {
+    return false;
+  }
+
+  // [priorUpdateAreaMin, priorUpdateAreaMax): columns covered only by the
+  // previous frame, to be reset to the rect's plain color.
+  // [currentUpdateAreaMin, currentUpdateAreaMax): columns covered by the
+  // current frame, to be painted from the gradient lookup table.
+  const int priorUpdateAreaMin = std::max(rectMin, priorAnimationMin);
+  const int priorUpdateAreaMax = std::min(rectMax, animationMin);
+  const int currentUpdateAreaMin = std::max(rectMin, animationMin);
+  const int currentUpdateAreaMax = std::min(rectMax, animationMax);
+
+  // If the window does not reach the left or right rounded side of the
+  // rect, treat the rect as unrounded. A small optimization: there is no
+  // point in the rounded-corner checks when the corners are not touched.
+  const bool touchesRoundedSides =
+      rectMin + colorRect.borderRadius > priorAnimationMin ||
+      rectMax - colorRect.borderRadius <= animationMax;
+  const int borderRadius = touchesRoundedSides ? colorRect.borderRadius : 0;
+
+  // As with the static rects, the rounded top and bottom come first and the
+  // central part after.
+  if (borderRadius > 0) {
+    RasterizeAnimatedRoundedRectTopAndBottom(
+        colorRect, animationLookup, priorUpdateAreaMin, priorUpdateAreaMax,
+        currentUpdateAreaMin, currentUpdateAreaMax, animationMin);
+  }
+
+  const int firstRow = colorRect.y + borderRadius;
+  const int endRow = colorRect.y + colorRect.height - borderRadius;
+  for (int y = firstRow; y < endRow; ++y) {
+    uint32_t* line = &sPixelBuffer[y * sWindowWidth];
+    // Reset the tail end of last frame's animation to the plain color...
+    for (int x = priorUpdateAreaMin; x < priorUpdateAreaMax; ++x) {
+      line[x] = colorRect.color;
+    }
+    // ...then apply the animated color for this frame.
+    for (int x = currentUpdateAreaMin; x < currentUpdateAreaMax; ++x) {
+      line[x] = animationLookup[x - animationMin];
+    }
+  }
+
+  return true;
+}
+
+// Lays out the skeleton browser chrome as a list of ColorRects, rasterizes
+// them into a freshly allocated sPixelBuffer, blits that buffer to the top of
+// hWnd, and fills the remainder of the window with the theme's background
+// color. Placeholder rects that should pulse are also recorded in
+// sAnimatedRects.
+//
+//   hWnd             - window to paint into.
+//   urlbarCSSSpan    - horizontal extent of the urlbar, in CSS pixels.
+//   searchbarCSSSpan - horizontal extent of the searchbar, in CSS pixels; it
+//                      is treated as absent when either end is 0.
+//   springs          - horizontal extents of toolbar springs, in CSS pixels.
+//                      NOTE: reversed in place when RTL is enabled.
+//   currentTheme     - colors to paint with.
+//   flags            - menubar / bookmarks toolbar / RTL switches.
+//
+// On any early return (allocation failure, window too small) sAnimatedRects
+// is deleted and reset to nullptr and nothing is painted.
+void DrawSkeletonUI(HWND hWnd, CSSPixelSpan urlbarCSSSpan,
+                    CSSPixelSpan searchbarCSSSpan,
+                    Vector<CSSPixelSpan>& springs,
+                    const ThemeColors& currentTheme,
+                    const EnumSet<SkeletonUIFlag, uint32_t>& flags) {
+  // NOTE: we opt here to paint a pixel buffer for the application chrome by
+  // hand, without using native UI library methods. Why do we do this?
+  //
+  // 1) It gives us a little bit more control, especially if we want to animate
+  //    any of this.
+  // 2) It's actually more portable. We can do this on any platform where we
+  //    can blit a pixel buffer to the screen, and it only has to change
+  //    insofar as the UI is different on those platforms (and thus would have
+  //    to change anyway.)
+  //
+  // The performance impact of this ought to be negligible. As far as has been
+  // observed, on slow reference hardware this might take up to a millisecond,
+  // for a startup which otherwise takes 30 seconds.
+  //
+  // The readability and maintainability are a greater concern. When the
+  // silhouette of Firefox's core UI changes, this code will likely need to
+  // change. However, for the foreseeable future, our skeleton UI will be mostly
+  // axis-aligned geometric shapes, and the thought is that any code which is
+  // manipulating raw pixels should not be *too* hard to maintain and
+  // understand so long as it is only painting such simple shapes.
+
+  sAnimationColor = currentTheme.animationColor;
+  sToolbarForegroundColor = currentTheme.toolbarForegroundColor;
+
+  bool menubarShown = flags.contains(SkeletonUIFlag::MenubarShown);
+  bool bookmarksToolbarShown =
+      flags.contains(SkeletonUIFlag::BookmarksToolbarShown);
+  bool rtlEnabled = flags.contains(SkeletonUIFlag::RtlEnabled);
+
+  int chromeHorMargin = CSSToDevPixels(2, sCSSToDevPixelScaling);
+  int verticalOffset = sMaximized ? sNonClientVerticalMargins : 0;
+  int horizontalOffset =
+      sNonClientHorizontalMargins - (sMaximized ? 0 : chromeHorMargin);
+
+  // found in browser-aero.css, ":root[sizemode=normal][tabsintitlebar]"
+  int topBorderHeight =
+      sMaximized ? 0 : CSSToDevPixels(1, sCSSToDevPixelScaling);
+  // found in tabs.inc.css, "--tab-min-height" - depends on uidensity variable
+  int tabBarHeight = CSSToDevPixels(33, sCSSToDevPixelScaling) + verticalOffset;
+  // found in tabs.inc.css, ".titlebar-spacer"
+  int titlebarSpacerWidth = horizontalOffset;
+  if (!sMaximized && !menubarShown) {
+    titlebarSpacerWidth += CSSToDevPixels(40, sCSSToDevPixelScaling);
+  }
+  // found in tabs.inc.css, ".tab-line"
+  int tabLineHeight = CSSToDevPixels(2, sCSSToDevPixelScaling) + verticalOffset;
+  int selectedTabWidth = CSSToDevPixels(224, sCSSToDevPixelScaling);
+  int toolbarHeight = CSSToDevPixels(39, sCSSToDevPixelScaling);
+  // found in browser.css, "#PersonalToolbar"
+  int bookmarkToolbarHeight = CSSToDevPixels(28, sCSSToDevPixelScaling);
+  if (bookmarksToolbarShown) {
+    toolbarHeight += bookmarkToolbarHeight;
+  }
+  // found in urlbar-searchbar.inc.css, "#urlbar[breakout]"
+  int urlbarTopOffset = CSSToDevPixels(5, sCSSToDevPixelScaling);
+  int urlbarHeight = CSSToDevPixels(30, sCSSToDevPixelScaling);
+  // found in browser-aero.css, "#navigator-toolbox::after" border-bottom
+  int chromeContentDividerHeight = CSSToDevPixels(1, sCSSToDevPixelScaling);
+
+  int tabPlaceholderBarMarginTop = CSSToDevPixels(13, sCSSToDevPixelScaling);
+  int tabPlaceholderBarMarginLeft = CSSToDevPixels(10, sCSSToDevPixelScaling);
+  int tabPlaceholderBarHeight = CSSToDevPixels(8, sCSSToDevPixelScaling);
+  int tabPlaceholderBarWidth = CSSToDevPixels(120, sCSSToDevPixelScaling);
+
+  int toolbarPlaceholderHeight = CSSToDevPixels(10, sCSSToDevPixelScaling);
+  int toolbarPlaceholderMarginRight =
+      rtlEnabled ? CSSToDevPixels(11, sCSSToDevPixelScaling)
+                 : CSSToDevPixels(9, sCSSToDevPixelScaling);
+  int toolbarPlaceholderMarginLeft =
+      rtlEnabled ? CSSToDevPixels(9, sCSSToDevPixelScaling)
+                 : CSSToDevPixels(11, sCSSToDevPixelScaling);
+  int placeholderMargin = CSSToDevPixels(8, sCSSToDevPixelScaling);
+
+  int menubarHeightDevPixels =
+      menubarShown ? CSSToDevPixels(28, sCSSToDevPixelScaling) : 0;
+
+  // controlled by css variable urlbarMarginInline in urlbar-searchbar.inc.css
+  int urlbarMargin =
+      CSSToDevPixels(5, sCSSToDevPixelScaling) + horizontalOffset;
+
+  int urlbarTextPlaceholderMarginTop =
+      CSSToDevPixels(10, sCSSToDevPixelScaling);
+  int urlbarTextPlaceholderMarginLeft =
+      CSSToDevPixels(10, sCSSToDevPixelScaling);
+  int urlbarTextPlaceHolderWidth = CSSToDevPixels(
+      std::clamp(urlbarCSSSpan.end - urlbarCSSSpan.start - 10.0, 0.0, 260.0),
+      sCSSToDevPixelScaling);
+  int urlbarTextPlaceholderHeight = CSSToDevPixels(10, sCSSToDevPixelScaling);
+
+  int searchbarTextPlaceholderWidth = CSSToDevPixels(62, sCSSToDevPixelScaling);
+
+  // Until we have successfully painted, any return path tears down the list
+  // of animated rects. This is disarmed (release()) at the very end.
+  auto scopeExit = MakeScopeExit([&] {
+    delete sAnimatedRects;
+    sAnimatedRects = nullptr;
+  });
+
+  Vector<ColorRect> rects;
+
+  ColorRect topBorder = {};
+  topBorder.color = 0x00000000;
+  topBorder.x = 0;
+  topBorder.y = 0;
+  topBorder.width = sWindowWidth;
+  topBorder.height = topBorderHeight;
+  topBorder.flipIfRTL = false;
+  if (!rects.append(topBorder)) {
+    return;
+  }
+
+  ColorRect menubar = {};
+  menubar.color = currentTheme.tabBarColor;
+  menubar.x = 0;
+  menubar.y = topBorder.height;
+  menubar.width = sWindowWidth;
+  menubar.height = menubarHeightDevPixels;
+  menubar.flipIfRTL = false;
+  if (!rects.append(menubar)) {
+    return;
+  }
+
+  int placeholderBorderRadius = CSSToDevPixels(2, sCSSToDevPixelScaling);
+  // found in browser.css "--toolbarbutton-border-radius"
+  int urlbarBorderRadius = CSSToDevPixels(2, sCSSToDevPixelScaling);
+  // found in urlbar-searchbar.inc.css "#urlbar-background"
+  int urlbarBorderWidth = CSSToDevPixelsFloor(1, sCSSToDevPixelScaling);
+  int urlbarBorderColor = currentTheme.urlbarBorderColor;
+
+  // The (traditionally dark blue on Windows) background of the tab bar.
+  ColorRect tabBar = {};
+  tabBar.color = currentTheme.tabBarColor;
+  tabBar.x = 0;
+  tabBar.y = menubar.height + topBorder.height;
+  tabBar.width = sWindowWidth;
+  tabBar.height = tabBarHeight;
+  tabBar.flipIfRTL = false;
+  if (!rects.append(tabBar)) {
+    return;
+  }
+
+  // The blue highlight at the top of the initial selected tab
+  ColorRect tabLine = {};
+  tabLine.color = currentTheme.tabLineColor;
+  tabLine.x = titlebarSpacerWidth;
+  tabLine.y = menubar.height + topBorder.height;
+  tabLine.width = selectedTabWidth;
+  tabLine.height = tabLineHeight;
+  tabLine.flipIfRTL = true;
+  if (!rects.append(tabLine)) {
+    return;
+  }
+
+  // The initial selected tab
+  ColorRect selectedTab = {};
+  selectedTab.color = currentTheme.backgroundColor;
+  selectedTab.x = titlebarSpacerWidth;
+  selectedTab.y = tabLine.y + tabLineHeight;
+  selectedTab.width = selectedTabWidth;
+  selectedTab.height = tabBar.y + tabBar.height - selectedTab.y;
+  selectedTab.flipIfRTL = true;
+  if (!rects.append(selectedTab)) {
+    return;
+  }
+
+  // A placeholder rect representing text that will fill the selected tab title
+  ColorRect tabTextPlaceholder = {};
+  tabTextPlaceholder.color = sToolbarForegroundColor;
+  tabTextPlaceholder.x = selectedTab.x + tabPlaceholderBarMarginLeft;
+  tabTextPlaceholder.y = selectedTab.y + tabPlaceholderBarMarginTop;
+  tabTextPlaceholder.width = tabPlaceholderBarWidth;
+  tabTextPlaceholder.height = tabPlaceholderBarHeight;
+  tabTextPlaceholder.borderRadius = placeholderBorderRadius;
+  tabTextPlaceholder.flipIfRTL = true;
+  if (!rects.append(tabTextPlaceholder)) {
+    return;
+  }
+
+  // The toolbar background
+  ColorRect toolbar = {};
+  toolbar.color = currentTheme.backgroundColor;
+  toolbar.x = 0;
+  toolbar.y = tabBar.y + tabBarHeight;
+  toolbar.width = sWindowWidth;
+  toolbar.height = toolbarHeight;
+  toolbar.flipIfRTL = false;
+  if (!rects.append(toolbar)) {
+    return;
+  }
+
+  // The single-pixel divider line below the toolbar
+  ColorRect chromeContentDivider = {};
+  chromeContentDivider.color = currentTheme.chromeContentDividerColor;
+  chromeContentDivider.x = 0;
+  chromeContentDivider.y = toolbar.y + toolbar.height;
+  chromeContentDivider.width = sWindowWidth;
+  chromeContentDivider.height = chromeContentDividerHeight;
+  chromeContentDivider.flipIfRTL = false;
+  if (!rects.append(chromeContentDivider)) {
+    return;
+  }
+
+  // The urlbar
+  ColorRect urlbar = {};
+  urlbar.color = currentTheme.urlbarColor;
+  urlbar.x = CSSToDevPixels(urlbarCSSSpan.start, sCSSToDevPixelScaling) +
+             horizontalOffset;
+  urlbar.y = tabBar.y + tabBarHeight + urlbarTopOffset;
+  urlbar.width = CSSToDevPixels((urlbarCSSSpan.end - urlbarCSSSpan.start),
+                                sCSSToDevPixelScaling);
+  urlbar.height = urlbarHeight;
+  urlbar.borderRadius = urlbarBorderRadius;
+  urlbar.borderWidth = urlbarBorderWidth;
+  urlbar.borderColor = urlbarBorderColor;
+  urlbar.flipIfRTL = false;
+  if (!rects.append(urlbar)) {
+    return;
+  }
+
+  // The urlbar placeholder rect representing text that will fill the urlbar
+  // If rtl is enabled, it is flipped relative to the urlbar rectangle, not
+  // sWindowWidth.
+  ColorRect urlbarTextPlaceholder = {};
+  urlbarTextPlaceholder.color = sToolbarForegroundColor;
+  urlbarTextPlaceholder.x =
+      rtlEnabled
+          ? ((urlbar.x + urlbar.width) - urlbarTextPlaceholderMarginLeft -
+             urlbarTextPlaceHolderWidth)
+          : (urlbar.x + urlbarTextPlaceholderMarginLeft);
+  urlbarTextPlaceholder.y = urlbar.y + urlbarTextPlaceholderMarginTop;
+  urlbarTextPlaceholder.width = urlbarTextPlaceHolderWidth;
+  urlbarTextPlaceholder.height = urlbarTextPlaceholderHeight;
+  urlbarTextPlaceholder.borderRadius = placeholderBorderRadius;
+  urlbarTextPlaceholder.flipIfRTL = false;
+  if (!rects.append(urlbarTextPlaceholder)) {
+    return;
+  }
+
+  // The searchbar and placeholder text, if present
+  // This is y-aligned with the urlbar
+  bool hasSearchbar = searchbarCSSSpan.start != 0 && searchbarCSSSpan.end != 0;
+  ColorRect searchbarRect = {};
+  if (hasSearchbar) {
+    searchbarRect.color = currentTheme.urlbarColor;
+    searchbarRect.x =
+        CSSToDevPixels(searchbarCSSSpan.start, sCSSToDevPixelScaling) +
+        horizontalOffset;
+    searchbarRect.y = urlbar.y;
+    searchbarRect.width = CSSToDevPixels(
+        searchbarCSSSpan.end - searchbarCSSSpan.start, sCSSToDevPixelScaling);
+    searchbarRect.height = urlbarHeight;
+    searchbarRect.borderRadius = urlbarBorderRadius;
+    searchbarRect.borderWidth = urlbarBorderWidth;
+    searchbarRect.borderColor = urlbarBorderColor;
+    searchbarRect.flipIfRTL = false;
+    if (!rects.append(searchbarRect)) {
+      return;
+    }
+
+    // The placeholder rect representing text that will fill the searchbar
+    // This uses the same margins as the urlbarTextPlaceholder
+    // If rtl is enabled, it is flipped relative to the searchbar rectangle,
+    // not sWindowWidth.
+    ColorRect searchbarTextPlaceholder = {};
+    searchbarTextPlaceholder.color = sToolbarForegroundColor;
+    searchbarTextPlaceholder.x =
+        rtlEnabled
+            ? ((searchbarRect.x + searchbarRect.width) -
+               urlbarTextPlaceholderMarginLeft - searchbarTextPlaceholderWidth)
+            : (searchbarRect.x + urlbarTextPlaceholderMarginLeft);
+    searchbarTextPlaceholder.y =
+        searchbarRect.y + urlbarTextPlaceholderMarginTop;
+    searchbarTextPlaceholder.width = searchbarTextPlaceholderWidth;
+    searchbarTextPlaceholder.height = urlbarTextPlaceholderHeight;
+    searchbarTextPlaceholder.flipIfRTL = false;
+    if (!rects.append(searchbarTextPlaceholder) ||
+        !sAnimatedRects->append(searchbarTextPlaceholder)) {
+      return;
+    }
+  }
+
+  // Determine where the placeholder rectangles should not go. This is
+  // anywhere occupied by a spring, urlbar, or searchbar
+  Vector<DevPixelSpan> noPlaceholderSpans;
+
+  DevPixelSpan urlbarSpan;
+  urlbarSpan.start = urlbar.x - urlbarMargin;
+  urlbarSpan.end = urlbar.width + urlbar.x + urlbarMargin;
+
+  // Only filled in (and only read below) when a searchbar is present.
+  DevPixelSpan searchbarSpan;
+  if (hasSearchbar) {
+    searchbarSpan.start = searchbarRect.x - urlbarMargin;
+    searchbarSpan.end = searchbarRect.width + searchbarRect.x + urlbarMargin;
+  }
+
+  // Zero-width sentinel span marking the left edge of the usable toolbar.
+  DevPixelSpan marginLeftPlaceholder;
+  marginLeftPlaceholder.start = toolbarPlaceholderMarginLeft;
+  marginLeftPlaceholder.end = toolbarPlaceholderMarginLeft;
+  if (!noPlaceholderSpans.append(marginLeftPlaceholder)) {
+    return;
+  }
+
+  if (rtlEnabled) {
+    // If we're RTL, then the springs as ordered in the DOM will be from right
+    // to left, which will break our comparison logic below
+    springs.reverse();
+  }
+
+  for (auto spring : springs) {
+    DevPixelSpan springDevPixels;
+    springDevPixels.start =
+        CSSToDevPixels(spring.start, sCSSToDevPixelScaling) + horizontalOffset;
+    springDevPixels.end =
+        CSSToDevPixels(spring.end, sCSSToDevPixelScaling) + horizontalOffset;
+    if (!noPlaceholderSpans.append(springDevPixels)) {
+      return;
+    }
+  }
+
+  // Zero-width sentinel span marking the right edge of the usable toolbar.
+  DevPixelSpan marginRightPlaceholder;
+  marginRightPlaceholder.start = sWindowWidth - toolbarPlaceholderMarginRight;
+  marginRightPlaceholder.end = sWindowWidth - toolbarPlaceholderMarginRight;
+  if (!noPlaceholderSpans.append(marginRightPlaceholder)) {
+    return;
+  }
+
+  Vector<DevPixelSpan, 2> spansToAdd;
+  Unused << spansToAdd.reserve(2);
+  spansToAdd.infallibleAppend(urlbarSpan);
+  if (hasSearchbar) {
+    spansToAdd.infallibleAppend(searchbarSpan);
+  }
+
+  // Insert the urlbar/searchbar spans in front of the first existing span
+  // that starts to their right. We break right after inserting, so the
+  // iteration never continues over the modified vector.
+  for (auto& toAdd : spansToAdd) {
+    for (auto& span : noPlaceholderSpans) {
+      if (span.start > toAdd.start) {
+        if (!noPlaceholderSpans.insert(&span, toAdd)) {
+          return;
+        }
+        break;
+      }
+    }
+  }
+
+  // Every sufficiently wide gap between neighbouring spans gets a placeholder.
+  for (size_t i = 1; i < noPlaceholderSpans.length(); i++) {
+    int start = noPlaceholderSpans[i - 1].end + placeholderMargin;
+    int end = noPlaceholderSpans[i].start - placeholderMargin;
+    if (start + 2 * placeholderBorderRadius >= end) {
+      continue;
+    }
+
+    // The placeholder rects should all be y-aligned.
+    ColorRect placeholderRect = {};
+    placeholderRect.color = sToolbarForegroundColor;
+    placeholderRect.x = start;
+    placeholderRect.y = urlbarTextPlaceholder.y;
+    placeholderRect.width = end - start;
+    placeholderRect.height = toolbarPlaceholderHeight;
+    placeholderRect.borderRadius = placeholderBorderRadius;
+    placeholderRect.flipIfRTL = false;
+    if (!rects.append(placeholderRect) ||
+        !sAnimatedRects->append(placeholderRect)) {
+      return;
+    }
+  }
+
+  sTotalChromeHeight = chromeContentDivider.y + chromeContentDivider.height;
+  if (sTotalChromeHeight > sWindowHeight) {
+    printf_stderr("Exiting drawing skeleton UI because window is too small.\n");
+    return;
+  }
+
+  if (!sAnimatedRects->append(tabTextPlaceholder) ||
+      !sAnimatedRects->append(urlbarTextPlaceholder)) {
+    return;
+  }
+
+  // Widen before multiplying so the element count cannot overflow int.
+  sPixelBuffer = (uint32_t*)calloc(
+      static_cast<size_t>(sWindowWidth) * static_cast<size_t>(sTotalChromeHeight),
+      sizeof(uint32_t));
+  if (!sPixelBuffer) {
+    // Out of memory: rasterizing below would write through a null pointer.
+    // Bail out; the scope exit above discards sAnimatedRects.
+    return;
+  }
+
+  for (auto& rect : *sAnimatedRects) {
+    if (rtlEnabled && rect.flipIfRTL) {
+      rect.x = sWindowWidth - rect.x - rect.width;
+    }
+    rect.x = std::clamp(rect.x, 0, sWindowWidth);
+    rect.width = std::clamp(rect.width, 0, sWindowWidth - rect.x);
+    rect.y = std::clamp(rect.y, 0, sTotalChromeHeight);
+    rect.height = std::clamp(rect.height, 0, sTotalChromeHeight - rect.y);
+  }
+
+  for (auto& rect : rects) {
+    if (rtlEnabled && rect.flipIfRTL) {
+      rect.x = sWindowWidth - rect.x - rect.width;
+    }
+    rect.x = std::clamp(rect.x, 0, sWindowWidth);
+    rect.width = std::clamp(rect.width, 0, sWindowWidth - rect.x);
+    rect.y = std::clamp(rect.y, 0, sTotalChromeHeight);
+    rect.height = std::clamp(rect.height, 0, sTotalChromeHeight - rect.y);
+    RasterizeColorRect(rect);
+  }
+
+  HDC hdc = sGetWindowDC(hWnd);
+
+  // Negative height: the rows in sPixelBuffer are stored top-down.
+  BITMAPINFO chromeBMI = {};
+  chromeBMI.bmiHeader.biSize = sizeof(chromeBMI.bmiHeader);
+  chromeBMI.bmiHeader.biWidth = sWindowWidth;
+  chromeBMI.bmiHeader.biHeight = -sTotalChromeHeight;
+  chromeBMI.bmiHeader.biPlanes = 1;
+  chromeBMI.bmiHeader.biBitCount = 32;
+  chromeBMI.bmiHeader.biCompression = BI_RGB;
+
+  // First, we just paint the chrome area with our pixel buffer
+  sStretchDIBits(hdc, 0, 0, sWindowWidth, sTotalChromeHeight, 0, 0,
+                 sWindowWidth, sTotalChromeHeight, sPixelBuffer, &chromeBMI,
+                 DIB_RGB_COLORS, SRCCOPY);
+
+  // Then, we just fill the rest with FillRect
+  RECT rect = {0, sTotalChromeHeight, sWindowWidth, sWindowHeight};
+  HBRUSH brush = sCreateSolidBrush(currentTheme.backgroundColor);
+  sFillRect(hdc, &rect, brush);
+
+  // Success: keep sAnimatedRects alive past this function.
+  scopeExit.release();
+  sReleaseDC(hWnd, hdc);
+  sDeleteObject(brush);
+}
+
+// Thread entry point that animates the placeholder rects recorded in
+// sAnimatedRects by sweeping a gradient across them, repainting only the
+// changed strip of sPixelBuffer each frame. The parameter is unused. Always
+// returns 0; it returns once sAnimationControlFlag indicates the skeleton UI
+// window has been consumed, or immediately if there is nothing to animate.
+DWORD WINAPI AnimateSkeletonUI(void* aUnused) {
+  // sAnimatedRects is checked for null as well as for emptiness so that we
+  // never dereference a list which has been torn down.
+  if (!sPixelBuffer || !sAnimatedRects || sAnimatedRects->empty()) {
+    return 0;
+  }
+
+  // On each of the animated rects (which happen to all be placeholder UI
+  // rects sharing the same color), we want to animate a gradient moving across
+  // the screen from left to right. The gradient starts as the rect's color on,
+  // the left side, changes to the background color of the window by the middle
+  // of the gradient, and then goes back down to the rect's color. To make this
+  // faster than interpolating between the two colors for each pixel for each
+  // frame, we simply create a lookup buffer in which we can look up the color
+  // for a particular offset into the gradient.
+  //
+  // To do this we just interpolate between the two values, and to give the
+  // gradient a smoother transition between colors, we transform the linear
+  // blend amount via the cubic smooth step function (SmoothStep3) to produce
+  // a smooth start and stop for the gradient. We do this for the first half
+  // of the gradient, and then simply copy that backwards for the second half.
+  //
+  // The CSS width of 80 chosen here is effectively just to match the size
+  // of the animation provided in the design mockup. We define it in CSS pixels
+  // simply because the rest of our UI is based off of CSS scalings.
+  int animationWidth = CSSToDevPixels(80, sCSSToDevPixelScaling);
+  UniquePtr<uint32_t[]> animationLookup =
+      MakeUnique<uint32_t[]>(animationWidth);
+  uint32_t animationColor = sAnimationColor;
+  NormalizedRGB rgbBlend = UintToRGB(animationColor);
+
+  // The base color is the same for every table entry, so convert it once.
+  uint32_t baseColor = sToolbarForegroundColor;
+  NormalizedRGB rgbBase = UintToRGB(baseColor);
+
+  // Build the first half of the lookup table
+  for (int i = 0; i < animationWidth / 2; ++i) {
+    double blendAmountLinear =
+        static_cast<double>(i) / (static_cast<double>(animationWidth / 2));
+    double blendAmount = SmoothStep3(blendAmountLinear);
+
+    NormalizedRGB rgb = Lerp(rgbBase, rgbBlend, blendAmount);
+    animationLookup[i] = RGBToUint(rgb);
+  }
+
+  // Copy the first half of the lookup table into the second half backwards
+  for (int i = animationWidth / 2; i < animationWidth; ++i) {
+    int j = animationWidth - 1 - i;
+    if (j == animationWidth / 2) {
+      // If animationWidth is odd, we'll be left with one pixel at the center.
+      // Just color that as the animation color.
+      animationLookup[i] = animationColor;
+    } else {
+      animationLookup[i] = animationLookup[j];
+    }
+  }
+
+  // The bitmap info remains unchanged throughout the animation - this just
+  // effectively describes the contents of sPixelBuffer
+  BITMAPINFO chromeBMI = {};
+  chromeBMI.bmiHeader.biSize = sizeof(chromeBMI.bmiHeader);
+  chromeBMI.bmiHeader.biWidth = sWindowWidth;
+  chromeBMI.bmiHeader.biHeight = -sTotalChromeHeight;
+  chromeBMI.bmiHeader.biPlanes = 1;
+  chromeBMI.bmiHeader.biBitCount = 32;
+  chromeBMI.bmiHeader.biCompression = BI_RGB;
+
+  uint32_t animationIteration = 0;
+
+  int devPixelsPerFrame =
+      CSSToDevPixels(kAnimationCSSPixelsPerFrame, sCSSToDevPixelScaling);
+  int devPixelsExtraWindowSize =
+      CSSToDevPixels(kAnimationCSSExtraWindowSize, sCSSToDevPixelScaling);
+
+  // A compare-exchange of 0 with 0 never changes the flag; it is used here
+  // purely to read its current value.
+  if (::InterlockedCompareExchange(&sAnimationControlFlag, 0, 0)) {
+    // The window got consumed before we were able to draw anything.
+    return 0;
+  }
+
+  while (true) {
+    // The gradient will move across the screen at devPixelsPerFrame at
+    // 60fps, and then loop back to the beginning. However, we add a buffer of
+    // devPixelsExtraWindowSize around the edges so it doesn't immediately
+    // jump back, giving it a more pulsing feel.
+    int animationMin = ((animationIteration * devPixelsPerFrame) %
+                        (sWindowWidth + devPixelsExtraWindowSize)) -
+                       devPixelsExtraWindowSize / 2;
+    int animationMax = animationMin + animationWidth;
+    // The priorAnimationMin is the beginning of the previous frame's animation.
+    // Since we only want to draw the bits of the image that we updated, we need
+    // to overwrite the left bit of the animation we drew last frame with the
+    // default color.
+    int priorAnimationMin = animationMin - devPixelsPerFrame;
+    animationMin = std::max(0, animationMin);
+    priorAnimationMin = std::max(0, priorAnimationMin);
+    animationMax = std::min((int)sWindowWidth, animationMax);
+
+    // The gradient only affects the specific rects that we put into
+    // sAnimatedRects. So we simply update those rects, and maintain a flag
+    // to avoid drawing when we don't need to. The rects are only read here,
+    // so iterate by reference rather than copying each one every frame.
+    bool updatedAnything = false;
+    for (const ColorRect& rect : *sAnimatedRects) {
+      bool hadUpdates =
+          RasterizeAnimatedRect(rect, animationLookup.get(), priorAnimationMin,
+                                animationMin, animationMax);
+      updatedAnything = updatedAnything || hadUpdates;
+    }
+
+    if (updatedAnything) {
+      HDC hdc = sGetWindowDC(sPreXULSkeletonUIWindow);
+
+      // Blit only the vertical strip between last frame's start and this
+      // frame's end.
+      sStretchDIBits(hdc, priorAnimationMin, 0,
+                     animationMax - priorAnimationMin, sTotalChromeHeight,
+                     priorAnimationMin, 0, animationMax - priorAnimationMin,
+                     sTotalChromeHeight, sPixelBuffer, &chromeBMI,
+                     DIB_RGB_COLORS, SRCCOPY);
+
+      sReleaseDC(sPreXULSkeletonUIWindow, hdc);
+    }
+
+    animationIteration++;
+
+    // We coordinate around our sleep here to ensure that the main thread does
+    // not wait on us if we're sleeping. If we don't get 1 here, it means the
+    // window has been consumed and we don't need to sleep. If in
+    // ConsumePreXULSkeletonUIHandle we get a value other than 1 after
+    // incrementing, it means we're sleeping, and that function can assume that
+    // we will safely exit after the sleep because of the observed value of
+    // sAnimationControlFlag.
+    if (InterlockedIncrement(&sAnimationControlFlag) != 1) {
+      return 0;
+    }
+
+    // Note: Sleep does not guarantee an exact time interval. If the system is
+    // busy, for instance, we could easily end up taking several frames longer,
+    // and really we could be left unscheduled for an arbitrarily long time.
+    // This is fine, and we don't really care. We could track how much time this
+    // actually took and jump the animation forward the appropriate amount, but
+    // it's not even clear that that's a better user experience. So we leave
+    // this as simple as we can.
+    ::Sleep(16);
+
+    // Here we bring sAnimationControlFlag back down - again, if we don't get a
+    // 0 here it means we consumed the skeleton UI window in the mean time, so
+    // we can simply exit.
+    if (InterlockedDecrement(&sAnimationControlFlag) != 0) {
+      return 0;
+    }
+  }
+
+  return 0;
+}
+
+// Window procedure for the skeleton UI window. It opts in to non-client DPI
+// scaling on WM_NCCREATE (when that API was resolved), shrinks the client
+// rect on WM_NCCALCSIZE, and defers everything else to DefWindowProcW.
+LRESULT WINAPI PreXULSkeletonUIProc(HWND hWnd, UINT msg, WPARAM wParam,
+                                    LPARAM lParam) {
+  // NOTE: this block was copied from WinUtils.cpp, and needs to be kept in
+  // sync.
+  if (msg == WM_NCCREATE && sEnableNonClientDpiScaling) {
+    sEnableNonClientDpiScaling(hWnd);
+  }
+
+  if (msg != WM_NCCALCSIZE) {
+    return ::DefWindowProcW(hWnd, msg, wParam, lParam);
+  }
+
+  // NOTE: the remainder was paraphrased from the WM_NCCALCSIZE handler in
+  // nsWindow.cpp, and will need to be kept in sync.
+
+  // With a non-zero wParam, lParam carries an NCCALCSIZE_PARAMS whose first
+  // rect is the one to adjust; otherwise lParam points directly at a RECT.
+  RECT* clientRect;
+  if (wParam) {
+    clientRect = &(reinterpret_cast<NCCALCSIZE_PARAMS*>(lParam))->rgrc[0];
+  } else {
+    clientRect = reinterpret_cast<RECT*>(lParam);
+  }
+
+  // These match the margins set in browser-tabsintitlebar.js with
+  // default prefs on Windows. Bug 1673092 tracks lining this up with
+  // that more correctly instead of hard-coding it.
+  int sideInset = sNonClientHorizontalMargins -
+                  (sMaximized ? 0 : CSSToDevPixels(2, sCSSToDevPixelScaling));
+  // NOTE(review): the bottom inset is also derived from
+  // sNonClientHorizontalMargins rather than a vertical margin - confirm that
+  // this is intended.
+  int bottomInset = sNonClientHorizontalMargins -
+                    (sMaximized ? 0 : CSSToDevPixels(2, sCSSToDevPixelScaling));
+
+  // The top edge is deliberately left where it is.
+  clientRect->left += sideInset;
+  clientRect->right -= sideInset;
+  clientRect->bottom -= bottomInset;
+  return 0;
+}
+
+// Reports whether the current user's "AppsUseLightTheme" personalization
+// value is zero. Returns false when the key or the value cannot be read.
+bool IsSystemDarkThemeEnabled() {
+  HKEY personalizeKey;
+  if (::RegOpenKeyExW(
+          HKEY_CURRENT_USER,
+          L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize",
+          0, KEY_READ, &personalizeKey) != ERROR_SUCCESS) {
+    return false;
+  }
+  // Closes the key on every exit path from here on.
+  AutoCloseRegKey closeKey(personalizeKey);
+
+  uint32_t appsUseLightTheme;
+  DWORD valueSize = sizeof(appsUseLightTheme);
+  DWORD status = ::RegGetValueW(
+      personalizeKey, nullptr, L"AppsUseLightTheme", RRF_RT_REG_DWORD, nullptr,
+      reinterpret_cast<PBYTE>(&appsUseLightTheme), &valueSize);
+
+  // The value is only inspected when the read succeeded.
+  return status == ERROR_SUCCESS && !appsUseLightTheme;
+}
+
+// Returns the hard-coded skeleton UI palette for the requested theme mode.
+// Any value other than Dark or Light is treated as Default (and asserted to
+// actually be Default in debug builds).
+ThemeColors GetTheme(ThemeMode themeId) {
+  ThemeColors colors = {};
+
+  // controlled by css variable --tab-line-color; identical in every theme
+  colors.tabLineColor = 0x0a84ff;
+
+  if (themeId == ThemeMode::Dark) {
+    // Dark theme or default theme when in dark mode
+
+    // controlled by css variable --toolbar-bgcolor
+    colors.backgroundColor = 0x323234;
+    colors.toolbarForegroundColor = 0x6a6a6b;
+    // controlled by css variable --lwt-accent-color
+    colors.tabBarColor = 0x0c0c0d;
+    // controlled by --toolbar-non-lwt-textcolor in browser.css
+    colors.chromeContentDividerColor = 0x0c0c0d;
+    // controlled by css variable --lwt-toolbar-field-background-color
+    colors.urlbarColor = 0x474749;
+    // controlled by css variable --lwt-toolbar-field-border-color
+    colors.urlbarBorderColor = 0x5a5a5c;
+    // The dark palette pulses towards the urlbar color.
+    colors.animationColor = colors.urlbarColor;
+  } else if (themeId == ThemeMode::Light) {
+    // Light theme
+
+    // controlled by --toolbar-bgcolor
+    colors.backgroundColor = 0xf5f6f7;
+    colors.toolbarForegroundColor = 0xd9dadb;
+    // controlled by css variable --lwt-accent-color
+    colors.tabBarColor = 0xe3e4e6;
+    // --chrome-content-separator-color in browser.css
+    colors.chromeContentDividerColor = 0xcccccc;
+    // by css variable --lwt-toolbar-field-background-color
+    colors.urlbarColor = 0xffffff;
+    // controlled by css variable --lwt-toolbar-field-border-color
+    colors.urlbarBorderColor = 0xcccccc;
+    // The light palettes pulse towards the background color.
+    colors.animationColor = colors.backgroundColor;
+  } else {
+    // Default theme when not in dark mode
+    MOZ_ASSERT(themeId == ThemeMode::Default);
+
+    // --toolbar-non-lwt-bgcolor in browser.css
+    colors.backgroundColor = 0xf9f9fa;
+    colors.toolbarForegroundColor = 0xe5e5e5;
+    // found in browser-aero.css ":root[tabsintitlebar]:not(:-moz-lwtheme)"
+    // (set to "hsl(235,33%,19%)")
+    colors.tabBarColor = 0x202340;
+    // --chrome-content-separator-color in browser.css
+    colors.chromeContentDividerColor = 0xe2e1e3;
+    // controlled by css variable --toolbar-color
+    colors.urlbarColor = 0xffffff;
+    // controlled by css variable --lwt-toolbar-field-border-color
+    colors.urlbarBorderColor = 0xbebebe;
+    colors.animationColor = colors.backgroundColor;
+  }
+
+  return colors;
+}
+
+// Opens the skeleton UI registry key under HKEY_CURRENT_USER with full
+// access, creating it if it does not exist yet.
+//
+// Returns true, with `key` holding an open handle the caller must close, only
+// when the key already existed. In every other case (the call failed, the key
+// had to be created, or the disposition is unrecognized) it returns false and
+// leaves no handle open, so callers must not use or close `key` on failure.
+bool OpenPreXULSkeletonUIRegKey(HKEY& key) {
+  DWORD disposition;
+  LSTATUS result =
+      ::RegCreateKeyExW(HKEY_CURRENT_USER, kPreXULSkeletonUIKeyPath, 0, nullptr,
+                        0, KEY_ALL_ACCESS, nullptr, &key, &disposition);
+
+  if (result != ERROR_SUCCESS) {
+    // Nothing was opened, so there is nothing to close.
+    return false;
+  }
+
+  if (disposition == REG_OPENED_EXISTING_KEY) {
+    return true;
+  }
+
+  // The key was either freshly created (REG_CREATED_NEW_KEY) or came back
+  // with an unexpected disposition. Both are reported as failure, so release
+  // the handle here rather than leaking it.
+  ::RegCloseKey(key);
+  return false;
+}
+
+// Loads user32 and gdi32 and resolves every entry point the skeleton UI calls
+// through its s* function pointers. Returns false as soon as either library
+// or any required entry point is missing; pointers later in the list are not
+// touched in that case. EnableNonClientDpiScaling is the one optional entry
+// point and never causes a failure.
+bool LoadGdi32AndUser32Procedures() {
+  HMODULE user32Dll = ::LoadLibraryW(L"user32");
+  HMODULE gdi32Dll = ::LoadLibraryW(L"gdi32");
+
+  if (!user32Dll || !gdi32Dll) {
+    return false;
+  }
+
+  // --- Optional: non-client DPI scaling --------------------------------
+  // Only resolved when both DPI-awareness helpers exist and report that this
+  // thread is per-monitor aware.
+  auto getThreadDpiAwarenessContext =
+      reinterpret_cast<decltype(GetThreadDpiAwarenessContext)*>(
+          ::GetProcAddress(user32Dll, "GetThreadDpiAwarenessContext"));
+  auto areDpiAwarenessContextsEqual =
+      reinterpret_cast<decltype(AreDpiAwarenessContextsEqual)*>(
+          ::GetProcAddress(user32Dll, "AreDpiAwarenessContextsEqual"));
+  if (getThreadDpiAwarenessContext && areDpiAwarenessContextsEqual &&
+      areDpiAwarenessContextsEqual(getThreadDpiAwarenessContext(),
+                                   DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE)) {
+    // EnableNonClientDpiScaling is optional - we can handle not having it.
+    sEnableNonClientDpiScaling =
+        reinterpret_cast<EnableNonClientDpiScalingProc>(
+            ::GetProcAddress(user32Dll, "EnableNonClientDpiScaling"));
+  }
+
+  // --- Required: user32 -------------------------------------------------
+  sGetSystemMetricsForDpi = reinterpret_cast<GetSystemMetricsForDpiProc>(
+      ::GetProcAddress(user32Dll, "GetSystemMetricsForDpi"));
+  if (!sGetSystemMetricsForDpi) {
+    return false;
+  }
+
+  sGetDpiForWindow = reinterpret_cast<GetDpiForWindowProc>(
+      ::GetProcAddress(user32Dll, "GetDpiForWindow"));
+  if (!sGetDpiForWindow) {
+    return false;
+  }
+
+  sRegisterClassW = reinterpret_cast<RegisterClassWProc>(
+      ::GetProcAddress(user32Dll, "RegisterClassW"));
+  if (!sRegisterClassW) {
+    return false;
+  }
+
+  sCreateWindowExW = reinterpret_cast<CreateWindowExWProc>(
+      ::GetProcAddress(user32Dll, "CreateWindowExW"));
+  if (!sCreateWindowExW) {
+    return false;
+  }
+
+  sShowWindow = reinterpret_cast<ShowWindowProc>(
+      ::GetProcAddress(user32Dll, "ShowWindow"));
+  if (!sShowWindow) {
+    return false;
+  }
+
+  sSetWindowPos = reinterpret_cast<SetWindowPosProc>(
+      ::GetProcAddress(user32Dll, "SetWindowPos"));
+  if (!sSetWindowPos) {
+    return false;
+  }
+
+  sGetWindowDC = reinterpret_cast<GetWindowDCProc>(
+      ::GetProcAddress(user32Dll, "GetWindowDC"));
+  if (!sGetWindowDC) {
+    return false;
+  }
+
+  sFillRect =
+      reinterpret_cast<FillRectProc>(::GetProcAddress(user32Dll, "FillRect"));
+  if (!sFillRect) {
+    return false;
+  }
+
+  sReleaseDC =
+      reinterpret_cast<ReleaseDCProc>(::GetProcAddress(user32Dll, "ReleaseDC"));
+  if (!sReleaseDC) {
+    return false;
+  }
+
+  sLoadIconW =
+      reinterpret_cast<LoadIconWProc>(::GetProcAddress(user32Dll, "LoadIconW"));
+  if (!sLoadIconW) {
+    return false;
+  }
+
+  sLoadCursorW = reinterpret_cast<LoadCursorWProc>(
+      ::GetProcAddress(user32Dll, "LoadCursorW"));
+  if (!sLoadCursorW) {
+    return false;
+  }
+
+  sMonitorFromWindow = reinterpret_cast<MonitorFromWindowProc>(
+      ::GetProcAddress(user32Dll, "MonitorFromWindow"));
+  if (!sMonitorFromWindow) {
+    return false;
+  }
+
+  sGetMonitorInfoW = reinterpret_cast<GetMonitorInfoWProc>(
+      ::GetProcAddress(user32Dll, "GetMonitorInfoW"));
+  if (!sGetMonitorInfoW) {
+    return false;
+  }
+
+  sSetWindowLongPtrW = reinterpret_cast<SetWindowLongPtrWProc>(
+      ::GetProcAddress(user32Dll, "SetWindowLongPtrW"));
+  if (!sSetWindowLongPtrW) {
+    return false;
+  }
+
+  // --- Required: gdi32 --------------------------------------------------
+  sStretchDIBits = reinterpret_cast<StretchDIBitsProc>(
+      ::GetProcAddress(gdi32Dll, "StretchDIBits"));
+  if (!sStretchDIBits) {
+    return false;
+  }
+
+  sCreateSolidBrush = reinterpret_cast<CreateSolidBrushProc>(
+      ::GetProcAddress(gdi32Dll, "CreateSolidBrush"));
+  if (!sCreateSolidBrush) {
+    return false;
+  }
+
+  sDeleteObject = reinterpret_cast<DeleteObjectProc>(
+      ::GetProcAddress(gdi32Dll, "DeleteObject"));
+  if (!sDeleteObject) {
+    return false;
+  }
+
+  return true;
+}
+
+// Classifies a raw command line argument by its leading flag marker and
+// returns a pointer just past that marker: "--" skips two characters, while a
+// single "-" or "/" skips one. An argument that starts with none of these is
+// not a flag, and nullptr is returned for it.
+const char* NormalizeFlag(const char* arg) {
+  switch (arg[0]) {
+    case '-':
+      // A second dash means the long "--flag" spelling. Reading arg[1] is
+      // safe: at worst it is the terminating NUL of a lone "-".
+      return arg[1] == '-' ? arg + 2 : arg + 1;
+    case '/':
+      return arg + 1;
+    default:
+      return nullptr;
+  }
+}
+
+// Reports whether the environment variable `name` is set to a non-empty
+// string; an unset variable and an empty value both yield false.
+static bool EnvHasValue(const char* name) {
+  const char* value = getenv(name);
+  if (!value) {
+    return false;
+  }
+  return value[0] != '\0';
+}
+
+// Ensures that we only see arguments in the command line which are acceptable.
+// This is based on manual inspection of the list of arguments listed in the MDN
+// page for Gecko/Firefox commandline options:
+// https://developer.mozilla.org/en-US/docs/Mozilla/Command_Line_Options
+// Broadly speaking, we want to reject any argument which causes us to show
+// something other than the default window at its normal size. Here is a non-
+// exhaustive list of command line options we want to *exclude*:
+//
+//   -ProfileManager : This will display the profile manager window, which does
+//                     not match the skeleton UI at all.
+//
+//   -CreateProfile  : This will display a firefox window with the default
+//                     screen position and size, and not the position and size
+//                     which we have recorded in the registry.
+//
+//   -P <profile>    : This could cause us to display firefox with a position
+//                     and size of a different profile than that in which we
+//                     were previously running.
+//
+//   -width, -height : This will cause the width and height values in the
+//                     registry to be incorrect.
+//
+//   -kiosk          : See above.
+//
+//   -headless       : This one should be rather obvious.
+//
+//   -migration      : This will start with the import wizard, which of course
+//                     does not match the skeleton UI.
+//
+//   -private-window : This is tricky, but the colors of the main content area
+//                     make this not feel great with the white content of the
+//                     default skeleton UI.
+//
+// NOTE: we generally want to skew towards erroneous rejections of the command
+// line rather than erroneous approvals. The consequence of a bad rejection
+// is that we don't show the skeleton UI, which is business as usual. The
+// consequence of a bad approval is that we show it when we're not supposed to,
+// which is visually jarring and can also be unpredictable - there's no
+// guarantee that the code which handles the non-default window is set up to
+// properly handle the transition from the skeleton UI window.
+//
+// Returns true only if every flag in argv[1..argc) is on the approved list;
+// non-flag arguments are always accepted. Returns false on an unapproved flag
+// or on allocation failure. `explicitProfile` is an out-param: it is set to
+// true when the approved "profile" flag is present, and is otherwise left
+// untouched (callers initialize it to false).
+bool AreAllCmdlineArgumentsApproved(int argc, char** argv,
+                                    bool* explicitProfile) {
+  const char* approvedArgumentsArray[] = {
+      // These won't cause the browser to be visually different in any way
+      "new-instance", "no-remote", "browser", "foreground", "setDefaultBrowser",
+      "attach-console", "wait-for-browser", "osint",
+
+      // These will cause the chrome to be a bit different or extra windows to
+      // be created, but overall the skeleton UI should still be broadly
+      // correct enough.
+      "new-tab", "new-window",
+
+      // To the extent possible, we want to ensure that existing tests cover
+      // the skeleton UI, so we need to allow marionette
+      "marionette",
+
+      // These will cause the content area to appear different, but won't
+      // meaningfully affect the chrome
+      "preferences", "search", "url",
+
+#ifndef MOZILLA_OFFICIAL
+      // On local builds, we want to allow -profile, because it's how `mach run`
+      // operates, and excluding that would create an unnecessary blind spot for
+      // Firefox devs.
+      "profile"
+#endif
+
+      // There are other arguments which are likely okay. However, they are
+      // not included here because this list is not intended to be
+      // exhaustive - it only intends to green-light some somewhat commonly
+      // used arguments. We want to err on the side of an unnecessary
+      // rejection of the command line.
+  };
+
+  int approvedArgumentsArraySize =
+      sizeof(approvedArgumentsArray) / sizeof(approvedArgumentsArray[0]);
+  Vector<const char*> approvedArguments;
+  if (!approvedArguments.reserve(approvedArgumentsArraySize)) {
+    return false;
+  }
+
+  for (int i = 0; i < approvedArgumentsArraySize; ++i) {
+    approvedArguments.infallibleAppend(approvedArgumentsArray[i]);
+  }
+
+  // profileArgIndex is the position of the "profile" entry *within
+  // approvedArguments*, or -1 when "profile" is not approved for this launch.
+#ifdef MOZILLA_OFFICIAL
+  int profileArgIndex = -1;
+  // If we're running mochitests or direct marionette tests, those specify a
+  // temporary profile, and we want to ensure that we get the added coverage
+  // from those.
+  for (int i = 1; i < argc; ++i) {
+    const char* flag = NormalizeFlag(argv[i]);
+    if (flag && !strcmp(flag, "marionette")) {
+      if (!approvedArguments.append("profile")) {
+        return false;
+      }
+      profileArgIndex = approvedArguments.length() - 1;
+
+      break;
+    }
+  }
+#else
+  // On non-official builds "profile" is the last entry of the array above.
+  int profileArgIndex = approvedArguments.length() - 1;
+#endif
+
+  for (int i = 1; i < argc; ++i) {
+    const char* flag = NormalizeFlag(argv[i]);
+    if (!flag) {
+      // If this is not a flag, then we interpret it as a URL, similar to
+      // BrowserContentHandler.jsm. Some command line options take additional
+      // arguments, which may or may not be URLs. We don't need to know this,
+      // because we don't need to parse them out; we just rely on the
+      // assumption that if arg X is actually a parameter for the preceding
+      // arg Y, then X must not look like a flag (starting with "--", "-",
+      // or "/").
+      //
+      // The most important thing here is the assumption that if something is
+      // going to meaningfully alter the appearance of the window itself, it
+      // must be a flag.
+      continue;
+    }
+
+    bool approved = false;
+    for (size_t approvedIndex = 0; approvedIndex < approvedArguments.length();
+         ++approvedIndex) {
+      // We do a case-insensitive compare here with _stricmp. Even though some
+      // of these arguments are *not* read as case-insensitive, others *are*.
+      // Similar to the flag logic above, we don't really care about this
+      // distinction, because we don't need to parse the arguments - we just
+      // rely on the assumption that none of the listed flags in our
+      // approvedArguments are overloaded in such a way that a different
+      // casing would visually alter the firefox window.
+      if (!_stricmp(flag, approvedArguments[approvedIndex])) {
+        approved = true;
+
+        // Compare against the index of the matched approved entry. Comparing
+        // against the argv index `i` would only flag an explicit profile when
+        // the flag happened to sit at that exact command line position.
+        if (profileArgIndex >= 0 &&
+            approvedIndex == static_cast<size_t>(profileArgIndex)) {
+          *explicitProfile = true;
+        }
+        break;
+      }
+    }
+
+    if (!approved) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Sanity check for a window size: returns true only when the width does not
+// exceed kMaxWindowWidth and the height does not exceed kMaxWindowHeight.
+static bool VerifyWindowDimensions(uint32_t windowWidth,
+                                   uint32_t windowHeight) {
+  if (windowWidth > kMaxWindowWidth) {
+    return false;
+  }
+  return windowHeight <= kMaxWindowHeight;
+}
+
+// Creates the pre-XUL skeleton UI window, if it is allowed and enabled, and
+// stores its handle in sPreXULSkeletonUIWindow for later consumption.
+//
+// The steps, in order:
+//   1. Mark the skeleton UI as disallowed and bail out if the command line or
+//      the environment indicates something other than a default startup.
+//   2. Bail out unless this is Windows 10 or later and the "enabled" registry
+//      value exists and is non-zero.
+//   3. Load the user32/gdi32 entry points, take the skeleton UI lock, check
+//      the profile (unless one was given explicitly) and register the window
+//      class.
+//   4. Read the persisted geometry, flags, scaling, spans and theme from the
+//      registry. Any missing or malformed value aborts.
+//   5. Create, show and draw the window, then start the animation thread.
+//
+// The registry functions report failure through their return value, so that
+// status (and not GetLastError()) is what gets logged for failed reads.
+void CreateAndStorePreXULSkeletonUI(HINSTANCE hInstance, int argc,
+                                    char** argv) {
+#ifdef MOZ_GECKO_PROFILER
+  const TimeStamp skeletonStart = TimeStamp::NowUnfuzzed();
+#endif
+
+  bool explicitProfile = false;
+  if (!AreAllCmdlineArgumentsApproved(argc, argv, &explicitProfile) ||
+      EnvHasValue("MOZ_SAFE_MODE_RESTART") || EnvHasValue("XRE_PROFILE_PATH") ||
+      EnvHasValue("MOZ_RESET_PROFILE_RESTART") || EnvHasValue("MOZ_HEADLESS")) {
+    sPreXULSkeletonUIDisallowed = true;
+    return;
+  }
+
+  HKEY regKey;
+  if (!IsWin10OrLater() || !OpenPreXULSkeletonUIRegKey(regKey)) {
+    return;
+  }
+  AutoCloseRegKey closeKey(regKey);
+
+  UniquePtr<wchar_t[]> binPath = GetBinaryPath();
+
+  // Reads the REG_DWORD value whose name is built from this binary's path and
+  // `suffix`, handing the registry status back so the caller can log it.
+  auto readDword = [&](const auto& suffix, uint32_t* out) -> LSTATUS {
+    DWORD len = sizeof(uint32_t);
+    return ::RegGetValueW(
+        regKey, nullptr, GetRegValueName(binPath.get(), suffix).c_str(),
+        RRF_RT_REG_DWORD, nullptr, reinterpret_cast<PBYTE>(out), &len);
+  };
+
+  // A missing or zero "enabled" value is the normal disabled state, so it is
+  // not logged.
+  uint32_t enabled;
+  LSTATUS result = readDword(sEnabledRegSuffix, &enabled);
+  if (result != ERROR_SUCCESS || enabled == 0) {
+    return;
+  }
+  sPreXULSkeletonUIEnabled = true;
+
+  MOZ_ASSERT(!sAnimatedRects);
+  sAnimatedRects = new Vector<ColorRect>();
+
+  if (!LoadGdi32AndUser32Procedures()) {
+    return;
+  }
+
+  if (!TryGetSkeletonUILock()) {
+    printf_stderr("Error trying to get skeleton UI lock %lu\n", GetLastError());
+    return;
+  }
+
+  if (!explicitProfile && !CheckForStartWithLastProfile()) {
+    return;
+  }
+
+  WNDCLASSW wc;
+  wc.style = CS_DBLCLKS;
+  wc.lpfnWndProc = PreXULSkeletonUIProc;
+  wc.cbClsExtra = 0;
+  wc.cbWndExtra = 0;
+  wc.hInstance = hInstance;
+  wc.hIcon = sLoadIconW(::GetModuleHandleW(nullptr), gStockApplicationIcon);
+  wc.hCursor = sLoadCursorW(hInstance, gIDCWait);
+  wc.hbrBackground = nullptr;
+  wc.lpszMenuName = nullptr;
+
+  // TODO: just ensure we disable this if we've overridden the window class
+  wc.lpszClassName = L"MozillaWindowClass";
+
+  if (!sRegisterClassW(&wc)) {
+    printf_stderr("RegisterClassW error %lu\n", GetLastError());
+    return;
+  }
+
+  uint32_t screenX;
+  result = readDword(sScreenXRegSuffix, &screenX);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading screenX %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  uint32_t screenY;
+  result = readDword(sScreenYRegSuffix, &screenY);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading screenY %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  uint32_t windowWidth;
+  result = readDword(sWidthRegSuffix, &windowWidth);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading width %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  uint32_t windowHeight;
+  result = readDword(sHeightRegSuffix, &windowHeight);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading height %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  uint32_t maximized;
+  result = readDword(sMaximizedRegSuffix, &maximized);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading maximized %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+  sMaximized = maximized != 0;
+
+  EnumSet<SkeletonUIFlag, uint32_t> flags;
+  uint32_t flagsUint;
+  result = readDword(sFlagsRegSuffix, &flagsUint);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading flags %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+  flags.deserialize(flagsUint);
+
+  DWORD dataLen = sizeof(double);
+  result = ::RegGetValueW(
+      regKey, nullptr,
+      GetRegValueName(binPath.get(), sCssToDevPixelScalingRegSuffix).c_str(),
+      RRF_RT_REG_BINARY, nullptr,
+      reinterpret_cast<PBYTE>(&sCSSToDevPixelScaling), &dataLen);
+  if (result != ERROR_SUCCESS || dataLen != sizeof(double)) {
+    printf_stderr("Error reading cssToDevPixelScaling %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  int showCmd = SW_SHOWNORMAL;
+  DWORD windowStyle = kPreXULSkeletonUIWindowStyle;
+  if (sMaximized) {
+    showCmd = SW_SHOWMAXIMIZED;
+    windowStyle |= WS_MAXIMIZE;
+  }
+
+  // The urlbar and searchbar spans are each persisted as exactly two doubles
+  // (start, end). Read them straight into a double array and require that
+  // exact size, so that an empty or truncated value is rejected instead of
+  // being interpreted from whatever the buffer held before.
+  double spanValues[2] = {};
+  dataLen = sizeof(spanValues);
+  result = ::RegGetValueW(
+      regKey, nullptr,
+      GetRegValueName(binPath.get(), sUrlbarCSSRegSuffix).c_str(),
+      RRF_RT_REG_BINARY, nullptr, reinterpret_cast<PBYTE>(spanValues),
+      &dataLen);
+  if (result != ERROR_SUCCESS || dataLen != sizeof(spanValues)) {
+    printf_stderr("Error reading urlbar %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  CSSPixelSpan urlbar;
+  urlbar.start = spanValues[0];
+  urlbar.end = spanValues[1];
+
+  // dataLen is in/out, so it has to be reset to the buffer size before every
+  // read.
+  dataLen = sizeof(spanValues);
+  result = ::RegGetValueW(
+      regKey, nullptr,
+      GetRegValueName(binPath.get(), sSearchbarRegSuffix).c_str(),
+      RRF_RT_REG_BINARY, nullptr, reinterpret_cast<PBYTE>(spanValues),
+      &dataLen);
+  if (result != ERROR_SUCCESS || dataLen != sizeof(spanValues)) {
+    printf_stderr("Error reading searchbar %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  CSSPixelSpan searchbar;
+  searchbar.start = spanValues[0];
+  searchbar.end = spanValues[1];
+
+  // The springs are a variable-length list of (start, end) pairs: query the
+  // stored size first, then read the data into a buffer of that size.
+  result = ::RegQueryValueExW(
+      regKey, GetRegValueName(binPath.get(), sSpringsCSSRegSuffix).c_str(),
+      nullptr, nullptr, nullptr, &dataLen);
+  if (result != ERROR_SUCCESS || dataLen % (2 * sizeof(double)) != 0) {
+    printf_stderr("Error reading springsCSS %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  auto springsBuffer = MakeUniqueFallible<double[]>(dataLen / sizeof(double));
+  if (!springsBuffer) {
+    return;
+  }
+  result = ::RegGetValueW(
+      regKey, nullptr,
+      GetRegValueName(binPath.get(), sSpringsCSSRegSuffix).c_str(),
+      RRF_RT_REG_BINARY, nullptr, reinterpret_cast<PBYTE>(springsBuffer.get()),
+      &dataLen);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading springsCSS %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+
+  Vector<CSSPixelSpan> springs;
+  const size_t springCount = dataLen / (2 * sizeof(double));
+  for (size_t i = 0; i < springCount; ++i) {
+    CSSPixelSpan spring;
+    spring.start = springsBuffer[2 * i];
+    spring.end = springsBuffer[2 * i + 1];
+    if (!springs.append(spring)) {
+      return;
+    }
+  }
+
+  uint32_t theme;
+  result = readDword(sThemeRegSuffix, &theme);
+  if (result != ERROR_SUCCESS) {
+    printf_stderr("Error reading theme %lu\n",
+                  static_cast<unsigned long>(result));
+    return;
+  }
+  ThemeMode themeMode = static_cast<ThemeMode>(theme);
+  // The default theme follows the system setting: use the dark variant when
+  // the system dark theme is enabled.
+  if (themeMode == ThemeMode::Default && IsSystemDarkThemeEnabled() == true) {
+    themeMode = ThemeMode::Dark;
+  }
+  ThemeColors currentTheme = GetTheme(themeMode);
+
+  if (!VerifyWindowDimensions(windowWidth, windowHeight)) {
+    printf_stderr("Bad window dimensions for skeleton UI.\n");
+    return;
+  }
+
+  sPreXULSkeletonUIWindow =
+      sCreateWindowExW(kPreXULSkeletonUIWindowStyleEx, L"MozillaWindowClass",
+                       L"", windowStyle, screenX, screenY, windowWidth,
+                       windowHeight, nullptr, nullptr, hInstance, nullptr);
+  if (!sPreXULSkeletonUIWindow) {
+    // Without a window there is nothing to show, measure or draw, so don't
+    // hand a null HWND to the calls below.
+    printf_stderr("CreateWindowExW error %lu\n", GetLastError());
+    return;
+  }
+  sShowWindow(sPreXULSkeletonUIWindow, showCmd);
+
+  sDpi = sGetDpiForWindow(sPreXULSkeletonUIWindow);
+  sNonClientHorizontalMargins =
+      sGetSystemMetricsForDpi(SM_CXFRAME, sDpi) +
+      sGetSystemMetricsForDpi(SM_CXPADDEDBORDER, sDpi);
+  sNonClientVerticalMargins = sGetSystemMetricsForDpi(SM_CYFRAME, sDpi) +
+                              sGetSystemMetricsForDpi(SM_CXPADDEDBORDER, sDpi);
+
+  if (sMaximized) {
+    HMONITOR monitor =
+        sMonitorFromWindow(sPreXULSkeletonUIWindow, MONITOR_DEFAULTTONULL);
+    if (!monitor) {
+      // NOTE: we specifically don't clean up the window here. If we're unable
+      // to finish setting up the window how we want it, we still need to keep
+      // it around and consume it with the first real toplevel window we
+      // create, to avoid flickering.
+      return;
+    }
+    MONITORINFO mi = {sizeof(MONITORINFO)};
+    if (!sGetMonitorInfoW(monitor, &mi)) {
+      return;
+    }
+
+    sWindowWidth =
+        mi.rcWork.right - mi.rcWork.left + sNonClientHorizontalMargins * 2;
+    sWindowHeight =
+        mi.rcWork.bottom - mi.rcWork.top + sNonClientVerticalMargins * 2;
+  } else {
+    sWindowWidth = static_cast<int>(windowWidth);
+    sWindowHeight = static_cast<int>(windowHeight);
+  }
+
+  sSetWindowPos(sPreXULSkeletonUIWindow, 0, 0, 0, 0, 0,
+                SWP_FRAMECHANGED | SWP_NOACTIVATE | SWP_NOMOVE |
+                    SWP_NOOWNERZORDER | SWP_NOSIZE | SWP_NOZORDER);
+  DrawSkeletonUI(sPreXULSkeletonUIWindow, urlbar, searchbar, springs,
+                 currentTheme, flags);
+  if (sAnimatedRects) {
+    sPreXULSKeletonUIAnimationThread = ::CreateThread(
+        nullptr, 256 * 1024, AnimateSkeletonUI, nullptr, 0, nullptr);
+  }
+
+  BASE_PROFILER_MARKER_UNTYPED(
+      "CreatePreXULSkeletonUI", OTHER,
+      MarkerTiming::IntervalUntilNowFrom(skeletonStart));
+}
+
+// Returns sMaximized, i.e. the maximized state that
+// CreateAndStorePreXULSkeletonUI() read back from the registry.
+bool WasPreXULSkeletonUIMaximized() {
+  return sMaximized;
+}
+
+// Hands ownership of the skeleton UI window to the caller and releases every
+// other resource the skeleton UI holds (animation thread handle, pixel buffer,
+// animated rect list). Returns the window handle, or nullptr if no window was
+// created.
+HWND ConsumePreXULSkeletonUIHandle() {
+  // NOTE: we need to make sure that everything that runs here is a no-op if
+  // it failed to be set, which is a possibility. If anything fails to be set
+  // we don't want to clean everything up right away, because if we have a
+  // blank window up, we want that to stick around and get consumed by nsWindow
+  // as normal, otherwise the window will flicker in and out, which we imagine
+  // is unpleasant.
+
+  // If we don't get 1 here, it means the thread is actually just sleeping, so
+  // we don't need to worry about giving out ownership of the window, because
+  // the thread will simply exit after its sleep. However, if it is 1, we need
+  // to wait for the thread to exit to be safe, as it could be doing anything.
+  //
+  // The flag is bumped unconditionally, but the handle is only waited on and
+  // closed when a thread was actually created, so that a null handle is never
+  // passed to WaitForSingleObject/CloseHandle.
+  const bool mustWaitForThread =
+      InterlockedIncrement(&sAnimationControlFlag) == 1;
+  if (sPreXULSKeletonUIAnimationThread) {
+    if (mustWaitForThread) {
+      ::WaitForSingleObject(sPreXULSKeletonUIAnimationThread, INFINITE);
+    }
+    ::CloseHandle(sPreXULSKeletonUIAnimationThread);
+    sPreXULSKeletonUIAnimationThread = nullptr;
+  }
+  HWND result = sPreXULSkeletonUIWindow;
+  sPreXULSkeletonUIWindow = nullptr;
+  free(sPixelBuffer);
+  sPixelBuffer = nullptr;
+  delete sAnimatedRects;
+  sAnimatedRects = nullptr;
+  return result;
+}
+
+// Writes `settings` to the skeleton UI registry key so that the next startup
+// can recreate the window. Does nothing when the skeleton UI is not enabled or
+// the key cannot be opened. Values are written in a fixed order and the
+// function stops at the first write that fails, after logging which value
+// could not be persisted.
+void PersistPreXULSkeletonUIValues(const SkeletonUISettings& settings) {
+  if (!sPreXULSkeletonUIEnabled) {
+    return;
+  }
+
+  HKEY regKey;
+  if (!OpenPreXULSkeletonUIRegKey(regKey)) {
+    return;
+  }
+  AutoCloseRegKey closeKey(regKey);
+
+  UniquePtr<wchar_t[]> binPath = GetBinaryPath();
+
+  // Writes `size` bytes at `data` as a value of registry type `type`, under
+  // the name built from this binary's path and `suffix`. Logs using `label`
+  // and returns false on failure.
+  auto persist = [&](const auto& suffix, DWORD type, const void* data,
+                     size_t size, const char* label) -> bool {
+    LSTATUS status = ::RegSetValueExW(
+        regKey, GetRegValueName(binPath.get(), suffix).c_str(), 0, type,
+        static_cast<const BYTE*>(data), static_cast<DWORD>(size));
+    if (status != ERROR_SUCCESS) {
+      printf_stderr("Failed persisting %s to Windows registry\n", label);
+      return false;
+    }
+    return true;
+  };
+
+  if (!persist(sScreenXRegSuffix, REG_DWORD, &settings.screenX,
+               sizeof(settings.screenX), "screenX")) {
+    return;
+  }
+
+  if (!persist(sScreenYRegSuffix, REG_DWORD, &settings.screenY,
+               sizeof(settings.screenY), "screenY")) {
+    return;
+  }
+
+  if (!persist(sWidthRegSuffix, REG_DWORD, &settings.width,
+               sizeof(settings.width), "width")) {
+    return;
+  }
+
+  if (!persist(sHeightRegSuffix, REG_DWORD, &settings.height,
+               sizeof(settings.height), "height")) {
+    return;
+  }
+
+  // Like every other value, a failure here stops the persist rather than
+  // carrying on with the remaining writes.
+  DWORD maximizedDword = settings.maximized ? 1 : 0;
+  if (!persist(sMaximizedRegSuffix, REG_DWORD, &maximizedDword,
+               sizeof(maximizedDword), "maximized")) {
+    return;
+  }
+
+  // The boolean UI states are packed into a single DWORD.
+  EnumSet<SkeletonUIFlag, uint32_t> flags;
+  if (settings.menubarShown) {
+    flags += SkeletonUIFlag::MenubarShown;
+  }
+  if (settings.bookmarksToolbarShown) {
+    flags += SkeletonUIFlag::BookmarksToolbarShown;
+  }
+  if (settings.rtlEnabled) {
+    flags += SkeletonUIFlag::RtlEnabled;
+  }
+  uint32_t flagsUint = flags.serialize();
+  if (!persist(sFlagsRegSuffix, REG_DWORD, &flagsUint, sizeof(flagsUint),
+               "flags")) {
+    return;
+  }
+
+  if (!persist(sCssToDevPixelScalingRegSuffix, REG_BINARY,
+               &settings.cssToDevPixelScaling,
+               sizeof(settings.cssToDevPixelScaling),
+               "cssToDevPixelScaling")) {
+    return;
+  }
+
+  // Spans are stored as raw (start, end) pairs of doubles.
+  double urlbar[2];
+  urlbar[0] = settings.urlbarSpan.start;
+  urlbar[1] = settings.urlbarSpan.end;
+  if (!persist(sUrlbarCSSRegSuffix, REG_BINARY, urlbar, sizeof(urlbar),
+               "urlbar")) {
+    return;
+  }
+
+  double searchbar[2];
+  searchbar[0] = settings.searchbarSpan.start;
+  searchbar[1] = settings.searchbarSpan.end;
+  if (!persist(sSearchbarRegSuffix, REG_BINARY, searchbar, sizeof(searchbar),
+               "searchbar")) {
+    return;
+  }
+
+  // Flatten the springs into one contiguous array of doubles so they can be
+  // written as a single binary value.
+  Vector<double> springValues;
+  if (!springValues.reserve(settings.springs.length() * 2)) {
+    return;
+  }
+
+  for (const auto& spring : settings.springs) {
+    springValues.infallibleAppend(spring.start);
+    springValues.infallibleAppend(spring.end);
+  }
+
+  persist(sSpringsCSSRegSuffix, REG_BINARY, springValues.begin(),
+          springValues.length() * sizeof(double), "springsCSS");
+}
+
+// Returns the cached enabled state (sPreXULSkeletonUIEnabled); the registry is
+// not consulted here.
+MFBT_API bool GetPreXULSkeletonUIEnabled() {
+  return sPreXULSkeletonUIEnabled;
+}
+
+// Persists the skeleton UI enabled state to the registry and updates the
+// cached state to match, unless the skeleton UI was disallowed for this run.
+MFBT_API void SetPreXULSkeletonUIEnabledIfAllowed(bool value) {
+  // When the pre-XUL skeleton UI was disallowed for this run, registry changes
+  // are deliberately ignored. Otherwise, for example, someone running firefox
+  // with the -profile argument could turn the skeleton UI on or off for the
+  // default profile. Turning it off is probably tolerable (though likely still
+  // incorrect), but turning it on could be bad if the user had specifically
+  // disabled it for a profile. There is no truly correct decision here: the
+  // messiness is a consequence of sharing the registry values across
+  // profiles. Whatever ill effects we observe should correct themselves after
+  // one session.
+  if (sPreXULSkeletonUIDisallowed) {
+    return;
+  }
+
+  HKEY regKey;
+  if (!OpenPreXULSkeletonUIRegKey(regKey)) {
+    return;
+  }
+  AutoCloseRegKey closeKey(regKey);
+
+  UniquePtr<wchar_t[]> binPath = GetBinaryPath();
+  DWORD enabledDword = value;
+  const LSTATUS writeStatus = ::RegSetValueExW(
+      regKey, GetRegValueName(binPath.get(), sEnabledRegSuffix).c_str(), 0,
+      REG_DWORD, reinterpret_cast<PBYTE>(&enabledDword), sizeof(enabledDword));
+  if (writeStatus != ERROR_SUCCESS) {
+    printf_stderr("Failed persisting enabled to Windows registry\n");
+    return;
+  }
+
+  const bool switchingOn = value && !sPreXULSkeletonUIEnabled;
+  if (switchingOn) {
+    // Best effort only: failing to get the lock is fine. Holding it keeps
+    // future instances from creating skeleton UIs while we're still running,
+    // since they will immediately exit and tell us to open a new window.
+    Unused << TryGetSkeletonUILock();
+  }
+
+  sPreXULSkeletonUIEnabled = value;
+}
+
+// Persists `theme` to the registry as a DWORD and caches it in sTheme. Nothing
+// is written when `theme` already equals the cached value. If the write fails,
+// the cache is set to ThemeMode::Invalid.
+MFBT_API void SetPreXULSkeletonUIThemeId(ThemeMode theme) {
+  if (sTheme == theme) {
+    return;
+  }
+
+  HKEY regKey;
+  if (!OpenPreXULSkeletonUIRegKey(regKey)) {
+    return;
+  }
+  AutoCloseRegKey closeKey(regKey);
+
+  UniquePtr<wchar_t[]> binPath = GetBinaryPath();
+  uint32_t serializedTheme = static_cast<uint32_t>(theme);
+  const LSTATUS writeStatus = ::RegSetValueExW(
+      regKey, GetRegValueName(binPath.get(), sThemeRegSuffix).c_str(), 0,
+      REG_DWORD, reinterpret_cast<PBYTE>(&serializedTheme),
+      sizeof(serializedTheme));
+  if (writeStatus == ERROR_SUCCESS) {
+    sTheme = static_cast<ThemeMode>(serializedTheme);
+    return;
+  }
+
+  printf_stderr("Failed persisting theme to Windows registry\n");
+  sTheme = ThemeMode::Invalid;
+}
+
+// Peeks at the skeleton UI window's message queue when the skeleton UI is
+// enabled and its window exists. The final PeekMessageW argument is 0, so the
+// peeked message is left in the queue, and its contents are ignored.
+MFBT_API void PollPreXULSkeletonUIEvents() {
+  if (!sPreXULSkeletonUIEnabled || !sPreXULSkeletonUIWindow) {
+    return;
+  }
+
+  MSG ignoredMsg = {};
+  PeekMessageW(&ignoredMsg, sPreXULSkeletonUIWindow, 0, 0, 0);
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/PreXULSkeletonUI.h b/mozglue/misc/PreXULSkeletonUI.h
new file mode 100644
index 0000000000..6076dff48b
--- /dev/null
+++ b/mozglue/misc/PreXULSkeletonUI.h
@@ -0,0 +1,82 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef PreXULSkeletonUI_h_
+#define PreXULSkeletonUI_h_
+
+#include <windows.h>
+#include "mozilla/EnumSet.h"
+#include "mozilla/Types.h"
+#include "mozilla/Vector.h"
+
+namespace mozilla {
+
+// These unfortunately need to be kept in sync with the window style and
+// extended window style computations in nsWindow. Luckily those styles seem
+// to not vary based off of any user settings for the initial toplevel window,
+// so we're safe here for now.
+//
+// kPreXULSkeletonUIWindowStyle is only the base style: the skeleton UI ORs in
+// WS_MAXIMIZE at window creation time when the persisted state was maximized.
+static const DWORD kPreXULSkeletonUIWindowStyle =
+    WS_CLIPCHILDREN | WS_DLGFRAME | WS_BORDER | WS_MAXIMIZEBOX |
+    WS_MINIMIZEBOX | WS_SIZEBOX | WS_SYSMENU;
+static const DWORD kPreXULSkeletonUIWindowStyleEx = WS_EX_WINDOWEDGE;
+
+// A (start, end) pair of doubles. The urlbar, searchbar and spring spans of
+// SkeletonUISettings use this type and are persisted to the registry as
+// consecutive start/end doubles.
+// NOTE(review): going by the name these are extents measured in CSS pixels;
+// the axis and whether `end` is inclusive are not visible here - confirm.
+struct CSSPixelSpan {
+  double start;
+  double end;
+};
+
+// A (start, end) pair of ints.
+// NOTE(review): presumably the device-pixel counterpart of CSSPixelSpan; its
+// users are not visible from this header - confirm against the drawing code.
+struct DevPixelSpan {
+  int start;
+  int end;
+};
+
+// The window state handed to PersistPreXULSkeletonUIValues(), which writes it
+// to the registry so that the skeleton UI can be recreated on the next
+// startup.
+struct SkeletonUISettings {
+  // Window position and size; each is persisted as its own REG_DWORD.
+  uint32_t screenX;
+  uint32_t screenY;
+  uint32_t width;
+  uint32_t height;
+  // Each span is persisted as two doubles (start, end).
+  CSSPixelSpan urlbarSpan;
+  CSSPixelSpan searchbarSpan;
+  // Persisted as a raw double.
+  double cssToDevPixelScaling;
+  // Persisted as one flat array of (start, end) doubles.
+  Vector<CSSPixelSpan> springs;
+  // Persisted as a REG_DWORD holding 0 or 1.
+  bool maximized;
+  // The following three are packed into an EnumSet of SkeletonUIFlag and
+  // persisted together as a single REG_DWORD.
+  bool menubarShown;
+  bool bookmarksToolbarShown;
+  bool rtlEnabled;
+};
+
+// Theme selection for the skeleton UI. The numeric value is written to the
+// registry as a uint32_t and cast back to ThemeMode when read, so existing
+// enumerators need to keep their values. When Default is read back and the
+// system dark theme is enabled, the skeleton UI uses Dark instead. Invalid is
+// what SetPreXULSkeletonUIThemeId() caches after a failed registry write.
+enum class ThemeMode : uint32_t { Invalid, Default, Dark, Light };
+
+// Boolean UI states that are stored together in an
+// EnumSet<SkeletonUIFlag, uint32_t>, whose serialized form is persisted to the
+// registry as a single REG_DWORD. Each enumerator mirrors the
+// SkeletonUISettings field of the same name.
+enum class SkeletonUIFlag : uint8_t {
+  MenubarShown,
+  BookmarksToolbarShown,
+  RtlEnabled,
+};
+
+// The set of colors used to draw the skeleton UI for one theme. An instance
+// is obtained from GetTheme() for the selected ThemeMode and passed to
+// DrawSkeletonUI().
+// NOTE(review): the channel layout of each uint32_t (e.g. 0xRRGGBB) is not
+// visible from this header - confirm against the drawing code.
+struct ThemeColors {
+  uint32_t backgroundColor;
+  uint32_t toolbarForegroundColor;
+  uint32_t tabBarColor;
+  uint32_t chromeContentDividerColor;
+  uint32_t tabLineColor;
+  uint32_t urlbarColor;
+  uint32_t urlbarBorderColor;
+  uint32_t animationColor;
+};
+
+// Creates the skeleton UI window from the values persisted in the registry,
+// provided the command line (argc/argv) and environment allow it and the
+// skeleton UI is enabled. The window is kept internally until consumed.
+MFBT_API void CreateAndStorePreXULSkeletonUI(HINSTANCE hInstance, int argc,
+                                             char** argv);
+// Returns the skeleton UI window (nullptr if none was created), transferring
+// it to the caller, and releases the skeleton UI's other resources.
+MFBT_API HWND ConsumePreXULSkeletonUIHandle();
+// Returns whether the persisted window state read at creation was maximized.
+MFBT_API bool WasPreXULSkeletonUIMaximized();
+// Writes `settings` to the registry for the next startup. Does nothing when
+// the skeleton UI is not enabled.
+MFBT_API void PersistPreXULSkeletonUIValues(const SkeletonUISettings& settings);
+// Returns the cached enabled state of the skeleton UI.
+MFBT_API bool GetPreXULSkeletonUIEnabled();
+// Persists and caches the enabled state, unless the skeleton UI was disallowed
+// for this run, in which case the call is ignored.
+MFBT_API void SetPreXULSkeletonUIEnabledIfAllowed(bool value);
+// Peeks at the skeleton UI window's message queue, if the window exists.
+MFBT_API void PollPreXULSkeletonUIEvents();
+// Persists and caches the theme to use for the skeleton UI.
+MFBT_API void SetPreXULSkeletonUIThemeId(ThemeMode theme);
+
+}  // namespace mozilla
+
+#endif
diff --git a/mozglue/misc/Printf.cpp b/mozglue/misc/Printf.cpp
new file mode 100644
index 0000000000..4d3306e513
--- /dev/null
+++ b/mozglue/misc/Printf.cpp
@@ -0,0 +1,952 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Portable safe sprintf code.
+ *
+ * Author: Kipp E.B. Hickman
+ */
+
+#include "mozilla/AllocPolicy.h"
+#include "mozilla/Likely.h"
+#include "mozilla/Printf.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/UniquePtrExtensions.h"
+#include "mozilla/Vector.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(XP_WIN)
+#  include <windows.h>
+#endif
+
+/*
+ * VARARGS_ASSIGN(foo, bar) copies the va_list bar into foo. On some platforms
+ * va_list is defined as an array and requires array notation, hence the variants.
+ */
+#ifdef HAVE_VA_COPY
+#  define VARARGS_ASSIGN(foo, bar) VA_COPY(foo, bar)
+#elif defined(HAVE_VA_LIST_AS_ARRAY)
+#  define VARARGS_ASSIGN(foo, bar) foo[0] = bar[0]
+#else
+#  define VARARGS_ASSIGN(foo, bar) (foo) = (bar)
+#endif
+
+/*
+ * Numbered Argument State: one entry per numbered ("%N$") argument.
+ */
+struct NumArgState {
+  int type;    // one of the TYPE_* constants; TYPE_UNKNOWN until classified
+  va_list ap;  // va_list positioned at this argument (filled in by BuildArgArray)
+};
+
+typedef mozilla::Vector<NumArgState, 20, mozilla::MallocAllocPolicy>
+    NumArgStateVector;  // inline capacity 20 matches the argument cap in BuildArgArray
+
+#define TYPE_SHORT 0  // integer types: low bit clear = signed, low bit set = unsigned
+#define TYPE_USHORT 1
+#define TYPE_INTN 2
+#define TYPE_UINTN 3
+#define TYPE_LONG 4
+#define TYPE_ULONG 5
+#define TYPE_LONGLONG 6
+#define TYPE_ULONGLONG 7
+#define TYPE_STRING 8  // char* argument (%s)
+#define TYPE_DOUBLE 9  // double argument (%e, %f, %g)
+#define TYPE_INTSTR 10  // int* argument (%n)
+#define TYPE_POINTER 11  // void* argument (%p)
+#if defined(XP_WIN)
+#  define TYPE_WSTRING 12  // wchar_t* argument (%S, %ls)
+#endif
+#define TYPE_UNKNOWN 20  // not classified yet / invalid
+
+#define FLAG_LEFT 0x1  // '-' flag: pad on the right
+#define FLAG_SIGNED 0x2  // '+' flag
+#define FLAG_SPACED 0x4  // ' ' flag
+#define FLAG_ZEROS 0x8  // '0' flag: pad with zeros
+#define FLAG_NEG 0x10  // value was negative; fill_n emits '-'
+
+static const char hex[] = "0123456789abcdef";  // default digit table
+static const char HEX[] = "0123456789ABCDEF";  // digit table selected by %X
+
+// Emit |srclen| bytes of |src| padded out to |width| columns. The padding goes
+// on the left unless FLAG_LEFT is set; left padding uses '0' when FLAG_ZEROS is
+// set, right padding is always spaces. Returns false as soon as emit() fails.
+bool mozilla::PrintfTarget::fill2(const char* src, int srclen, int width,
+                                  int flags) {
+  const bool leftAdjust = (flags & FLAG_LEFT) != 0;
+  const int padCount = width - srclen;
+
+  if (!leftAdjust && padCount > 0) {
+    // Right adjusting: the padding comes first.
+    const char padChar = (flags & FLAG_ZEROS) ? '0' : ' ';
+    for (int n = 0; n < padCount; n++) {
+      if (!emit(&padChar, 1)) return false;
+    }
+  }
+
+  // The payload itself.
+  if (!emit(src, srclen)) return false;
+
+  if (leftAdjust && padCount > 0) {
+    // Left adjusting: the padding follows the payload.
+    const char blank = ' ';
+    for (int n = 0; n < padCount; n++) {
+      if (!emit(&blank, 1)) return false;
+    }
+  }
+  return true;
+}
+
+/*
+ * Emit a converted number. Output order: left padding spaces, optional sign,
+ * precision zeros, width zeros, the digits in |src|, right padding spaces.
+ */
+bool mozilla::PrintfTarget::fill_n(const char* src, int srclen, int width,
+                                   int prec, int type, int flags) {
+  // Emits the single byte at |ch| |count| times; a count <= 0 emits nothing.
+  auto repeat = [this](const char* ch, int count) {
+    for (int n = 0; n < count; n++) {
+      if (!emit(ch, 1)) return false;
+    }
+    return true;
+  };
+
+  // Only signed conversions (low bit of |type| clear) may carry a sign.
+  char sign = 0;
+  if ((type & 1) == 0) {
+    if (flags & FLAG_NEG) {
+      sign = '-';
+    } else if (flags & FLAG_SIGNED) {
+      sign = '+';
+    } else if (flags & FLAG_SPACED) {
+      sign = ' ';
+    }
+  }
+  const bool hasSign = sign != 0;
+
+  // Running total of the columns accounted for so far.
+  int used = (hasSign ? 1 : 0) + srclen;
+
+  // Zeros demanded by an explicit precision larger than the digit count.
+  int precZeros = 0;
+  if (prec > 0 && prec > srclen) {
+    precZeros = prec - srclen;
+    used += precZeros;
+  }
+
+  // Zeros demanded by the '0' flag, applied only when no precision was given.
+  int widthZeros = 0;
+  if ((flags & FLAG_ZEROS) && prec < 0 && width > used) {
+    widthZeros = width - used;
+    used += widthZeros;
+  }
+
+  // Whatever is still missing is made up with spaces on one side.
+  const int spaces = (width > used) ? width - used : 0;
+  const bool leftAdjust = (flags & FLAG_LEFT) != 0;
+
+  if (!leftAdjust && !repeat(" ", spaces)) return false;
+  if (hasSign && !emit(&sign, 1)) return false;
+  if (!repeat("0", precZeros)) return false;
+  if (!repeat("0", widthZeros)) return false;
+  if (!emit(src, uint32_t(srclen))) return false;
+  if (leftAdjust && !repeat(" ", spaces)) return false;
+  return true;
+}
+
+// All that the cvt_* functions care about as far as the TYPE_* constants is
+// that the low bit is set to indicate unsigned, or unset to indicate signed.
+// So we don't try too hard to ensure that the passed TYPE_* constant lines
+// up with the actual size of the number being printed here.  The main printf
+// code, below, does have to care so that the correct bits are extracted from
+// the varargs list.
+// Fast path equivalent to formatting |num| with %d. Returns what cvt_l()
+// returns.
+bool mozilla::PrintfTarget::appendIntDec(int32_t num) {
+  int flags = 0;
+  long n = num;
+  if (n < 0) {
+    // Negate in unsigned arithmetic: where long is 32 bits wide, -INT32_MIN is
+    // not representable and a signed negation would be undefined behavior.
+    // cvt_l() reads the value back as an unsigned long magnitude anyway.
+    n = long(0UL - static_cast<unsigned long>(n));
+    flags |= FLAG_NEG;
+  }
+  return cvt_l(n, -1, -1, 10, TYPE_INTN, flags, hex);
+}
+
+// Fast path equivalent to formatting |num| with %u.
+bool mozilla::PrintfTarget::appendIntDec(uint32_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_l(num, noWidth, noPrec, /* radix */ 10, TYPE_UINTN, noFlags, hex);
+}
+
+// Fast path equivalent to formatting |num| with %o.
+bool mozilla::PrintfTarget::appendIntOct(uint32_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_l(num, noWidth, noPrec, /* radix */ 8, TYPE_UINTN, noFlags, hex);
+}
+
+// Fast path equivalent to formatting |num| with %x.
+bool mozilla::PrintfTarget::appendIntHex(uint32_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_l(num, noWidth, noPrec, /* radix */ 16, TYPE_UINTN, noFlags, hex);
+}
+
+// Fast path equivalent to formatting a 64-bit |num| with %d. Returns what
+// cvt_ll() returns.
+bool mozilla::PrintfTarget::appendIntDec(int64_t num) {
+  int flags = 0;
+  if (num < 0) {
+    // Negate in unsigned arithmetic: -INT64_MIN is not representable and a
+    // signed negation would be undefined behavior. cvt_ll() reads the value
+    // back as a uint64_t magnitude anyway.
+    num = int64_t(uint64_t(0) - uint64_t(num));
+    flags |= FLAG_NEG;
+  }
+  return cvt_ll(num, -1, -1, 10, TYPE_INTN, flags, hex);
+}
+
+// Fast path equivalent to formatting a 64-bit |num| with %u.
+bool mozilla::PrintfTarget::appendIntDec(uint64_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_ll(num, noWidth, noPrec, /* radix */ 10, TYPE_UINTN, noFlags, hex);
+}
+
+// Fast path equivalent to formatting a 64-bit |num| with %o.
+bool mozilla::PrintfTarget::appendIntOct(uint64_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_ll(num, noWidth, noPrec, /* radix */ 8, TYPE_UINTN, noFlags, hex);
+}
+
+// Fast path equivalent to formatting a 64-bit |num| with %x.
+bool mozilla::PrintfTarget::appendIntHex(uint64_t num) {
+  const int noWidth = -1;
+  const int noPrec = -1;
+  const int noFlags = 0;
+  return cvt_ll(num, noWidth, noPrec, /* radix */ 16, TYPE_UINTN, noFlags, hex);
+}
+
+/*
+ * Render |num|, read as an unsigned long magnitude, in base |radix| using the
+ * digit table |hexp|, then let fill_n() add the sign and padding.
+ */
+bool mozilla::PrintfTarget::cvt_l(long num, int width, int prec, int radix,
+                                  int type, int flags, const char* hexp) {
+  // A zero value combined with an explicit zero precision prints nothing.
+  if ((num == 0) && (prec == 0)) return true;
+
+  // Digits come out least-significant first, so the scratch buffer is filled
+  // from its end towards its start.
+  char scratch[100];
+  char* const limit = scratch + sizeof(scratch);
+  char* out = limit;
+  for (unsigned long rest = (unsigned long)num; rest != 0; rest /= radix) {
+    *--out = hexp[(rest % radix) & 0xF];
+  }
+  if (out == limit) {
+    // No digit was produced, i.e. the value is zero.
+    *--out = '0';
+  }
+
+  // The magnitude is converted; fill_n() deals with sign and zero padding.
+  return fill_n(out, int(limit - out), width, prec, type, flags);
+}
+
+/*
+ * 64-bit counterpart of cvt_l(): render |num|, read as a uint64_t magnitude,
+ * in base |radix| using the digit table |hexp|, then hand over to fill_n().
+ */
+bool mozilla::PrintfTarget::cvt_ll(int64_t num, int width, int prec, int radix,
+                                   int type, int flags, const char* hexp) {
+  // A zero value combined with an explicit zero precision prints nothing.
+  if (num == 0 && prec == 0) return true;
+
+  // Digits come out least-significant first, so the scratch buffer is filled
+  // from its end towards its start.
+  const uint64_t base = uint64_t(int64_t(radix));
+  char scratch[100];
+  char* const limit = scratch + sizeof(scratch);
+  char* out = limit;
+  for (uint64_t rest = uint64_t(num); rest != 0; rest /= base) {
+    *--out = hexp[(rest % base) & 0xf];
+  }
+  if (out == limit) {
+    // No digit was produced, i.e. the value is zero.
+    *--out = '0';
+  }
+
+  // The magnitude is converted; fill_n() deals with sign and zero padding.
+  return fill_n(out, int(limit - out), width, prec, type, flags);
+}
+
+/*
+ * Convert a double precision floating point number into its printable
+ * form.
+ *
+ * [fmt0, fmt1) is the text of the % directive, which is copied and handed to
+ * the native snprintf. A directive that does not fit the local copy is
+ * ignored (returns true). Returns false if emit() fails, if the fallback
+ * buffer cannot be allocated, or if the native formatter reports an error.
+ */
+bool mozilla::PrintfTarget::cvt_f(double d, const char* fmt0,
+                                  const char* fmt1) {
+  char fin[20];
+  // The size is chosen such that we can print DBL_MAX.  See bug#1350097.
+  char fout[320];
+  int amount = fmt1 - fmt0;
+
+  MOZ_ASSERT((amount > 0) && (amount < (int)sizeof(fin)));
+  if (amount <= 0 || amount >= (int)sizeof(fin)) {
+    // Totally bogus % command to sprintf. Just ignore it. The lower bound
+    // keeps a negative length from being converted to a huge size_t below.
+    return true;
+  }
+  memcpy(fin, fmt0, (size_t)amount);
+  fin[amount] = 0;
+
+  // Convert floating point using the native snprintf code
+#ifdef DEBUG
+  {
+    const char* p = fin;
+    while (*p) {
+      MOZ_ASSERT(*p != 'L');
+      p++;
+    }
+  }
+#endif
+  int written = SprintfLiteral(fout, fin, d);
+  if (written < 0) {
+    // snprintf signals an output error with a negative value; do not let it
+    // turn into a huge length.
+    return false;
+  }
+  size_t len = size_t(written);
+  // Note that SprintfLiteral will always write a \0 at the end, so a
+  // "<=" check here would be incorrect -- the buffer size passed to
+  // snprintf includes the trailing \0, but the returned length does
+  // not.
+  if (MOZ_LIKELY(len < sizeof(fout))) {
+    return emit(fout, len);
+  }
+
+  // Maybe the user used "%500.500f" or something like that.
+  size_t buf_size = len + 1;
+  UniqueFreePtr<char> buf((char*)malloc(buf_size));
+  if (!buf) {
+    return false;
+  }
+  written = snprintf(buf.get(), buf_size, fin, d);
+  if (written < 0) {
+    return false;
+  }
+  len = size_t(written);
+  // If this assert fails, then SprintfLiteral has a bug -- and in
+  // this case we would like to learn of it, which is why there is a
+  // release assert.
+  MOZ_RELEASE_ASSERT(len < buf_size);
+
+  return emit(buf.get(), len);
+}
+
+/*
+ * Emit the string |s| padded to |width| columns. |prec| caps how many
+ * characters of |s| are taken: -1 means the whole string up to its NUL, and
+ * 0 produces no output at all. A null |s| is printed as "(null)".
+ */
+bool mozilla::PrintfTarget::cvt_s(const char* s, int width, int prec,
+                                  int flags) {
+  // An explicit precision of zero suppresses the output entirely.
+  if (prec == 0) return true;
+
+  const char* text = s ? s : "(null)";
+
+  // A positive precision shorter than the string truncates it.
+  const int fullLen = int(strlen(text));
+  const int outLen = (prec > 0 && prec < fullLen) ? prec : fullLen;
+
+  return fill2(text, outLen, width, flags);
+}
+
+/*
+ * BuildArgArray prepares the table used for numbered ("%N$...") arguments,
+ * for example,
+ *      fmt = "%4$i, %2$d, %3$s, %1$d";
+ * The numbers must start from 1, with no gaps among them.
+ *
+ * When |fmt| uses no numbered arguments, |nas| is left empty. Otherwise |nas|
+ * receives one entry per numbered directive, holding the argument's TYPE_*
+ * constant and a va_list positioned at that argument.
+ *
+ * Returns false only if |nas| cannot be grown. A malformed numbered format
+ * (mixing numbered and plain directives, '*' width or precision, an index out
+ * of range, an unknown conversion) ends in MOZ_CRASH.
+ */
+static bool BuildArgArray(const char* fmt, va_list ap, NumArgStateVector& nas) {
+  size_t number = 0, cn = 0, i;
+  const char* p;
+  char c;
+
+  // First pass:
+  // Determine how many legal % I have got, then allocate space.
+
+  p = fmt;
+  i = 0;
+  while ((c = *p++) != 0) {
+    if (c != '%') continue;
+    if ((c = *p++) == '%')  // skip %% case
+      continue;
+
+    while (c != 0) {
+      if (c > '9' || c < '0') {
+        if (c == '$') {  // numbered argument case
+          if (i > 0) MOZ_CRASH("Bad format string");
+          number++;
+        } else {  // non-numbered argument case
+          if (number > 0) MOZ_CRASH("Bad format string");
+          i = 1;
+        }
+        break;
+      }
+
+      c = *p++;
+    }
+
+    // The format ended inside a directive (e.g. a trailing '%' or "%12").
+    // |p| is already past the terminator, so stop instead of reading on.
+    if (c == 0) break;
+  }
+
+  if (number == 0) return true;
+
+  // Only allow a limited number of arguments.
+  MOZ_RELEASE_ASSERT(number <= 20);
+
+  if (!nas.growByUninitialized(number)) return false;
+
+  for (i = 0; i < number; i++) nas[i].type = TYPE_UNKNOWN;
+
+  // Second pass:
+  // Set nas[].type.
+
+  p = fmt;
+  while ((c = *p++) != 0) {
+    if (c != '%') continue;
+    c = *p++;
+    if (c == '%') continue;
+
+    cn = 0;
+    while (c && c != '$') {  // should improve error check later
+      cn = cn * 10 + c - '0';
+      c = *p++;
+    }
+
+    if (!c || cn < 1 || cn > number) MOZ_CRASH("Bad format string");
+
+    // nas[cn] starts from 0, and make sure nas[cn].type is not assigned.
+    cn--;
+    if (nas[cn].type != TYPE_UNKNOWN) continue;
+
+    c = *p++;
+
+    // flags
+    while ((c == '-') || (c == '+') || (c == ' ') || (c == '0')) {
+      c = *p++;
+    }
+
+    // width
+    if (c == '*') {
+      // not supported feature, for the argument is not numbered
+      MOZ_CRASH("Bad format string");
+    }
+
+    while ((c >= '0') && (c <= '9')) {
+      c = *p++;
+    }
+
+    // precision
+    if (c == '.') {
+      c = *p++;
+      if (c == '*') {
+        // not supported feature, for the argument is not numbered
+        MOZ_CRASH("Bad format string");
+      }
+
+      while ((c >= '0') && (c <= '9')) {
+        c = *p++;
+      }
+    }
+
+    // size
+    nas[cn].type = TYPE_INTN;
+    if (c == 'h') {
+      nas[cn].type = TYPE_SHORT;
+      c = *p++;
+    } else if (c == 'L') {
+      nas[cn].type = TYPE_LONGLONG;
+      c = *p++;
+    } else if (c == 'l') {
+      nas[cn].type = TYPE_LONG;
+      c = *p++;
+      if (c == 'l') {
+        nas[cn].type = TYPE_LONGLONG;
+        c = *p++;
+      }
+    } else if (c == 'z' || c == 'I') {
+      static_assert(sizeof(size_t) == sizeof(int) ||
+                        sizeof(size_t) == sizeof(long) ||
+                        sizeof(size_t) == sizeof(long long),
+                    "size_t is not one of the expected sizes");
+      nas[cn].type = sizeof(size_t) == sizeof(int)    ? TYPE_INTN
+                     : sizeof(size_t) == sizeof(long) ? TYPE_LONG
+                                                      : TYPE_LONGLONG;
+      c = *p++;
+    }
+
+    // format
+    switch (c) {
+      case 'd':
+      case 'c':
+      case 'i':
+        break;
+
+      case 'o':
+      case 'u':
+      case 'x':
+      case 'X':
+        // Mark as unsigned type.
+        nas[cn].type |= 1;
+        break;
+
+      case 'e':
+      case 'E':  // accepted by vprint, so it has to be accepted here as well
+      case 'f':
+      case 'g':
+        nas[cn].type = TYPE_DOUBLE;
+        break;
+
+      case 'p':
+        nas[cn].type = TYPE_POINTER;
+        break;
+
+      case 'S':
+#if defined(XP_WIN)
+        nas[cn].type = TYPE_WSTRING;
+#else
+        MOZ_ASSERT(0);
+        nas[cn].type = TYPE_UNKNOWN;
+#endif
+        break;
+
+      case 's':
+#if defined(XP_WIN)
+        if (nas[cn].type == TYPE_LONG) {
+          nas[cn].type = TYPE_WSTRING;
+          break;
+        }
+#endif
+        // Other type sizes are not supported here.
+        MOZ_ASSERT(nas[cn].type == TYPE_INTN);
+        nas[cn].type = TYPE_STRING;
+        break;
+
+      case 'n':
+        nas[cn].type = TYPE_INTSTR;
+        break;
+
+      default:
+        MOZ_ASSERT(0);
+        nas[cn].type = TYPE_UNKNOWN;
+        break;
+    }
+
+    // get a legal para.
+    if (nas[cn].type == TYPE_UNKNOWN) MOZ_CRASH("Bad format string");
+  }
+
+  // Third pass:
+  // Fill nas[].ap by walking the real argument list in order, remembering
+  // where each argument starts and skipping over it by its type.
+
+  cn = 0;
+  while (cn < number) {
+    // A TYPE_UNKNOWN here means that the format asked for a
+    // positional argument without specifying the meaning of some
+    // earlier argument.
+    MOZ_ASSERT(nas[cn].type != TYPE_UNKNOWN);
+
+    VARARGS_ASSIGN(nas[cn].ap, ap);
+
+    switch (nas[cn].type) {
+      case TYPE_SHORT:
+      case TYPE_USHORT:
+      case TYPE_INTN:
+      case TYPE_UINTN:
+        (void)va_arg(ap, int);
+        break;
+      case TYPE_LONG:
+        (void)va_arg(ap, long);
+        break;
+      case TYPE_ULONG:
+        (void)va_arg(ap, unsigned long);
+        break;
+      case TYPE_LONGLONG:
+        (void)va_arg(ap, long long);
+        break;
+      case TYPE_ULONGLONG:
+        (void)va_arg(ap, unsigned long long);
+        break;
+      case TYPE_STRING:
+        (void)va_arg(ap, char*);
+        break;
+      case TYPE_INTSTR:
+        (void)va_arg(ap, int*);
+        break;
+      case TYPE_DOUBLE:
+        (void)va_arg(ap, double);
+        break;
+      case TYPE_POINTER:
+        (void)va_arg(ap, void*);
+        break;
+#if defined(XP_WIN)
+      case TYPE_WSTRING:
+        (void)va_arg(ap, wchar_t*);
+        break;
+#endif
+
+      default:
+        MOZ_CRASH();
+    }
+
+    cn++;
+  }
+
+  return true;
+}
+
+mozilla::PrintfTarget::PrintfTarget() : mEmitted(0) {}
+
+/*
+ * The formatting engine. Walks |fmt|, passing literal text to emit() and
+ * converting each % directive with the arguments taken from |ap|. When the
+ * format uses numbered ("%N$") arguments, BuildArgArray() supplies a va_list
+ * per argument and |ap| is repositioned for every directive.
+ *
+ * Returns false as soon as emit() (or a helper that calls it) fails, or when
+ * a temporary buffer cannot be allocated; true otherwise. A malformed
+ * numbered format ends in MOZ_CRASH.
+ */
+bool mozilla::PrintfTarget::vprint(const char* fmt, va_list ap) {
+  char c;
+  int flags, width, prec, radix, type;
+  union {
+    char ch;
+    int i;
+    long l;
+    long long ll;
+    double d;
+    const char* s;
+    int* ip;
+    void* p;
+#if defined(XP_WIN)
+    const wchar_t* ws;
+#endif
+  } u;
+  const char* fmt0;
+  const char* hexp;
+  int i;
+  char pattern[20];
+  const char* dolPt = nullptr;  // in "%4$.2f", dolPt will point to '.'
+
+  // Build an argument array, IF the fmt is numbered argument
+  // list style, to contain the Numbered Argument list pointers.
+
+  NumArgStateVector nas;
+  if (!BuildArgArray(fmt, ap, nas)) {
+    // the fmt contains error Numbered Argument format, jliu@netscape.com
+    MOZ_CRASH("Bad format string");
+  }
+
+  while ((c = *fmt++) != 0) {
+    if (c != '%') {
+      if (!emit(fmt - 1, 1)) return false;
+
+      continue;
+    }
+    fmt0 = fmt - 1;
+
+    // Gobble up the % format string. Hopefully we have handled all
+    // of the strange cases!
+    flags = 0;
+    c = *fmt++;
+    if (c == '%') {
+      // quoting a % with %%
+      if (!emit(fmt - 1, 1)) return false;
+
+      continue;
+    }
+
+    if (!nas.empty()) {
+      // the fmt contains the Numbered Arguments feature
+      i = 0;
+      while (c && c != '$') {  // should improve error check later
+        i = (i * 10) + (c - '0');
+        c = *fmt++;
+      }
+
+      if (nas[i - 1].type == TYPE_UNKNOWN) MOZ_CRASH("Bad format string");
+
+      ap = nas[i - 1].ap;
+      dolPt = fmt;
+      c = *fmt++;
+    }
+
+    // Examine optional flags.  Note that we do not implement the
+    // '#' flag of sprintf().  The ANSI C spec. of the '#' flag is
+    // somewhat ambiguous and not ideal, which is perhaps why
+    // the various sprintf() implementations are inconsistent
+    // on this feature.
+    while ((c == '-') || (c == '+') || (c == ' ') || (c == '0')) {
+      if (c == '-') flags |= FLAG_LEFT;
+      if (c == '+') flags |= FLAG_SIGNED;
+      if (c == ' ') flags |= FLAG_SPACED;
+      if (c == '0') flags |= FLAG_ZEROS;
+      c = *fmt++;
+    }
+    if (flags & FLAG_SIGNED) flags &= ~FLAG_SPACED;
+    if (flags & FLAG_LEFT) flags &= ~FLAG_ZEROS;
+
+    // width
+    if (c == '*') {
+      c = *fmt++;
+      width = va_arg(ap, int);
+      if (width < 0) {
+        width = -width;
+        flags |= FLAG_LEFT;
+        flags &= ~FLAG_ZEROS;
+      }
+    } else {
+      width = 0;
+      while ((c >= '0') && (c <= '9')) {
+        width = (width * 10) + (c - '0');
+        c = *fmt++;
+      }
+    }
+
+    // precision
+    prec = -1;
+    if (c == '.') {
+      c = *fmt++;
+      if (c == '*') {
+        c = *fmt++;
+        prec = va_arg(ap, int);
+      } else {
+        prec = 0;
+        while ((c >= '0') && (c <= '9')) {
+          prec = (prec * 10) + (c - '0');
+          c = *fmt++;
+        }
+      }
+    }
+
+    // size
+    type = TYPE_INTN;
+    if (c == 'h') {
+      type = TYPE_SHORT;
+      c = *fmt++;
+    } else if (c == 'L') {
+      type = TYPE_LONGLONG;
+      c = *fmt++;
+    } else if (c == 'l') {
+      type = TYPE_LONG;
+      c = *fmt++;
+      if (c == 'l') {
+        type = TYPE_LONGLONG;
+        c = *fmt++;
+      }
+    } else if (c == 'z' || c == 'I') {
+      static_assert(sizeof(size_t) == sizeof(int) ||
+                        sizeof(size_t) == sizeof(long) ||
+                        sizeof(size_t) == sizeof(long long),
+                    "size_t is not one of the expected sizes");
+      type = sizeof(size_t) == sizeof(int)    ? TYPE_INTN
+             : sizeof(size_t) == sizeof(long) ? TYPE_LONG
+                                              : TYPE_LONGLONG;
+      c = *fmt++;
+    }
+
+    // format
+    hexp = hex;
+    switch (c) {
+      case 'd':
+      case 'i':  // decimal/integer
+        radix = 10;
+        goto fetch_and_convert;
+
+      case 'o':  // octal
+        radix = 8;
+        type |= 1;
+        goto fetch_and_convert;
+
+      case 'u':  // unsigned decimal
+        radix = 10;
+        type |= 1;
+        goto fetch_and_convert;
+
+      case 'x':  // unsigned hex
+        radix = 16;
+        type |= 1;
+        goto fetch_and_convert;
+
+      case 'X':  // unsigned HEX
+        radix = 16;
+        hexp = HEX;
+        type |= 1;
+        goto fetch_and_convert;
+
+      fetch_and_convert:
+        // Negative values are turned into a magnitude plus FLAG_NEG. The
+        // negation is done in unsigned arithmetic because negating the most
+        // negative value of a signed type is undefined behavior; cvt_l() and
+        // cvt_ll() read the result back as an unsigned magnitude.
+        switch (type) {
+          case TYPE_SHORT:
+            u.l = va_arg(ap, int);
+            if (u.l < 0) {
+              u.l = long(0UL - (unsigned long)u.l);
+              flags |= FLAG_NEG;
+            }
+            goto do_long;
+          case TYPE_USHORT:
+            u.l = (unsigned short)va_arg(ap, unsigned int);
+            goto do_long;
+          case TYPE_INTN:
+            u.l = va_arg(ap, int);
+            if (u.l < 0) {
+              u.l = long(0UL - (unsigned long)u.l);
+              flags |= FLAG_NEG;
+            }
+            goto do_long;
+          case TYPE_UINTN:
+            u.l = (long)va_arg(ap, unsigned int);
+            goto do_long;
+
+          case TYPE_LONG:
+            u.l = va_arg(ap, long);
+            if (u.l < 0) {
+              u.l = long(0UL - (unsigned long)u.l);
+              flags |= FLAG_NEG;
+            }
+            goto do_long;
+          case TYPE_ULONG:
+            u.l = (long)va_arg(ap, unsigned long);
+          do_long:
+            if (!cvt_l(u.l, width, prec, radix, type, flags, hexp))
+              return false;
+
+            break;
+
+          case TYPE_LONGLONG:
+            u.ll = va_arg(ap, long long);
+            if (u.ll < 0) {
+              u.ll = (long long)(0ULL - (unsigned long long)u.ll);
+              flags |= FLAG_NEG;
+            }
+            goto do_longlong;
+          case TYPE_POINTER:
+            u.ll = (uintptr_t)va_arg(ap, void*);
+            goto do_longlong;
+          case TYPE_ULONGLONG:
+            u.ll = va_arg(ap, unsigned long long);
+          do_longlong:
+            if (!cvt_ll(u.ll, width, prec, radix, type, flags, hexp))
+              return false;
+
+            break;
+        }
+        break;
+
+      case 'e':
+      case 'E':
+      case 'f':
+      case 'g':
+        u.d = va_arg(ap, double);
+        if (!nas.empty()) {
+          // Rebuild the directive without its "N$" prefix so that it can be
+          // handed to the native formatter.
+          i = fmt - dolPt;
+          if (i < int(sizeof(pattern))) {
+            pattern[0] = '%';
+            memcpy(&pattern[1], dolPt, size_t(i));
+            if (!cvt_f(u.d, pattern, &pattern[i + 1])) return false;
+          }
+        } else {
+          if (!cvt_f(u.d, fmt0, fmt)) return false;
+        }
+
+        break;
+
+      case 'c':
+        if ((flags & FLAG_LEFT) == 0) {
+          while (width-- > 1) {
+            if (!emit(" ", 1)) return false;
+          }
+        }
+        switch (type) {
+          case TYPE_SHORT:
+          case TYPE_INTN:
+            u.ch = va_arg(ap, int);
+            if (!emit(&u.ch, 1)) return false;
+            break;
+        }
+        if (flags & FLAG_LEFT) {
+          while (width-- > 1) {
+            if (!emit(" ", 1)) return false;
+          }
+        }
+        break;
+
+      case 'p':
+        type = TYPE_POINTER;
+        radix = 16;
+        goto fetch_and_convert;
+
+      case 's':
+        if (type == TYPE_INTN) {
+          u.s = va_arg(ap, const char*);
+          if (!cvt_s(u.s, width, prec, flags)) return false;
+          break;
+        }
+        MOZ_ASSERT(type == TYPE_LONG);
+        [[fallthrough]];
+      case 'S':
+#if defined(XP_WIN)
+      {
+        u.ws = va_arg(ap, const wchar_t*);
+
+        // First call only measures the converted string.
+        int rv = WideCharToMultiByte(CP_ACP, 0, u.ws, -1, NULL, 0, NULL, NULL);
+        if (rv == 0 && GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
+          if (!cvt_s("<unicode errors in string>", width, prec, flags)) {
+            return false;
+          }
+        } else {
+          if (rv == 0) {
+            rv = 1;
+          }
+          UniqueFreePtr<char[]> buf((char*)malloc(rv));
+          if (!buf) {
+            // Out of memory: report failure rather than writing through a
+            // null pointer below.
+            return false;
+          }
+          WideCharToMultiByte(CP_ACP, 0, u.ws, -1, buf.get(), rv, NULL, NULL);
+          buf[rv - 1] = '\0';
+
+          if (!cvt_s(buf.get(), width, prec, flags)) {
+            return false;
+          }
+        }
+      }
+#else
+        // Not supported here.
+        MOZ_ASSERT(0);
+#endif
+      break;
+
+      case 'n':
+        u.ip = va_arg(ap, int*);
+        if (u.ip) {
+          *u.ip = mEmitted;
+        }
+        break;
+
+      default:
+        // Not a % token after all... skip it
+        if (!emit("%", 1)) return false;
+        if (c == 0) {
+          // The format ended in the middle of a directive. Step back onto
+          // the terminator so that the loop stops there, instead of emitting
+          // the NUL byte and reading beyond the end of the string.
+          fmt--;
+          break;
+        }
+        if (!emit(fmt - 1, 1)) return false;
+    }
+  }
+
+  return true;
+}
+
+/************************************************************************/
+
+// Variadic front end: packages the arguments into a va_list and forwards to
+// vprint(), returning its result.
+bool mozilla::PrintfTarget::print(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  const bool ok = vprint(format, args);
+  va_end(args);
+
+  return ok;
+}
+
+#undef TYPE_SHORT  // remove the TYPE_* argument-type macros defined at the top of this file
+#undef TYPE_USHORT
+#undef TYPE_INTN
+#undef TYPE_UINTN
+#undef TYPE_LONG
+#undef TYPE_ULONG
+#undef TYPE_LONGLONG
+#undef TYPE_ULONGLONG
+#undef TYPE_STRING
+#undef TYPE_DOUBLE
+#undef TYPE_INTSTR
+#undef TYPE_POINTER
+#undef TYPE_WSTRING
+#undef TYPE_UNKNOWN
+
+#undef FLAG_LEFT  // likewise for the FLAG_* formatting-flag macros
+#undef FLAG_SIGNED
+#undef FLAG_SPACED
+#undef FLAG_ZEROS
+#undef FLAG_NEG
diff --git a/mozglue/misc/Printf.h b/mozglue/misc/Printf.h
new file mode 100644
index 0000000000..886511b3cf
--- /dev/null
+++ b/mozglue/misc/Printf.h
@@ -0,0 +1,264 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Printf-like functions, with canned variants that malloc their result.  */
+
+#ifndef mozilla_Printf_h
+#define mozilla_Printf_h
+
+/*
+** API for PR printf like routines.
+**
+** These exist partly for historical reasons -- initially they were in
+** NSPR, then forked in tree and modified in js/ -- but now the prime
+** motivation is both closer control over the exact formatting (with
+** one exception, see below) and also the ability to control where
+** exactly the generated results are sent.
+**
+** It might seem that this could all be dispensed with in favor of a
+** wrapper around |vsnprintf| -- except that this implementation
+** guarantees that the %s format will accept a NULL pointer, whereas
+** with standard functions this is undefined.
+**
+** This supports the following formats.  It implements a subset of the
+** standard formats; due to the use of MOZ_FORMAT_PRINTF, it is not
+** permissible to extend the standard, aside from relaxing undefined
+** behavior.
+**
+**      %d - decimal
+**      %u - unsigned decimal
+**      %x - unsigned hex
+**      %X - unsigned uppercase hex
+**      %o - unsigned octal
+**      %hd, %hu, %hx, %hX, %ho - "short" versions of above
+**      %ld, %lu, %lx, %lX, %lo - "long" versions of above
+**      %lld, %llu, %llx, %llX, %llo - "long long" versions of above
+**      %zd, %zo, %zu, %zx, %zX - size_t versions of above
+**      %Id, %Io, %Iu, %Ix, %IX - size_t versions of above (for Windows compat).
+**           Note that MSVC 2015 and newer supports the z length modifier so
+**           users should prefer using %z instead of %I. We are supporting %I in
+**           addition to %z in case third-party code that uses %I gets routed to
+**           use this printf implementation.
+**      %s - string
+**      %S, %ls - wide string, that is wchar_t*
+**      %c - character
+**      %p - pointer (deals with machine dependent pointer size)
+**      %f - float; note that this is actually formatted using the
+**           system's native printf, and so the results may vary
+**      %g - float; note that this is actually formatted using the
+**           system's native printf, and so the results may vary
+*/
+
+#include "mozilla/AllocPolicy.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/Types.h"
+#include "mozilla/UniquePtr.h"
+
+#include <stdarg.h>
+#include <string.h>
+
+namespace mozilla {
+
+/*
+ * This class may be subclassed to provide a way to get the output of
+ * a printf-like call, as the output is generated.  Subclasses implement
+ * append(); every byte produced by the formatter goes through it.
+ */
+class PrintfTarget {
+ public:
+  /* The Printf-like interface.  Returns false if output could not be
+     appended.  */
+  bool MFBT_API print(const char* format, ...) MOZ_FORMAT_PRINTF(2, 3);
+
+  /* The Vprintf-like interface.  */
+  bool MFBT_API vprint(const char* format, va_list) MOZ_FORMAT_PRINTF(2, 0);
+
+  /* Fast paths for formatting integers as though by %d, %o, %u, or %x.
+     Since octal and hex formatting always treat numbers as unsigned, there
+     are no signed overloads for appendInt{Oct,Hex}.  */
+  bool MFBT_API appendIntDec(int32_t);
+  bool MFBT_API appendIntDec(uint32_t);
+  bool MFBT_API appendIntOct(uint32_t);
+  bool MFBT_API appendIntHex(uint32_t);
+  bool MFBT_API appendIntDec(int64_t);
+  bool MFBT_API appendIntDec(uint64_t);
+  bool MFBT_API appendIntOct(uint64_t);
+  bool MFBT_API appendIntHex(uint64_t);
+
+ protected:
+  MFBT_API PrintfTarget();
+  virtual ~PrintfTarget() = default;
+
+  /* Subclasses override this.  It is called when more output is
+     available.  It may be called with len==0.  This should return
+     true on success, or false on failure.  */
+  virtual bool append(const char* sp, size_t len) = 0;
+
+ private:
+  /* Number of bytes emitted so far.  */
+  size_t mEmitted;
+
+  /* The implementation calls this to emit bytes and update
+     mEmitted.  Note that mEmitted is advanced before append() runs, so
+     it also counts the bytes of a failed append().  */
+  bool emit(const char* sp, size_t len) {
+    mEmitted += len;
+    return append(sp, len);
+  }
+
+  /* Formatting helpers; each returns false as soon as emit() fails.  */
+  bool fill2(const char* src, int srclen, int width, int flags);
+  bool fill_n(const char* src, int srclen, int width, int prec, int type,
+              int flags);
+  bool cvt_l(long num, int width, int prec, int radix, int type, int flags,
+             const char* hexp);
+  bool cvt_ll(int64_t num, int width, int prec, int radix, int type, int flags,
+              const char* hexp);
+  bool cvt_f(double d, const char* fmt0, const char* fmt1);
+  bool cvt_s(const char* s, int width, int prec, int flags);
+};
+
+namespace detail {
+
+// Deleter that returns the pointer to a default-constructed AllocPolicy via
+// its free_() member.
+template <typename AllocPolicy = mozilla::MallocAllocPolicy>
+struct AllocPolicyBasedFreePolicy {
+  void operator()(const void* ptr) {
+    void* const mutablePtr = const_cast<void*>(ptr);
+    AllocPolicy allocator;
+    allocator.free_(mutablePtr);
+  }
+};
+
+}  // namespace detail
+
+// The type returned by Smprintf and friends; freed through AllocPolicy::free_.
+template <typename AllocPolicy>
+using SmprintfPolicyPointer =
+    mozilla::UniquePtr<char, detail::AllocPolicyBasedFreePolicy<AllocPolicy>>;
+
+// The default type if no alloc policy is specified (uses MallocAllocPolicy).
+typedef SmprintfPolicyPointer<mozilla::MallocAllocPolicy> SmprintfPointer;
+
+// Used in the implementation of Smprintf et al.  A PrintfTarget that collects
+// the formatted output in a buffer obtained from AllocPolicy.
+//
+// If |base| is non-null it must be a NUL-terminated string that can be
+// released with AllocPolicy::free_; output is appended after its existing
+// contents.  The object owns the buffer until release() is called, and frees
+// it in its destructor otherwise.
+template <typename AllocPolicy>
+class MOZ_STACK_CLASS SprintfState final : private mozilla::PrintfTarget,
+                                           private AllocPolicy {
+ public:
+  explicit SprintfState(char* base)
+      : mMaxlen(base ? strlen(base) : 0),
+        mBase(base),
+        mCur(base ? base + mMaxlen : 0) {}
+
+  ~SprintfState() { this->free_(mBase); }
+
+  // Formats into the buffer and appends the terminating NUL.  Returns false
+  // if formatting or an allocation failed.
+  bool vprint(const char* format, va_list ap_list) MOZ_FORMAT_PRINTF(2, 0) {
+    // The "" here has a single \0 character, which is what we're
+    // trying to append.
+    return mozilla::PrintfTarget::vprint(format, ap_list) && append("", 1);
+  }
+
+  // Hands the buffer over to the caller; this object no longer frees it.
+  SmprintfPolicyPointer<AllocPolicy> release() {
+    SmprintfPolicyPointer<AllocPolicy> result(mBase);
+    mBase = nullptr;
+    return result;
+  }
+
+ protected:
+  // Appends |len| bytes from |sp|, growing the buffer when needed.  Returns
+  // false if the buffer could not be grown.
+  bool append(const char* sp, size_t len) override {
+    ptrdiff_t off;
+    char* newbase;
+    size_t newlen;
+
+    off = mCur - mBase;
+    if (off + len >= mMaxlen) {
+      /* Grow the buffer */
+      newlen = mMaxlen + ((len > 32) ? len : 32);
+      newbase = this->template maybe_pod_malloc<char>(newlen);
+      if (!newbase) {
+        /* Ran out of memory */
+        return false;
+      }
+      /* There is nothing to copy on the first allocation; passing a null
+         source to memcpy is undefined even when the length is zero. */
+      if (mBase) {
+        memcpy(newbase, mBase, mMaxlen);
+      }
+      this->free_(mBase);
+      mBase = newbase;
+      mMaxlen = newlen;
+      mCur = mBase + off;
+    }
+
+    /* Copy data */
+    memcpy(mCur, sp, len);
+    mCur += len;
+    MOZ_ASSERT(size_t(mCur - mBase) <= mMaxlen);
+    return true;
+  }
+
+ private:
+  size_t mMaxlen;  // current capacity of the buffer at mBase
+  char* mBase;     // start of the owned buffer, or nullptr
+  char* mCur;      // next write position inside the buffer
+};
+
+/*
+** Format into a freshly allocated buffer. On success the returned pointer
+** owns the NUL-terminated result, which is released through
+** AllocPolicy::free_; on failure the result is nullptr.
+*/
+template <typename AllocPolicy = mozilla::MallocAllocPolicy>
+MOZ_FORMAT_PRINTF(1, 2)
+SmprintfPolicyPointer<AllocPolicy> Smprintf(const char* fmt, ...) {
+  SprintfState<AllocPolicy> state(nullptr);
+
+  va_list args;
+  va_start(args, fmt);
+  const bool ok = state.vprint(fmt, args);
+  va_end(args);
+
+  if (ok) {
+    return state.release();
+  }
+  return nullptr;
+}
+
+/*
+** "Append" variant: format onto the end of the string owned by "last",
+** growing the buffer as necessary. If last is nullptr the initial string is
+** allocated. Ownership of last's buffer is taken in either case. The return
+** value is the new value of last for subsequent calls, or nullptr on
+** failure, in which case the buffer has been freed.
+*/
+template <typename AllocPolicy = mozilla::MallocAllocPolicy>
+MOZ_FORMAT_PRINTF(2, 3)
+SmprintfPolicyPointer<AllocPolicy> SmprintfAppend(
+    SmprintfPolicyPointer<AllocPolicy>&& last, const char* fmt, ...) {
+  SprintfState<AllocPolicy> state(last.release());
+
+  va_list args;
+  va_start(args, fmt);
+  const bool ok = state.vprint(fmt, args);
+  va_end(args);
+
+  if (ok) {
+    return state.release();
+  }
+  return nullptr;
+}
+
+/*
+** va_list form of Smprintf: format into a freshly allocated buffer, or
+** return nullptr on failure.
+*/
+template <typename AllocPolicy = mozilla::MallocAllocPolicy>
+MOZ_FORMAT_PRINTF(1, 0)
+SmprintfPolicyPointer<AllocPolicy> Vsmprintf(const char* fmt, va_list ap) {
+  SprintfState<AllocPolicy> state(nullptr);
+  const bool ok = state.vprint(fmt, ap);
+  if (ok) {
+    return state.release();
+  }
+  return nullptr;
+}
+
+// va_list form of SmprintfAppend: takes ownership of |last|'s buffer, formats
+// onto its end, and returns the grown buffer or nullptr on failure.
+template <typename AllocPolicy = mozilla::MallocAllocPolicy>
+MOZ_FORMAT_PRINTF(2, 0)
+SmprintfPolicyPointer<AllocPolicy> VsmprintfAppend(
+    SmprintfPolicyPointer<AllocPolicy>&& last, const char* fmt, va_list ap) {
+  SprintfState<AllocPolicy> state(last.release());
+  const bool ok = state.vprint(fmt, ap);
+  if (ok) {
+    return state.release();
+  }
+  return nullptr;
+}
+
+}  // namespace mozilla
+
+#endif /* mozilla_Printf_h */
diff --git a/mozglue/misc/StackWalk.cpp b/mozglue/misc/StackWalk.cpp
new file mode 100644
index 0000000000..24868ae5de
--- /dev/null
+++ b/mozglue/misc/StackWalk.cpp
@@ -0,0 +1,929 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* API for getting a stack trace of the C/C++ stack on the current thread */
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/StackWalk.h"
+
+#include <string.h>
+
+#if defined(ANDROID) && defined(MOZ_LINKER)
+#  include "Linker.h"
+#  include <android/log.h>
+#endif
+
+using namespace mozilla;
+
+// for _Unwind_Backtrace from libcxxrt or libunwind
+// cxxabi.h from libcxxrt implicitly includes unwind.h first
+#if defined(HAVE__UNWIND_BACKTRACE) && !defined(_GNU_SOURCE)
+#  define _GNU_SOURCE
+#endif
+
+#if defined(HAVE_DLOPEN) || defined(XP_DARWIN)
+#  include <dlfcn.h>
+#endif
+
+#if (defined(XP_DARWIN) && \
+     (defined(__i386) || defined(__ppc__) || defined(HAVE__UNWIND_BACKTRACE)))
+#  define MOZ_STACKWALK_SUPPORTS_MACOSX 1
+#else
+#  define MOZ_STACKWALK_SUPPORTS_MACOSX 0
+#endif
+
+#if (defined(linux) &&                                            \
+     ((defined(__GNUC__) && (defined(__i386) || defined(PPC))) || \
+      defined(HAVE__UNWIND_BACKTRACE)))
+#  define MOZ_STACKWALK_SUPPORTS_LINUX 1
+#else
+#  define MOZ_STACKWALK_SUPPORTS_LINUX 0
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1)
+#  define HAVE___LIBC_STACK_END 1
+#else
+#  define HAVE___LIBC_STACK_END 0
+#endif
+
+#if HAVE___LIBC_STACK_END
+extern MOZ_EXPORT void* __libc_stack_end;  // from ld-linux.so
+#endif
+
+#ifdef ANDROID
+#  include <algorithm>
+#  include <unistd.h>
+#  include <pthread.h>
+#endif
+
+#if MOZ_STACKWALK_SUPPORTS_WINDOWS
+
+#  include <windows.h>
+#  include <process.h>
+#  include <stdio.h>
+#  include <malloc.h>
+#  include "mozilla/ArrayUtils.h"
+#  include "mozilla/Atomics.h"
+#  include "mozilla/StackWalk_windows.h"
+#  include "mozilla/WindowsVersion.h"
+
+#  include <imagehlp.h>
+// We need a way to know if we are building for WXP (or later), as if we are, we
+// need to use the newer 64-bit APIs. API_VERSION_NUMBER seems to fit the bill.
+// A value of 9 indicates we want to use the new APIs.
+#  if API_VERSION_NUMBER < 9
+#    error Too old imagehlp.h
+#  endif
+
+// Input parameters and output buffers for one run of WalkStackMain64.
+struct WalkStackData {
+  // Are we walking the stack of the calling thread? Note that we need to avoid
+  // calling fprintf and friends if this is false, in order to avoid deadlocks.
+  bool walkCallingThread;
+  // Number of frames the caller asked to skip (added to the walker's own
+  // frames when walking the calling thread).
+  uint32_t skipFrames;
+  // Thread whose stack is walked, and the process it belongs to.
+  HANDLE thread;
+  HANDLE process;
+  // NOTE(review): eventStart/eventEnd are not referenced by the code in this
+  // part of the file - confirm whether they are still needed.
+  HANDLE eventStart;
+  HANDLE eventEnd;
+  // Output buffer for program counters: capacity (pc_size), number of frames
+  // found so far (pc_count, may exceed pc_size) and the frame limit (pc_max,
+  // 0 meaning no limit).
+  void** pcs;
+  uint32_t pc_size;
+  uint32_t pc_count;
+  uint32_t pc_max;
+  // Output buffer for stack pointers, parallel to pcs.
+  void** sps;
+  uint32_t sp_size;
+  uint32_t sp_count;
+  // Optional starting register context; when null, WalkStackMain64 captures
+  // one itself.
+  CONTEXT* context;
+};
+
+// Lock taken around calls into the DbgHelp debug routines, which are not
+// thread-safe. Initialized by InitializeDbgHelpCriticalSection().
+CRITICAL_SECTION gDbgHelpCS;
+
+#  if defined(_M_AMD64) || defined(_M_ARM64)
+// Because various Win64 APIs acquire function-table locks, we need a way of
+// preventing stack walking while those APIs are being called. Otherwise, the
+// stack walker may suspend a thread holding such a lock, and deadlock when the
+// stack unwind code attempts to wait for that lock.
+//
+// We're using an atomic counter rather than a critical section because we
+// don't require mutual exclusion with the stack walker. If the stack walker
+// determines that it's safe to start unwinding the suspended thread (i.e.
+// there are no suppressions when the unwind begins), then it's safe to
+// continue unwinding that thread even if other threads request suppressions
+// in the meantime, because we can't deadlock with those other threads.
+//
+// XXX: This global variable is a larger-than-necessary hammer. A more scoped
+// solution would be to maintain a counter per thread, but then it would be
+// more difficult for WalkStackMain64 to read the suspended thread's counter.
+// Count of stack-walk suppressions currently in effect. WalkStackMain64
+// refuses to unwind while this is non-zero.
+static Atomic<size_t> sStackWalkSuppressions;
+
+// Adds one suppression.
+void SuppressStackWalking() { ++sStackWalkSuppressions; }
+
+// Removes one suppression previously added by SuppressStackWalking().
+void DesuppressStackWalking() { --sStackWalkSuppressions; }
+
+// Scoped helper: the constructor adds a suppression and the destructor
+// removes it again.
+MFBT_API
+AutoSuppressStackWalking::AutoSuppressStackWalking() { SuppressStackWalking(); }
+
+MFBT_API
+AutoSuppressStackWalking::~AutoSuppressStackWalking() {
+  DesuppressStackWalking();
+}
+
+// Start and size of the single registered JIT code region (null/0 when none
+// is registered). Set by RegisterJitCodeRegion/UnregisterJitCodeRegion.
+static uint8_t* sJitCodeRegionStart;
+static size_t sJitCodeRegionSize;
+// Start and size of msmpeg2vdec.dll's JIT code region, which the stack walker
+// must also avoid. Not static, so presumably assigned from another
+// translation unit - TODO confirm.
+uint8_t* sMsMpegJitCodeRegionStart;
+size_t sMsMpegJitCodeRegionSize;
+
+// Records [aStart, aStart + aSize) as the JIT code region that the stack
+// walker must not try to unwind through. Release-asserts that no region is
+// currently registered.
+MFBT_API void RegisterJitCodeRegion(uint8_t* aStart, size_t aSize) {
+  // Currently we can only handle one JIT code region at a time
+  MOZ_RELEASE_ASSERT(!sJitCodeRegionStart);
+
+  sJitCodeRegionStart = aStart;
+  sJitCodeRegionSize = aSize;
+}
+
+// Clears the registered JIT code region. Release-asserts that a region is
+// registered and that aStart/aSize match the values it was registered with.
+MFBT_API void UnregisterJitCodeRegion(uint8_t* aStart, size_t aSize) {
+  // Currently we can only handle one JIT code region at a time
+  MOZ_RELEASE_ASSERT(sJitCodeRegionStart && sJitCodeRegionStart == aStart &&
+                     sJitCodeRegionSize == aSize);
+
+  sJitCodeRegionStart = nullptr;
+  sJitCodeRegionSize = 0;
+}
+
+#  endif  // _M_AMD64 || _M_ARM64
+
+// Routine to print an error message to standard error.
+//
+// Looks up the system message text for the calling thread's last Win32 error
+// code (GetLastError()) and writes "### ERROR: <aPrefix>: <message>" to
+// stderr, then flushes stderr.
+//
+// @param aPrefix  Text identifying the operation that failed.
+static void PrintError(const char* aPrefix) {
+  // Read the error code first, before any other call can overwrite it.
+  DWORD lastErr = GetLastError();
+  // Must start out null: when FormatMessageA fails it does not hand back a
+  // buffer, and an uninitialized pointer would then be printed and passed to
+  // LocalFree.
+  LPSTR lpMsgBuf = nullptr;
+  FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+                     FORMAT_MESSAGE_IGNORE_INSERTS,
+                 nullptr, lastErr,
+                 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),  // Default language
+                 (LPSTR)&lpMsgBuf, 0, nullptr);
+  // The fallback text supplies its own newline because no system message
+  // text is available to end the line.
+  fprintf(stderr, "### ERROR: %s: %s", aPrefix,
+          lpMsgBuf ? lpMsgBuf : "(null)\n");
+  fflush(stderr);
+  // LocalFree ignores a null handle, so no check is needed here.
+  LocalFree(lpMsgBuf);
+}
+
+// Initializes gDbgHelpCS the first time it is called; later calls do nothing.
+static void InitializeDbgHelpCriticalSection() {
+  static bool sLockReady = false;
+  if (!sLockReady) {
+    ::InitializeCriticalSection(&gDbgHelpCS);
+    sLockReady = true;
+  }
+}
+
+// Walks a thread's stack and records, for each frame that is not skipped, its
+// program counter in aData->pcs and its stack pointer in aData->sps.
+//
+// The starting register state is aData->context when one is provided;
+// otherwise it is captured here (RtlCaptureContext for the calling thread,
+// GetThreadContext for aData->thread). If GetThreadContext fails, the
+// function returns without recording anything.
+//
+// pc_count/sp_count keep incrementing even once the buffers are full, so on
+// return they hold the number of frames found, which may exceed
+// pc_size/sp_size; entries beyond the buffer sizes are simply not stored.
+// The walk stops early once pc_count reaches a non-zero pc_max.
+static void WalkStackMain64(struct WalkStackData* aData) {
+  // Get a context for the specified thread.
+  CONTEXT context_buf;
+  CONTEXT* context;
+  if (!aData->context) {
+    context = &context_buf;
+    memset(context, 0, sizeof(CONTEXT));
+    context->ContextFlags = CONTEXT_FULL;
+    if (aData->walkCallingThread) {
+      ::RtlCaptureContext(context);
+    } else if (!GetThreadContext(aData->thread, context)) {
+      return;
+    }
+  } else {
+    context = aData->context;
+  }
+
+#  if defined(_M_IX86) || defined(_M_IA64)
+  // Setup initial stack frame to walk from.
+  STACKFRAME64 frame64;
+  memset(&frame64, 0, sizeof(frame64));
+#    ifdef _M_IX86
+  frame64.AddrPC.Offset = context->Eip;
+  frame64.AddrStack.Offset = context->Esp;
+  frame64.AddrFrame.Offset = context->Ebp;
+#    elif defined _M_IA64
+  frame64.AddrPC.Offset = context->StIIP;
+  frame64.AddrStack.Offset = context->SP;
+  frame64.AddrFrame.Offset = context->RsBSP;
+#    endif
+  frame64.AddrPC.Mode = AddrModeFlat;
+  frame64.AddrStack.Mode = AddrModeFlat;
+  frame64.AddrFrame.Mode = AddrModeFlat;
+  frame64.AddrReturn.Mode = AddrModeFlat;
+#  endif
+
+#  if defined(_M_AMD64) || defined(_M_ARM64)
+  // If there are any active suppressions, then at least one thread (we don't
+  // know which) is holding a lock that can deadlock RtlVirtualUnwind. Since
+  // that thread may be the one that we're trying to unwind, we can't proceed.
+  //
+  // But if there are no suppressions, then our target thread can't be holding
+  // a lock, and it's safe to proceed. By virtue of being suspended, the target
+  // thread can't acquire any new locks during the unwind process, so we only
+  // need to do this check once. After that, sStackWalkSuppressions can be
+  // changed by other threads while we're unwinding, and that's fine because
+  // we can't deadlock with those threads.
+  if (sStackWalkSuppressions) {
+    return;
+  }
+#  endif
+
+#  if defined(_M_AMD64) || defined(_M_ARM64)
+  // Only the first frame may be unwound by hand as a leaf function.
+  bool firstFrame = true;
+#  endif
+
+  // Skip our own stack walking frames.
+  int skip = (aData->walkCallingThread ? 3 : 0) + aData->skipFrames;
+
+  // Now walk the stack.
+  while (true) {
+    DWORD64 addr;
+    DWORD64 spaddr;
+
+#  if defined(_M_IX86) || defined(_M_IA64)
+    // 32-bit frame unwinding.
+    // Debug routines are not threadsafe, so grab the lock.
+    EnterCriticalSection(&gDbgHelpCS);
+    BOOL ok = StackWalk64(
+#    if defined _M_IA64
+        IMAGE_FILE_MACHINE_IA64,
+#    elif defined _M_IX86
+        IMAGE_FILE_MACHINE_I386,
+#    endif
+        aData->process, aData->thread, &frame64, context, nullptr,
+        SymFunctionTableAccess64,  // function table access routine
+        SymGetModuleBase64,        // module base routine
+        0);
+    LeaveCriticalSection(&gDbgHelpCS);
+
+    if (ok) {
+      addr = frame64.AddrPC.Offset;
+      spaddr = frame64.AddrStack.Offset;
+    } else {
+      addr = 0;
+      spaddr = 0;
+      // Only report the failure when walking the calling thread; see the
+      // note on WalkStackData::walkCallingThread about avoiding fprintf.
+      if (aData->walkCallingThread) {
+        PrintError("WalkStack64");
+      }
+    }
+
+    if (!ok) {
+      break;
+    }
+
+#  elif defined(_M_AMD64) || defined(_M_ARM64)
+
+#    if defined(_M_AMD64)
+    auto currentInstr = context->Rip;
+#    elif defined(_M_ARM64)
+    auto currentInstr = context->Pc;
+#    endif
+
+    // If we reach a frame in JIT code, we don't have enough information to
+    // unwind, so we have to give up.
+    if (sJitCodeRegionStart && (uint8_t*)currentInstr >= sJitCodeRegionStart &&
+        (uint8_t*)currentInstr < sJitCodeRegionStart + sJitCodeRegionSize) {
+      break;
+    }
+
+    // We must also avoid msmpeg2vdec.dll's JIT region: they don't generate
+    // unwind data, so their JIT unwind callback just throws up its hands and
+    // terminates the process.
+    if (sMsMpegJitCodeRegionStart &&
+        (uint8_t*)currentInstr >= sMsMpegJitCodeRegionStart &&
+        (uint8_t*)currentInstr <
+            sMsMpegJitCodeRegionStart + sMsMpegJitCodeRegionSize) {
+      break;
+    }
+
+    // 64-bit frame unwinding.
+    // Try to look up unwind metadata for the current function.
+    ULONG64 imageBase;
+    PRUNTIME_FUNCTION runtimeFunction =
+        RtlLookupFunctionEntry(currentInstr, &imageBase, NULL);
+
+    if (runtimeFunction) {
+      // RtlVirtualUnwind updates *context in place to the caller's state.
+      PVOID dummyHandlerData;
+      ULONG64 dummyEstablisherFrame;
+      RtlVirtualUnwind(UNW_FLAG_NHANDLER, imageBase, currentInstr,
+                       runtimeFunction, context, &dummyHandlerData,
+                       &dummyEstablisherFrame, nullptr);
+    } else if (firstFrame) {
+      // Leaf functions can be unwound by hand.
+#    if defined(_M_AMD64)
+      context->Rip = *reinterpret_cast<DWORD64*>(context->Rsp);
+      context->Rsp += sizeof(void*);
+#    elif defined(_M_ARM64)
+      context->Pc = *reinterpret_cast<DWORD64*>(context->Sp);
+      context->Sp += sizeof(void*);
+#    endif
+    } else {
+      // Something went wrong.
+      break;
+    }
+
+#    if defined(_M_AMD64)
+    addr = context->Rip;
+    spaddr = context->Rsp;
+#    elif defined(_M_ARM64)
+    addr = context->Pc;
+    spaddr = context->Sp;
+#    endif
+    firstFrame = false;
+#  else
+#    error "unknown platform"
+#  endif
+
+    // A zero program counter ends the walk.
+    if (addr == 0) {
+      break;
+    }
+
+    if (skip-- > 0) {
+      continue;
+    }
+
+    // Store the frame only while there is room, but always count it so the
+    // caller can learn how large the buffers need to be.
+    if (aData->pc_count < aData->pc_size) {
+      aData->pcs[aData->pc_count] = (void*)addr;
+    }
+    ++aData->pc_count;
+
+    if (aData->sp_count < aData->sp_size) {
+      aData->sps[aData->sp_count] = (void*)spaddr;
+    }
+    ++aData->sp_count;
+
+    if (aData->pc_max != 0 && aData->pc_count == aData->pc_max) {
+      break;
+    }
+
+#  if defined(_M_IX86) || defined(_M_IA64)
+    // No return address recorded for this frame: nothing further to walk.
+    if (frame64.AddrReturn.Offset == 0) {
+      break;
+    }
+#  endif
+  }
+}
+
+/**
+ * Walk the stack of a thread and report every recorded frame to aCallback.
+ *
+ * For this to work properly, the DLLs must be rebased so that the address
+ * in the file agrees with the address in memory. Otherwise StackWalk will
+ * return FALSE when it hits a frame in a DLL whose in-memory address doesn't
+ * match its in-file address.
+ *
+ * The walk first fills fixed-size on-stack buffers; if more frames were
+ * found than fit, buffers of the needed size are allocated with _alloca and
+ * the walk is repeated.
+ *
+ * @param aCallback    Called once per recorded frame with a 1-based frame
+ *                     number, the frame's PC, its SP, and aClosure.
+ * @param aSkipFrames  Number of initial frames to leave out.
+ * @param aMaxFrames   Maximum number of frames to record; 0 means no limit.
+ * @param aClosure     Caller-supplied data passed through to aCallback.
+ * @param aThread      Thread to walk; nullptr means the calling thread.
+ * @param aContext     Optional register context to start from; when null a
+ *                     context is captured for the target thread.
+ */
+
+MFBT_API void MozStackWalkThread(MozWalkStackCallback aCallback,
+                                 uint32_t aSkipFrames, uint32_t aMaxFrames,
+                                 void* aClosure, HANDLE aThread,
+                                 CONTEXT* aContext) {
+  struct WalkStackData data;
+
+  InitializeDbgHelpCriticalSection();
+
+  HANDLE targetThread = aThread;
+  if (!aThread) {
+    targetThread = ::GetCurrentThread();
+    data.walkCallingThread = true;
+  } else {
+    // An explicit handle may still refer to the calling thread.
+    DWORD threadId = ::GetThreadId(aThread);
+    DWORD currentThreadId = ::GetCurrentThreadId();
+    data.walkCallingThread = (threadId == currentThreadId);
+  }
+
+  data.skipFrames = aSkipFrames;
+  data.thread = targetThread;
+  data.process = ::GetCurrentProcess();
+  void* local_pcs[1024];
+  data.pcs = local_pcs;
+  data.pc_count = 0;
+  data.pc_size = ArrayLength(local_pcs);
+  data.pc_max = aMaxFrames;
+  void* local_sps[1024];
+  data.sps = local_sps;
+  data.sp_count = 0;
+  data.sp_size = ArrayLength(local_sps);
+  data.context = aContext;
+
+  WalkStackMain64(&data);
+
+  if (data.pc_count > data.pc_size) {
+    // The local buffers were too small: pc_count/sp_count now hold the number
+    // of frames found, so allocate exactly that much and walk again.
+    data.pcs = (void**)_alloca(data.pc_count * sizeof(void*));
+    data.pc_size = data.pc_count;
+    data.pc_count = 0;
+    data.sps = (void**)_alloca(data.sp_count * sizeof(void*));
+    data.sp_size = data.sp_count;
+    data.sp_count = 0;
+    WalkStackMain64(&data);
+  }
+
+  // Both passes are expected to find the same number of frames, but the
+  // counters keep running past the buffer capacity. Report only entries that
+  // were actually stored so the buffers are never read out of bounds.
+  uint32_t storedFrames = data.pc_count;
+  if (storedFrames > data.pc_size) {
+    storedFrames = data.pc_size;
+  }
+  if (storedFrames > data.sp_size) {
+    storedFrames = data.sp_size;
+  }
+
+  for (uint32_t i = 0; i < storedFrames; ++i) {
+    (*aCallback)(i + 1, data.pcs[i], data.sps[i], aClosure);
+  }
+}
+
+// Walks the calling thread's stack: forwards to MozStackWalkThread with a
+// null thread handle and a null context.
+MFBT_API void MozStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure) {
+  MozStackWalkThread(aCallback, aSkipFrames, aMaxFrames, aClosure, nullptr,
+                     nullptr);
+}
+
+/*
+ * Module enumeration callback used by SymGetModuleInfoEspecial64.
+ *
+ * aUserContext points at the DWORD64 address being looked up. If that
+ * address lies inside the module described by aModuleBase/aModuleSize, the
+ * module's symbols are loaded with SymLoadModule64.
+ *
+ * Returns TRUE to continue the enumeration (address not in this module, or
+ * the module was loaded successfully) and FALSE when SymLoadModule64 failed.
+ */
+static BOOL CALLBACK callbackEspecial64(PCSTR aModuleName, DWORD64 aModuleBase,
+                                        ULONG aModuleSize, PVOID aUserContext) {
+  const DWORD64 addr = *(DWORD64*)aUserContext;
+
+  /*
+   * A module occupies the half-open range [base, base + size): the address
+   * at base + size is the first byte past it and must not match. Comparing
+   * the offset against the size also avoids computing base + size.
+   */
+  if (addr < aModuleBase || (addr - aModuleBase) >= aModuleSize) {
+    return TRUE;
+  }
+
+  /*
+   * The address falls inside this module, so load its symbols.
+   */
+  BOOL retval = !!SymLoadModule64(GetCurrentProcess(), nullptr,
+                                  (PSTR)aModuleName, nullptr, aModuleBase,
+                                  aModuleSize);
+  if (!retval) {
+    PrintError("SymLoadModule64");
+  }
+
+  return retval;
+}
+
+/*
+ * SymGetModuleInfoEspecial
+ *
+ * Attempt to determine the module information.
+ * Bug 112196 says this DLL may not have been loaded at the time
+ *  SymInitialize was called, and thus the module information
+ *  and symbol information is not available.
+ * This code rectifies that problem.
+ */
+
+// New members were added to IMAGEHLP_MODULE64 (that show up in the
+// Platform SDK that ships with VC8, but not the Platform SDK that ships
+// with VC7.1, i.e., between DbgHelp 6.0 and 6.1), but we don't need to
+// use them, and it's useful to be able to function correctly with the
+// older library.  (Stock Windows XP SP2 seems to ship with dbghelp.dll
+// version 5.1.)  Since Platform SDK version need not correspond to
+// compiler version, and the version number in debughlp.h was NOT bumped
+// when these changes were made, ifdef based on a constant that was
+// added between these versions.
+#  ifdef SSRVOPT_SETCONTEXT
+#    define NS_IMAGEHLP_MODULE64_SIZE                                        \
+      (((offsetof(IMAGEHLP_MODULE64, LoadedPdbName) + sizeof(DWORD64) - 1) / \
+        sizeof(DWORD64)) *                                                   \
+       sizeof(DWORD64))
+#  else
+#    define NS_IMAGEHLP_MODULE64_SIZE sizeof(IMAGEHLP_MODULE64)
+#  endif
+
+BOOL SymGetModuleInfoEspecial64(HANDLE aProcess, DWORD64 aAddr,
+                                PIMAGEHLP_MODULE64 aModuleInfo,
+                                PIMAGEHLP_LINE64 aLineInfo) {
+  // Tell DbgHelp which structure layouts we are handing it.
+  aModuleInfo->SizeOfStruct = NS_IMAGEHLP_MODULE64_SIZE;
+  if (aLineInfo) {
+    aLineInfo->SizeOfStruct = sizeof(IMAGEHLP_LINE64);
+  }
+
+  // First attempt: the module may already be known.
+  BOOL haveModule = SymGetModuleInfo64(aProcess, aAddr, aModuleInfo);
+  if (!haveModule) {
+    // Not known yet: enumerate the loaded modules so that the callback can
+    // load the one containing aAddr, then ask once more.
+    //
+    // Need to cast to PENUMLOADED_MODULES_CALLBACK64 because the
+    // constness of the first parameter of
+    // PENUMLOADED_MODULES_CALLBACK64 varies over SDK versions (from
+    // non-const to const over time).  See bug 391848 and bug
+    // 415426.
+    if (EnumerateLoadedModules64(
+            aProcess, (PENUMLOADED_MODULES_CALLBACK64)callbackEspecial64,
+            (PVOID)&aAddr)) {
+      // If this second attempt fails too, there is nothing more to try.
+      haveModule = SymGetModuleInfo64(aProcess, aAddr, aModuleInfo);
+    }
+  }
+
+  // Line information is only attempted when module info was obtained and the
+  // caller asked for it.
+  if (!haveModule || !aLineInfo) {
+    return haveModule;
+  }
+
+  // A failed line lookup is not reported as a failure of this function;
+  // aLineInfo is zeroed to indicate that it holds nothing valid.
+  DWORD lineDisplacement = 0;
+  if (!SymGetLineFromAddr64(aProcess, aAddr, &lineDisplacement, aLineInfo)) {
+    memset(aLineInfo, 0, sizeof(*aLineInfo));
+  }
+
+  return haveModule;
+}
+
+// Initializes the DbgHelp symbol handler for the current process on first
+// use. Returns true once initialization has succeeded; a failed attempt is
+// reported via PrintError and retried on the next call.
+static bool EnsureSymInitialized() {
+  static bool sSymReady = false;
+
+  if (!sSymReady) {
+    InitializeDbgHelpCriticalSection();
+
+    SymSetOptions(SYMOPT_LOAD_LINES | SYMOPT_UNDNAME);
+    sSymReady = SymInitialize(GetCurrentProcess(), nullptr, TRUE);
+    if (!sSymReady) {
+      PrintError("SymInitialize");
+    }
+    /* XXX At some point we need to arrange to call SymCleanup */
+  }
+
+  return sSymReady;
+}
+
+// Fills *aDetails with whatever DbgHelp can resolve for the code address aPC:
+// the containing module and offset into it, the source file and line, and the
+// function name and offset into it. All fields are cleared first, so anything
+// that cannot be resolved is left empty/zero.
+//
+// Returns false only when the symbol handler could not be initialized;
+// otherwise returns true, even if nothing could be resolved.
+MFBT_API bool MozDescribeCodeAddress(void* aPC,
+                                     MozCodeAddressDetails* aDetails) {
+  aDetails->library[0] = '\0';
+  aDetails->loffset = 0;
+  aDetails->filename[0] = '\0';
+  aDetails->lineno = 0;
+  aDetails->function[0] = '\0';
+  aDetails->foffset = 0;
+
+  if (!EnsureSymInitialized()) {
+    return false;
+  }
+
+  HANDLE myProcess = ::GetCurrentProcess();
+  BOOL ok;
+
+  // debug routines are not threadsafe, so grab the lock.
+  EnterCriticalSection(&gDbgHelpCS);
+
+  //
+  // Attempt to load module info before we attempt to resolve the symbol.
+  // This just makes sure we get good info if available.
+  //
+
+  DWORD64 addr = (DWORD64)aPC;
+  IMAGEHLP_MODULE64 modInfo;
+  IMAGEHLP_LINE64 lineInfo;
+  BOOL modInfoRes;
+  modInfoRes = SymGetModuleInfoEspecial64(myProcess, addr, &modInfo, &lineInfo);
+
+  if (modInfoRes) {
+    // strncpy does not terminate on truncation, so terminate explicitly.
+    strncpy(aDetails->library, modInfo.LoadedImageName,
+            sizeof(aDetails->library));
+    aDetails->library[mozilla::ArrayLength(aDetails->library) - 1] = '\0';
+    aDetails->loffset = (char*)aPC - (char*)modInfo.BaseOfImage;
+
+    // lineInfo is zeroed by SymGetModuleInfoEspecial64 when the line lookup
+    // failed, so a null FileName means no line information is available.
+    if (lineInfo.FileName) {
+      strncpy(aDetails->filename, lineInfo.FileName,
+              sizeof(aDetails->filename));
+      aDetails->filename[mozilla::ArrayLength(aDetails->filename) - 1] = '\0';
+      aDetails->lineno = lineInfo.LineNumber;
+    }
+  }
+
+  // SYMBOL_INFO is followed by the symbol name, so reserve room for
+  // MAX_SYM_NAME characters after it; ULONG64 elements keep it aligned.
+  ULONG64 buffer[(sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR) +
+                  sizeof(ULONG64) - 1) /
+                 sizeof(ULONG64)];
+  PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer;
+  pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO);
+  pSymbol->MaxNameLen = MAX_SYM_NAME;
+
+  DWORD64 displacement;
+  ok = SymFromAddr(myProcess, addr, &displacement, pSymbol);
+
+  if (ok) {
+    strncpy(aDetails->function, pSymbol->Name, sizeof(aDetails->function));
+    aDetails->function[mozilla::ArrayLength(aDetails->function) - 1] = '\0';
+    aDetails->foffset = static_cast<ptrdiff_t>(displacement);
+  }
+
+  LeaveCriticalSection(&gDbgHelpCS);  // release our lock
+  return true;
+}
+
+// i386 or PPC Linux stackwalking code
+#elif HAVE_DLADDR &&                                           \
+    (HAVE__UNWIND_BACKTRACE || MOZ_STACKWALK_SUPPORTS_LINUX || \
+     MOZ_STACKWALK_SUPPORTS_MACOSX)
+
+#  include <stdlib.h>
+#  include <string.h>
+#  include <stdio.h>
+
+// On glibc 2.1, the Dl_info api defined in <dlfcn.h> is only exposed
+// if __USE_GNU is defined.  I suppose its some kind of standards
+// adherence thing.
+//
+#  if (__GLIBC_MINOR__ >= 1) && !defined(__USE_GNU)
+#    define __USE_GNU
+#  endif
+
+// This thing is exported by libstdc++
+// Yes, this is a gcc only hack
+#  if defined(MOZ_DEMANGLE_SYMBOLS)
+#    include <cxxabi.h>
+#  endif  // MOZ_DEMANGLE_SYMBOLS
+
+namespace mozilla {
+
+// Writes the demangled form of aSymbol into aBuffer (capacity aBufLen bytes),
+// truncating if necessary. aBuffer is left as an empty string when demangling
+// support is compiled out or the symbol cannot be demangled.
+void DemangleSymbol(const char* aSymbol, char* aBuffer, int aBufLen) {
+  // Start from an empty result so that every failure path yields "".
+  aBuffer[0] = '\0';
+
+#  if defined(MOZ_DEMANGLE_SYMBOLS)
+  /* See demangle.h in the gcc source for the voodoo */
+  char* const demangledName =
+      abi::__cxa_demangle(aSymbol, nullptr, nullptr, nullptr);
+  if (!demangledName) {
+    return;
+  }
+
+  // strncpy does not terminate on truncation, so terminate explicitly.
+  strncpy(aBuffer, demangledName, aBufLen);
+  aBuffer[aBufLen - 1] = '\0';
+  // The demangler hands back a malloc'd string that we must free.
+  free(demangledName);
+#  endif  // MOZ_DEMANGLE_SYMBOLS
+}
+
+}  // namespace mozilla
+
+// {x86, ppc} x {Linux, Mac} stackwalking code.
+#  if ((defined(__i386) || defined(PPC) || defined(__ppc__)) && \
+       (MOZ_STACKWALK_SUPPORTS_MACOSX || MOZ_STACKWALK_SUPPORTS_LINUX))
+
+// Walks the calling thread's stack by following the frame-pointer chain.
+//
+// Determines an upper bound for the stack (stackEnd) in a platform-specific
+// way and hands the current frame pointer and that bound to
+// FramePointerStackWalk, which performs the walk.
+//
+// @param aCallback    Callback function, called once per frame.
+// @param aSkipFrames  Number of initial frames to skip.
+// @param aMaxFrames   Maximum number of frames to trace; 0 means no limit.
+// @param aClosure     Caller-supplied data passed through to aCallback.
+MFBT_API void MozStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure) {
+  // Get the frame pointer
+  void** bp = (void**)__builtin_frame_address(0);
+
+  void* stackEnd;
+#    if HAVE___LIBC_STACK_END
+  stackEnd = __libc_stack_end;
+#    elif defined(XP_DARWIN)
+  stackEnd = pthread_get_stackaddr_np(pthread_self());
+#    elif defined(ANDROID)
+  pthread_attr_t sattr;
+  pthread_attr_init(&sattr);
+  pthread_getattr_np(pthread_self(), &sattr);
+  void* stackBase = stackEnd = nullptr;
+  size_t stackSize = 0;
+  if (gettid() != getpid()) {
+    // bionic's pthread_attr_getstack doesn't tell the truth for the main
+    // thread (see bug 846670). So don't use it for the main thread.
+    if (!pthread_attr_getstack(&sattr, &stackBase, &stackSize)) {
+      stackEnd = static_cast<char*>(stackBase) + stackSize;
+    } else {
+      stackEnd = nullptr;
+    }
+  }
+  // The attribute object is no longer needed once the bounds have been read.
+  pthread_attr_destroy(&sattr);
+  if (!stackEnd) {
+    // So consider the current frame pointer + an arbitrary size of 8MB
+    // (modulo overflow ; not really arbitrary as it's the default stack
+    // size for the main thread) if pthread_attr_getstack failed for
+    // some reason (or was skipped).
+    static const uintptr_t kMaxStackSize = 8 * 1024 * 1024;
+    // Largest start address for which adding kMaxStackSize cannot wrap.
+    uintptr_t maxStackStart = uintptr_t(-1) - kMaxStackSize;
+    // Use the frame pointer itself, clamped to that limit. (Taking the
+    // larger of the two would always select the limit and place stackEnd at
+    // the very top of the address space, defeating the bound.)
+    uintptr_t stackStart = std::min(maxStackStart, uintptr_t(bp));
+    stackEnd = reinterpret_cast<void*>(stackStart + kMaxStackSize);
+  }
+#    else
+#      error Unsupported configuration
+#    endif
+  FramePointerStackWalk(aCallback, aSkipFrames, aMaxFrames, aClosure, bp,
+                        stackEnd);
+}
+
+#  elif defined(HAVE__UNWIND_BACKTRACE)
+
+// libgcc_s.so symbols _Unwind_Backtrace@@GCC_3.3 and _Unwind_GetIP@@GCC_3.0
+#    include <unwind.h>
+
+// State shared between MozStackWalk and unwind_callback during one
+// _Unwind_Backtrace run.
+struct unwind_info {
+  // Callback invoked for every reported frame.
+  MozWalkStackCallback callback;
+  // Frames still to be skipped; decremented once per visited frame.
+  int skip;
+  // Maximum number of frames to report; 0 means no limit.
+  int maxFrames;
+  // Number of frames reported so far.
+  int numFrames;
+  // Caller-supplied data passed through to callback.
+  void* closure;
+};
+
+// Per-frame callback for _Unwind_Backtrace. Skips the requested number of
+// frames, reports each later frame's PC to the user callback (no SP is
+// available, so nullptr is passed), and stops the walk once the frame limit
+// has been reached.
+static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context,
+                                           void* closure) {
+  auto* state = static_cast<unwind_info*>(closure);
+  void* frameIp = reinterpret_cast<void*>(_Unwind_GetIP(context));
+  // TODO Use something like '_Unwind_GetGR()' to get the stack pointer.
+
+  state->skip -= 1;
+  if (state->skip >= 0) {
+    // Still within the frames the caller asked to skip.
+    return _URC_NO_REASON;
+  }
+
+  state->numFrames += 1;
+  (*state->callback)(state->numFrames, frameIp, nullptr, state->closure);
+
+  const bool limitReached =
+      state->maxFrames != 0 && state->numFrames == state->maxFrames;
+  // Again, any error code that stops the walk will do.
+  return limitReached ? _URC_FOREIGN_EXCEPTION_CAUGHT : _URC_NO_REASON;
+}
+
+// Walks the calling thread's stack with _Unwind_Backtrace, reporting each
+// frame through unwind_callback. No stack pointer is available on this path,
+// so aCallback receives nullptr for it.
+MFBT_API void MozStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure) {
+  unwind_info info;
+  info.callback = aCallback;
+  // One frame more than requested is skipped - presumably to leave out
+  // MozStackWalk's own frame.
+  info.skip = aSkipFrames + 1;
+  info.maxFrames = aMaxFrames;
+  info.numFrames = 0;
+  info.closure = aClosure;
+
+  // We ignore the return value from _Unwind_Backtrace. There are three main
+  // reasons for this.
+  // - On ARM/Android bionic's _Unwind_Backtrace usually (always?) returns
+  //   _URC_FAILURE.  See
+  //   https://bugzilla.mozilla.org/show_bug.cgi?id=717853#c110.
+  // - If aMaxFrames != 0, we want to stop early, and the only way to do that
+  //   is to make unwind_callback return something other than _URC_NO_REASON,
+  //   which causes _Unwind_Backtrace to return a non-success code.
+  // - MozStackWalk doesn't have a return value anyway.
+  (void)_Unwind_Backtrace(unwind_callback, &info);
+}
+
+#  endif
+
+// Fills *aDetails for the code address aPC using dladdr (or the custom
+// linker's __wrap_dladdr on Android): the containing library and offset into
+// it and, except on FreeBSD, the function name and offset into it. File name
+// and line number are never filled in here. All fields are cleared first.
+//
+// Always returns true, even when the lookup finds nothing.
+bool MFBT_API MozDescribeCodeAddress(void* aPC,
+                                     MozCodeAddressDetails* aDetails) {
+  aDetails->library[0] = '\0';
+  aDetails->loffset = 0;
+  aDetails->filename[0] = '\0';
+  aDetails->lineno = 0;
+  aDetails->function[0] = '\0';
+  aDetails->foffset = 0;
+
+  Dl_info info;
+
+#  if defined(ANDROID) && defined(MOZ_LINKER)
+  int ok = __wrap_dladdr(aPC, &info);
+#  else
+  int ok = dladdr(aPC, &info);
+#  endif
+
+  // The lookup failed: leave every field cleared.
+  if (!ok) {
+    return true;
+  }
+
+  // strncpy does not terminate on truncation, so terminate explicitly.
+  strncpy(aDetails->library, info.dli_fname, sizeof(aDetails->library));
+  aDetails->library[mozilla::ArrayLength(aDetails->library) - 1] = '\0';
+  aDetails->loffset = (char*)aPC - (char*)info.dli_fbase;
+
+#  if !defined(XP_FREEBSD)
+  // On FreeBSD, dli_sname is unusably bad, it often returns things like
+  // 'gtk_xtbin_new' or 'XRE_GetBootstrap' instead of long C++ symbols. Just let
+  // GetFunction do the lookup directly in the ELF image.
+
+  const char* symbol = info.dli_sname;
+  // No symbol name available: only the library information is reported.
+  if (!symbol || symbol[0] == '\0') {
+    return true;
+  }
+
+  DemangleSymbol(symbol, aDetails->function, sizeof(aDetails->function));
+
+  if (aDetails->function[0] == '\0') {
+    // Just use the mangled symbol if demangling failed.
+    strncpy(aDetails->function, symbol, sizeof(aDetails->function));
+    aDetails->function[mozilla::ArrayLength(aDetails->function) - 1] = '\0';
+  }
+
+  aDetails->foffset = (char*)aPC - (char*)info.dli_saddr;
+#  endif
+
+  return true;
+}
+
+#else  // unsupported platform.
+
+// Stack walking is not implemented on this platform: aCallback is never
+// called.
+MFBT_API void MozStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure) {}
+
+// Address description is not implemented on this platform: clears every field
+// of *aDetails and returns false.
+MFBT_API bool MozDescribeCodeAddress(void* aPC,
+                                     MozCodeAddressDetails* aDetails) {
+  aDetails->library[0] = '\0';
+  aDetails->loffset = 0;
+  aDetails->filename[0] = '\0';
+  aDetails->lineno = 0;
+  aDetails->function[0] = '\0';
+  aDetails->foffset = 0;
+  return false;
+}
+
+#endif
+
+#if defined(XP_WIN) || defined(XP_MACOSX) || defined(XP_LINUX)
+namespace mozilla {
+// Walks a stack by following the chain of saved frame pointers starting at
+// aBp, calling aCallback for every frame that is not skipped.
+//
+// @param aCallback    Called with a 1-based frame number, the frame's return
+//                     address, an approximation of its stack pointer, and
+//                     aClosure.
+// @param aSkipFrames  Number of initial frames to leave out.
+// @param aMaxFrames   Maximum number of frames to report; 0 means no limit.
+// @param aClosure     Caller-supplied data passed through to aCallback.
+// @param aBp          Frame pointer to start walking from.
+// @param aStackEnd    Upper bound of the stack; the walk stops when the next
+//                     frame pointer is at or beyond it.
+MOZ_ASAN_BLACKLIST
+void FramePointerStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure, void** aBp,
+                           void* aStackEnd) {
+  // Stack walking code courtesy Kipp's "leaky".
+
+  int32_t skip = aSkipFrames;
+  uint32_t numFrames = 0;
+  while (aBp) {
+    // The word at the frame pointer is the caller's saved frame pointer.
+    void** next = (void**)*aBp;
+    // aBp may not be a frame pointer on i386 if code was compiled with
+    // -fomit-frame-pointer, so do some sanity checks.
+    // (aBp should be a frame pointer on ppc(64) but checking anyway may help
+    // a little if the stack has been corrupted.)
+    // We don't need to check against the beginning of the stack because
+    // we can assume that aBp > sp
+    if (next <= aBp || next >= aStackEnd || (uintptr_t(next) & 3)) {
+      break;
+    }
+#  if (defined(__ppc__) && defined(XP_MACOSX)) || defined(__powerpc64__)
+    // ppc mac or powerpc64 linux
+    void* pc = *(aBp + 2);
+    aBp += 3;
+#  else  // i386 or powerpc32 linux
+    void* pc = *(aBp + 1);
+    aBp += 2;
+#  endif
+    if (--skip < 0) {
+      // Assume that the SP points to the BP of the function
+      // it called. We can't know the exact location of the SP
+      // but this should be sufficient for our use the SP
+      // to order elements on the stack.
+      numFrames++;
+      (*aCallback)(numFrames, pc, aBp, aClosure);
+      if (aMaxFrames != 0 && numFrames == aMaxFrames) {
+        break;
+      }
+    }
+    aBp = next;
+  }
+}
+}  // namespace mozilla
+
+#else
+
+namespace mozilla {
+// Frame-pointer stack walking is not implemented on this platform: aCallback
+// is never called.
+MFBT_API void FramePointerStackWalk(MozWalkStackCallback aCallback,
+                                    uint32_t aSkipFrames, uint32_t aMaxFrames,
+                                    void* aClosure, void** aBp,
+                                    void* aStackEnd) {}
+}  // namespace mozilla
+
+#endif
+
+// Formats one stack frame into aBuffer from a MozCodeAddressDetails: forwards
+// the function, library, library offset, file name and line number fields of
+// *aDetails to MozFormatCodeAddress.
+MFBT_API void MozFormatCodeAddressDetails(
+    char* aBuffer, uint32_t aBufferSize, uint32_t aFrameNumber, void* aPC,
+    const MozCodeAddressDetails* aDetails) {
+  MozFormatCodeAddress(aBuffer, aBufferSize, aFrameNumber, aPC,
+                       aDetails->function, aDetails->library, aDetails->loffset,
+                       aDetails->filename, aDetails->lineno);
+}
+
+// Formats one stack frame into aBuffer (at most aBufferSize bytes, via
+// snprintf). The most specific form for which data is available is used:
+//   "#NN: function (file:line)"         when a file name is known,
+//   "#NN: function[library +0xoffset]"  when only a library is known,
+//   "#NN: ??? (???:???)"                otherwise.
+// A null or empty function name is printed as "???". aPC is not used.
+MFBT_API void MozFormatCodeAddress(char* aBuffer, uint32_t aBufferSize,
+                                   uint32_t aFrameNumber, const void* aPC,
+                                   const char* aFunction, const char* aLibrary,
+                                   ptrdiff_t aLOffset, const char* aFileName,
+                                   uint32_t aLineNo) {
+  // A string counts as present only when it is non-null and non-empty.
+  auto isPresent = [](const char* aText) { return aText && aText[0]; };
+
+  const char* functionName = isPresent(aFunction) ? aFunction : "???";
+
+  if (isPresent(aFileName)) {
+    // We have a filename and (presumably) a line number. Use them.
+    snprintf(aBuffer, aBufferSize, "#%02u: %s (%s:%u)", aFrameNumber,
+             functionName, aFileName, aLineNo);
+    return;
+  }
+
+  if (isPresent(aLibrary)) {
+    // No filename, but a library name is available. Print it together with
+    // the library offset in a form that `fix_stacks.py` can post-process.
+    snprintf(aBuffer, aBufferSize, "#%02u: %s[%s +0x%" PRIxPTR "]",
+             aFrameNumber, functionName, aLibrary,
+             static_cast<uintptr_t>(aLOffset));
+    return;
+  }
+
+  // Nothing useful to go on. (The format string is split because
+  // '??)' is a trigraph and causes a warning, sigh.)
+  snprintf(aBuffer, aBufferSize,
+           "#%02u: ??? (???:???"
+           ")",
+           aFrameNumber);
+}
diff --git a/mozglue/misc/StackWalk.h b/mozglue/misc/StackWalk.h
new file mode 100644
index 0000000000..3ea9de8091
--- /dev/null
+++ b/mozglue/misc/StackWalk.h
@@ -0,0 +1,177 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* APIs for getting a stack trace of the current thread */
+
+#ifndef mozilla_StackWalk_h
+#define mozilla_StackWalk_h
+
+#include "mozilla/Types.h"
+#include <stdint.h>
+
+/**
+ * The callback for MozStackWalk and MozStackWalkThread.
+ *
+ * @param aFrameNumber  The frame number (starts at 1, not 0).
+ * @param aPC           The program counter value.
+ * @param aSP           The best possible approximation of what the stack
+ *                      pointer will be pointing to when execution returns
+ *                      to the code at aPC. If no approximation can be
+ *                      made, it will be nullptr.
+ * @param aClosure      Extra data passed in from MozStackWalk() or
+ *                      MozStackWalkThread().
+ */
+typedef void (*MozWalkStackCallback)(uint32_t aFrameNumber, void* aPC,
+                                     void* aSP, void* aClosure);
+
+/**
+ * Call aCallback for each stack frame on the current thread, from
+ * the caller of MozStackWalk to main (or above).
+ *
+ * @param aCallback    Callback function, called once per frame.
+ * @param aSkipFrames  Number of initial frames to skip.  0 means that
+ *                     the first callback will be for the caller of
+ *                     MozStackWalk.
+ * @param aMaxFrames   Maximum number of frames to trace.  0 means no limit.
+ * @param aClosure     Caller-supplied data passed through to aCallback.
+ *
+ * May skip some stack frames due to compiler optimizations or code
+ * generation.
+ */
+MFBT_API void MozStackWalk(MozWalkStackCallback aCallback, uint32_t aSkipFrames,
+                           uint32_t aMaxFrames, void* aClosure);
+
+#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_AMD64) || \
+                        defined(_M_IA64) || defined(_M_ARM64))
+
+#  include <windows.h>
+
+#  define MOZ_STACKWALK_SUPPORTS_WINDOWS 1
+
+/**
+ * Like MozStackWalk, but walks the stack of the thread given by aThread.
+ * Calls aCallback for each stack frame on that thread (or on the current
+ * thread if aThread is 0).
+ *
+ * @param aCallback    Same as for MozStackWalk().
+ * @param aSkipFrames  Same as for MozStackWalk().
+ * @param aMaxFrames   Same as for MozStackWalk().
+ * @param aClosure     Same as for MozStackWalk().
+ * @param aThread      The handle of the thread whose stack is to be walked.
+ *                     If 0, walks the current thread.
+ * @param aContext     A CONTEXT, presumably obtained with GetThreadContext()
+ *                     after suspending the thread with SuspendThread(). If
+ *                     null, the CONTEXT will be re-obtained.
+ */
+MFBT_API void MozStackWalkThread(MozWalkStackCallback aCallback,
+                                 uint32_t aSkipFrames, uint32_t aMaxFrames,
+                                 void* aClosure, HANDLE aThread,
+                                 CONTEXT* aContext);
+
+#else
+
+#  define MOZ_STACKWALK_SUPPORTS_WINDOWS 0
+
+#endif
+
+/* Description of one code address, as filled in by MozDescribeCodeAddress. */
+typedef struct {
+  /*
+   * The name of the shared library or executable containing an
+   * address and the address's offset within that library, or empty
+   * string and zero if unknown.
+   */
+  char library[256];
+  ptrdiff_t loffset;
+  /*
+   * The source file name and line number of the code corresponding to
+   * the address, or empty string and zero if unknown.
+   */
+  char filename[256];
+  unsigned long lineno;
+  /*
+   * The name of the function containing an address and the address's
+   * offset within that function, or empty string and zero if unknown.
+   */
+  char function[256];
+  ptrdiff_t foffset;
+} MozCodeAddressDetails;
+
+/**
+ * For a given pointer to code, fill in the pieces of information used
+ * when printing a stack trace.
+ *
+ * @param aPC         The code address.
+ * @param aDetails    A structure to be filled in with the result.
+ */
+MFBT_API bool MozDescribeCodeAddress(void* aPC,
+                                     MozCodeAddressDetails* aDetails);
+
+/**
+ * Format the information about a code address in a format suitable for
+ * stack traces on the current platform.  When available, this string
+ * should contain the function name, source file, and line number.  When
+ * these are not available, library and offset should be reported, if
+ * possible.
+ *
+ * Note that this output is parsed by several scripts including the fix*.py and
+ * make-tree.pl scripts in tools/rb/. It should only be changed with care, and
+ * in conjunction with those scripts.
+ *
+ * @param aBuffer      A string to be filled in with the description.
+ *                     The string will always be null-terminated.
+ * @param aBufferSize  The size, in bytes, of aBuffer, including
+ *                     room for the terminating null.  If the information
+ *                     to be printed would be larger than aBuffer, it
+ *                     will be truncated so that aBuffer[aBufferSize-1]
+ *                     is the terminating null.
+ * @param aFrameNumber The frame number.
+ * @param aPC          The code address.
+ * @param aFunction    The function name. Possibly null or the empty string.
+ * @param aLibrary     The library name. Possibly null or the empty string.
+ * @param aLOffset     The library offset.
+ * @param aFileName    The filename. Possibly null or the empty string.
+ * @param aLineNo      The line number. Possibly zero.
+ */
+MFBT_API void MozFormatCodeAddress(char* aBuffer, uint32_t aBufferSize,
+                                   uint32_t aFrameNumber, const void* aPC,
+                                   const char* aFunction, const char* aLibrary,
+                                   ptrdiff_t aLOffset, const char* aFileName,
+                                   uint32_t aLineNo);
+
+/**
+ * Format the information about a code address in the same fashion as
+ * MozFormatCodeAddress.
+ *
+ * @param aBuffer      A string to be filled in with the description.
+ *                     The string will always be null-terminated.
+ * @param aBufferSize  The size, in bytes, of aBuffer, including
+ *                     room for the terminating null.  If the information
+ *                     to be printed would be larger than aBuffer, it
+ *                     will be truncated so that aBuffer[aBufferSize-1]
+ *                     is the terminating null.
+ * @param aFrameNumber The frame number.
+ * @param aPC          The code address.
+ * @param aDetails     The value filled in by MozDescribeCodeAddress(aPC).
+ */
+MFBT_API void MozFormatCodeAddressDetails(
+    char* aBuffer, uint32_t aBufferSize, uint32_t aFrameNumber, void* aPC,
+    const MozCodeAddressDetails* aDetails);
+
+namespace mozilla {
+
+MFBT_API void FramePointerStackWalk(MozWalkStackCallback aCallback,
+                                    uint32_t aSkipFrames, uint32_t aMaxFrames,
+                                    void* aClosure, void** aBp,
+                                    void* aStackEnd);
+
+#if defined(XP_LINUX) || defined(XP_FREEBSD)
+MFBT_API void DemangleSymbol(const char* aSymbol, char* aBuffer, int aBufLen);
+#endif
+
+}  // namespace mozilla
+
+#endif
diff --git a/mozglue/misc/StackWalk_windows.h b/mozglue/misc/StackWalk_windows.h
new file mode 100644
index 0000000000..dca4d41064
--- /dev/null
+++ b/mozglue/misc/StackWalk_windows.h
@@ -0,0 +1,34 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_StackWalk_windows_h
+#define mozilla_StackWalk_windows_h
+
+#include "mozilla/Types.h"
+
+#if defined(_M_AMD64) || defined(_M_ARM64)
+/**
+ * Allow stack walkers to work around the egregious win64 dynamic lookup table
+ * list API by locking around SuspendThread to avoid deadlock.
+ * Scoped helper (MOZ_RAII): presumably suppression lasts for the object's
+ * lifetime -- TODO confirm against the comment in StackWalk.cpp.
+ */
+struct MOZ_RAII AutoSuppressStackWalking {
+  MFBT_API AutoSuppressStackWalking();
+  MFBT_API ~AutoSuppressStackWalking();
+};
+
+#  if defined(IMPL_MFBT)
+void SuppressStackWalking();
+void DesuppressStackWalking();
+#  endif  // defined(IMPL_MFBT)
+
+MFBT_API void RegisterJitCodeRegion(uint8_t* aStart, size_t size);
+
+MFBT_API void UnregisterJitCodeRegion(uint8_t* aStart, size_t size);
+#endif  // defined(_M_AMD64) || defined(_M_ARM64)
+
+#endif  // mozilla_StackWalk_windows_h
diff --git a/mozglue/misc/TimeStamp.cpp b/mozglue/misc/TimeStamp.cpp
new file mode 100644
index 0000000000..0c02413e98
--- /dev/null
+++ b/mozglue/misc/TimeStamp.cpp
@@ -0,0 +1,154 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Implementation of the OS-independent methods of the TimeStamp class
+ */
+
+#include "mozilla/Atomics.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Uptime.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+namespace mozilla {
+
+/**
+ * Initializes static data used by TimeStamp; instantiated as `sInitOnce`.
+ */
+struct TimeStampInitialization {
+  /**
+   * First timestamp taken when the class static initializers are run. This
+   * timestamp is used to sanitize timestamps coming from different sources.
+   */
+  TimeStamp mFirstTimeStamp;
+
+  /**
+   * Timestamp representing the time when the process was created. Populated
+   * lazily by TimeStamp::ProcessCreation() and reset to the null value by
+   * TimeStamp::RecordProcessRestart().
+   */
+  TimeStamp mProcessCreation;
+  // NOTE(review): presumably Startup() must run before the first Now() call.
+  TimeStampInitialization() {
+    TimeStamp::Startup();
+    mFirstTimeStamp = TimeStamp::Now();
+    // On Windows < 10, initializing the uptime requires `mFirstTimeStamp` to be
+    // valid.
+    mozilla::InitializeUptime();
+  };
+  // Pairs with the Startup() call made by the constructor.
+  ~TimeStampInitialization() { TimeStamp::Shutdown(); };
+};
+
+static bool sFuzzyfoxEnabled;
+
+/* static */  // Reads the file-level Fuzzyfox on/off flag.
+bool TimeStamp::GetFuzzyfoxEnabled() { return sFuzzyfoxEnabled; }
+
+/* static */  // Writes the flag. NOTE(review): plain bool, not Atomic.
+void TimeStamp::SetFuzzyfoxEnabled(bool aValue) { sFuzzyfoxEnabled = aValue; }
+
+// These variables store the frozen time (as a TimeStamp) for FuzzyFox that
+// will be reported if FuzzyFox is enabled.
+// We overload the top bit of sCanonicalNow (NOTE(review): presumably
+// sCanonicalNowTimeStamp) and sCanonicalGTC to indicate if a Timestamp is a
+// fuzzed timestamp (bit set) or not (bit unset).
+#ifdef XP_WIN
+static Atomic<uint64_t> sCanonicalGTC;
+static Atomic<uint64_t> sCanonicalQPC;
+static Atomic<bool> sCanonicalHasQPC;
+#else
+static Atomic<uint64_t> sCanonicalNowTimeStamp;
+#endif
+// This variable stores the frozen time (as ms since the epoch) for FuzzyFox
+// to report if FuzzyFox is enabled.
+static Atomic<int64_t> sCanonicalNowTime;
+static TimeStampInitialization sInitOnce;
+
+MFBT_API TimeStamp TimeStamp::ProcessCreation(bool* aIsInconsistent) {
+  if (aIsInconsistent) {
+    *aIsInconsistent = false;
+  }
+
+  TimeStamp& cached = sInitOnce.mProcessCreation;
+  if (!cached.IsNull()) {
+    // Already computed and not reset since by RecordProcessRestart().
+    return cached;
+  }
+
+  // PR_SetEnv() with an empty value may either unset the variable or set it to
+  // the empty string depending on the platform, so require a non-empty value.
+  const char* restartEnv = getenv("MOZ_APP_RESTART");
+  const bool restarted = restartEnv && restartEnv[0] != '\0';
+  TimeStamp creation;
+  if (restarted) {
+    // Firefox was restarted: the first timestamp we took stands in for the
+    // new process startup time.
+    creation = sInitOnce.mFirstTimeStamp;
+  } else {
+    const TimeStamp now = Now();
+    const uint64_t uptimeUs = ComputeProcessUptime();
+    creation = now - TimeDuration::FromMicroseconds(uptimeUs);
+    const bool inconsistent =
+        (creation > sInitOnce.mFirstTimeStamp) || (uptimeUs == 0);
+    if (inconsistent) {
+      // Fall back to the first timestamp and report the inconsistency.
+      if (aIsInconsistent) {
+        *aIsInconsistent = true;
+      }
+      creation = sInitOnce.mFirstTimeStamp;
+    }
+  }
+
+  cached = creation;
+  return cached;
+}
+
+void TimeStamp::RecordProcessRestart() {
+  sInitOnce.mProcessCreation = TimeStamp();  // Null; recomputed on next query.
+}
+
+MFBT_API TimeStamp TimeStamp::NowFuzzy(TimeStampValue aValue) {
+  // Snapshot of the canonical (frozen) time last published through
+  // UpdateFuzzyTimeStamp().
+#ifdef XP_WIN
+  TimeStampValue frozen =
+      TimeStampValue(sCanonicalGTC, sCanonicalQPC, sCanonicalHasQPC, true);
+#else
+  TimeStampValue frozen = TimeStampValue(sCanonicalNowTimeStamp);
+#endif
+
+  // With Fuzzyfox enabled, report the frozen time whenever one is available.
+  // When Fuzzyfox is disabled, time may go backwards, so make sure we never
+  // report anything older than the frozen value.
+  const bool useFrozen = TimeStamp::GetFuzzyfoxEnabled()
+                             ? MOZ_LIKELY(!frozen.IsNull())
+                             : MOZ_UNLIKELY(frozen > aValue);
+  if (useFrozen) {
+    return TimeStamp(frozen);
+  }
+  return TimeStamp(aValue);
+}
+
+MFBT_API void TimeStamp::UpdateFuzzyTimeStamp(TimeStamp aValue) {
+#ifdef XP_WIN  // Publish aValue as the value NowFuzzy() reads back.
+  sCanonicalGTC = aValue.mValue.mGTC;
+  sCanonicalQPC = aValue.mValue.mQPC;
+  sCanonicalHasQPC = aValue.mValue.mHasQPC;
+#else
+  sCanonicalNowTimeStamp = aValue.mValue.mTimeStamp;  // Tick count only.
+#endif
+}
+
+MFBT_API int64_t TimeStamp::NowFuzzyTime() { return sCanonicalNowTime; }
+
+MFBT_API void TimeStamp::UpdateFuzzyTime(int64_t aValue) {
+  sCanonicalNowTime = aValue;  // Read back by NowFuzzyTime().
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/TimeStamp.h b/mozglue/misc/TimeStamp.h
new file mode 100644
index 0000000000..9b2521955f
--- /dev/null
+++ b/mozglue/misc/TimeStamp.h
@@ -0,0 +1,615 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_TimeStamp_h
+#define mozilla_TimeStamp_h
+
+#include <stdint.h>
+#include <algorithm>  // for std::min, std::max
+#include <ostream>
+#include <type_traits>
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/Types.h"
+
+namespace IPC {
+template <typename T>
+struct ParamTraits;
+}  // namespace IPC
+
+#ifdef XP_WIN
+// defines TimeStampValue as a complex value keeping both
+// GetTickCount and QueryPerformanceCounter values
+#  include "TimeStamp_windows.h"
+#endif
+
+namespace mozilla {
+
+#ifndef XP_WIN
+struct TimeStamp63Bit {
+  // One 64-bit word: a 1-bit "canonical now" flag plus a 63-bit tick count.
+  uint64_t mUsedCanonicalNow : 1;
+  uint64_t mTimeStamp : 63;
+
+  // Null timestamp with the flag cleared.
+  constexpr TimeStamp63Bit() : mUsedCanonicalNow(0), mTimeStamp(0) {}
+  // Wraps a raw tick count; the flag starts out cleared.
+  MOZ_IMPLICIT constexpr TimeStamp63Bit(const uint64_t aValue)
+      : mUsedCanonicalNow(0), mTimeStamp(aValue) {}
+  // Sets both the flag and the tick count explicitly.
+  constexpr TimeStamp63Bit(const bool aUsedCanonicalNow,
+                           const int64_t aTimeStamp)
+      : mUsedCanonicalNow(aUsedCanonicalNow ? 1 : 0), mTimeStamp(aTimeStamp) {}
+
+  // Equal only when the flag and the tick count both match. The two
+  // bit-fields cover all 64 bits, so this compares the whole representation.
+  bool operator==(const TimeStamp63Bit aOther) const {
+    return mUsedCanonicalNow == aOther.mUsedCanonicalNow &&
+           mTimeStamp == aOther.mTimeStamp;
+  }
+
+  // Conversion yields the tick count alone; the flag is not included.
+  operator uint64_t() const { return mTimeStamp; }
+  bool IsNull() const { return !mTimeStamp; }
+  bool UsedCanonicalNow() const { return mUsedCanonicalNow != 0; }
+  void SetCanonicalNow() { mUsedCanonicalNow = 1; }
+};
+
+typedef TimeStamp63Bit TimeStampValue;
+#endif
+
+class TimeStamp;
+
+// Platform-specific implementation details of BaseTimeDuration: conversions
+// between raw ticks and seconds/milliseconds, plus the timer resolution in
+// ticks. Only declarations appear here.
+class BaseTimeDurationPlatformUtils {
+ public:
+  static MFBT_API double ToSeconds(int64_t aTicks);
+  static MFBT_API double ToSecondsSigDigits(int64_t aTicks);
+  static MFBT_API int64_t TicksFromMilliseconds(double aMilliseconds);
+  static MFBT_API int64_t ResolutionInTicks();
+};
+
+/**
+ * Instances of this class represent the length of an interval of time.
+ * Negative durations are allowed, meaning the end is before the start.
+ *
+ * Internally the duration is stored as an int64_t in units of
+ * PR_TicksPerSecond() when building with NSPR interval timers, or a
+ * system-dependent unit when building with system clocks.  The
+ * system-dependent unit must be constant, otherwise the semantics of
+ * this class would be broken.
+ *
+ * The ValueCalculator template parameter determines how arithmetic
+ * operations are performed on the integer count of ticks (mValue).
+ */
+template <typename ValueCalculator>
+class BaseTimeDuration {
+ public:
+  // The default duration is 0.
+  constexpr BaseTimeDuration() : mValue(0) {}
+  // Allow construction using '0' as the initial value, for readability,
+  // but no other numbers (so we don't have any implicit unit conversions).
+  struct _SomethingVeryRandomHere;
+  MOZ_IMPLICIT BaseTimeDuration(_SomethingVeryRandomHere* aZero) : mValue(0) {
+    MOZ_ASSERT(!aZero, "Who's playing funny games here?");
+  }
+  // Default copy-constructor and assignment are OK
+
+  // Converting copy-constructor and assignment operator
+  template <typename E>
+  explicit BaseTimeDuration(const BaseTimeDuration<E>& aOther)
+      : mValue(aOther.mValue) {}
+
+  template <typename E>
+  BaseTimeDuration& operator=(const BaseTimeDuration<E>& aOther) {
+    mValue = aOther.mValue;
+    return *this;
+  }
+
+  double ToSeconds() const {
+    if (mValue == INT64_MAX) {  // Forever()
+      return PositiveInfinity<double>();
+    }
+    if (mValue == INT64_MIN) {  // Negated Forever()
+      return NegativeInfinity<double>();
+    }
+    return BaseTimeDurationPlatformUtils::ToSeconds(mValue);
+  }
+  // Return a duration value that includes digits of time we think to
+  // be significant.  This method should be used when displaying a
+  // time to humans.
+  double ToSecondsSigDigits() const {
+    if (mValue == INT64_MAX) {  // Forever()
+      return PositiveInfinity<double>();
+    }
+    if (mValue == INT64_MIN) {  // Negated Forever()
+      return NegativeInfinity<double>();
+    }
+    return BaseTimeDurationPlatformUtils::ToSecondsSigDigits(mValue);
+  }
+  double ToMilliseconds() const { return ToSeconds() * 1000.0; }
+  double ToMicroseconds() const { return ToMilliseconds() * 1000.0; }
+
+  // Using a double here is safe enough; with 53 bits we can represent
+  // durations up to over 280,000 years exactly.  If the units of
+  // mValue do not allow us to represent durations of that length,
+  // long durations are clamped to the max/min representable value
+  // instead of overflowing.
+  static inline BaseTimeDuration FromSeconds(double aSeconds) {
+    return FromMilliseconds(aSeconds * 1000.0);
+  }
+  static BaseTimeDuration FromMilliseconds(double aMilliseconds) {
+    if (aMilliseconds == PositiveInfinity<double>()) {
+      return Forever();
+    }
+    if (aMilliseconds == NegativeInfinity<double>()) {
+      return FromTicks(INT64_MIN);
+    }
+    return FromTicks(
+        BaseTimeDurationPlatformUtils::TicksFromMilliseconds(aMilliseconds));
+  }
+  static inline BaseTimeDuration FromMicroseconds(double aMicroseconds) {
+    return FromMilliseconds(aMicroseconds / 1000.0);
+  }
+
+  static constexpr BaseTimeDuration Forever() { return FromTicks(INT64_MAX); }
+
+  BaseTimeDuration operator+(const BaseTimeDuration& aOther) const {
+    return FromTicks(ValueCalculator::Add(mValue, aOther.mValue));
+  }
+  BaseTimeDuration operator-(const BaseTimeDuration& aOther) const {
+    return FromTicks(ValueCalculator::Subtract(mValue, aOther.mValue));
+  }
+  BaseTimeDuration& operator+=(const BaseTimeDuration& aOther) {
+    mValue = ValueCalculator::Add(mValue, aOther.mValue);
+    return *this;
+  }
+  BaseTimeDuration& operator-=(const BaseTimeDuration& aOther) {
+    mValue = ValueCalculator::Subtract(mValue, aOther.mValue);
+    return *this;
+  }
+  BaseTimeDuration operator-() const {
+    // We don't just use FromTicks(ValueCalculator::Subtract(0, mValue))
+    // since that won't give the correct result for -TimeDuration::Forever().
+    int64_t ticks;
+    if (mValue == INT64_MAX) {
+      ticks = INT64_MIN;
+    } else if (mValue == INT64_MIN) {
+      ticks = INT64_MAX;
+    } else {
+      ticks = -mValue;
+    }
+
+    return FromTicks(ticks);
+  }
+
+  static BaseTimeDuration Max(const BaseTimeDuration& aA,
+                              const BaseTimeDuration& aB) {
+    return FromTicks(std::max(aA.mValue, aB.mValue));
+  }
+  static BaseTimeDuration Min(const BaseTimeDuration& aA,
+                              const BaseTimeDuration& aB) {
+    return FromTicks(std::min(aA.mValue, aB.mValue));
+  }
+
+ private:
+  // Block double multiplier (slower, imprecise if long duration) - Bug 853398.
+  // If required, use MultDouble explicitly and with care.
+  BaseTimeDuration operator*(const double aMultiplier) const = delete;
+
+  // Block double divisor (for the same reason, and because dividing by
+  // fractional values would otherwise invoke the int64_t variant, and rounding
+  // the passed argument can then cause divide-by-zero) - Bug 1147491.
+  BaseTimeDuration operator/(const double aDivisor) const = delete;
+
+ public:
+  BaseTimeDuration MultDouble(double aMultiplier) const {
+    return FromTicks(ValueCalculator::Multiply(mValue, aMultiplier));
+  }
+  BaseTimeDuration operator*(const int32_t aMultiplier) const {
+    return FromTicks(ValueCalculator::Multiply(mValue, aMultiplier));
+  }
+  BaseTimeDuration operator*(const uint32_t aMultiplier) const {
+    return FromTicks(ValueCalculator::Multiply(mValue, aMultiplier));
+  }
+  BaseTimeDuration operator*(const int64_t aMultiplier) const {
+    return FromTicks(ValueCalculator::Multiply(mValue, aMultiplier));
+  }
+  BaseTimeDuration operator*(const uint64_t aMultiplier) const {
+    if (aMultiplier > INT64_MAX) {
+      return Forever();  // NOTE(review): regardless of mValue's sign.
+    }
+    return FromTicks(ValueCalculator::Multiply(mValue, aMultiplier));
+  }
+  BaseTimeDuration operator/(const int64_t aDivisor) const {
+    MOZ_ASSERT(aDivisor != 0, "Division by zero");
+    return FromTicks(ValueCalculator::Divide(mValue, aDivisor));
+  }
+  double operator/(const BaseTimeDuration& aOther) const {
+    MOZ_ASSERT(aOther.mValue != 0, "Division by zero");
+    return ValueCalculator::DivideDouble(mValue, aOther.mValue);
+  }
+  BaseTimeDuration operator%(const BaseTimeDuration& aOther) const {
+    MOZ_ASSERT(aOther.mValue != 0, "Division by zero");
+    return FromTicks(ValueCalculator::Modulo(mValue, aOther.mValue));
+  }
+
+  template <typename E>
+  bool operator<(const BaseTimeDuration<E>& aOther) const {
+    return mValue < aOther.mValue;
+  }
+  template <typename E>
+  bool operator<=(const BaseTimeDuration<E>& aOther) const {
+    return mValue <= aOther.mValue;
+  }
+  template <typename E>
+  bool operator>=(const BaseTimeDuration<E>& aOther) const {
+    return mValue >= aOther.mValue;
+  }
+  template <typename E>
+  bool operator>(const BaseTimeDuration<E>& aOther) const {
+    return mValue > aOther.mValue;
+  }
+  template <typename E>
+  bool operator==(const BaseTimeDuration<E>& aOther) const {
+    return mValue == aOther.mValue;
+  }
+  template <typename E>
+  bool operator!=(const BaseTimeDuration<E>& aOther) const {
+    return mValue != aOther.mValue;
+  }
+  bool IsZero() const { return mValue == 0; }
+  explicit operator bool() const { return mValue != 0; }
+
+  friend std::ostream& operator<<(std::ostream& aStream,
+                                  const BaseTimeDuration& aDuration) {
+    return aStream << aDuration.ToMilliseconds() << " ms";
+  }
+
+  // Return a best guess at the system's current timing resolution,
+  // which might be variable.  BaseTimeDurations below this order of
+  // magnitude are meaningless, and those at the same order of
+  // magnitude or just above are suspect.
+  static BaseTimeDuration Resolution() {
+    return FromTicks(BaseTimeDurationPlatformUtils::ResolutionInTicks());
+  }
+
+  // We could define additional operators here:
+  // -- convert to/from other time units
+  // -- scale duration by a float
+  // but let's do that on demand.
+  // Comparing durations for equality will only lead to bugs on
+  // platforms with high-resolution timers.
+
+ private:
+  friend class TimeStamp;
+  friend struct IPC::ParamTraits<mozilla::BaseTimeDuration<ValueCalculator>>;
+  template <typename>
+  friend class BaseTimeDuration;
+
+  static BaseTimeDuration FromTicks(int64_t aTicks) {
+    BaseTimeDuration t;
+    t.mValue = aTicks;
+    return t;
+  }
+
+  static BaseTimeDuration FromTicks(double aTicks) {
+    // NOTE: this MUST be a >= test, because int64_t(double(INT64_MAX))
+    // overflows and gives INT64_MIN.
+    if (aTicks >= double(INT64_MAX)) {
+      return FromTicks(INT64_MAX);
+    }
+
+    // This MUST be a <= test. NOTE(review): NaN fails both tests; confirm.
+    if (aTicks <= double(INT64_MIN)) {
+      return FromTicks(INT64_MIN);
+    }
+
+    return FromTicks(int64_t(aTicks));
+  }
+
+  // Duration, result is implementation-specific difference of two TimeStamps
+  int64_t mValue;
+};
+
+/**
+ * Plain int64_t tick arithmetic for BaseTimeDuration: every operation maps
+ * straight onto the built-in operator, with no range or overflow checking.
+ */
+class TimeDurationValueCalculator {
+ public:
+  static int64_t Add(int64_t aA, int64_t aB) { return aA + aB; }
+  static int64_t Subtract(int64_t aA, int64_t aB) { return aA - aB; }
+  static int64_t Divide(int64_t aA, int64_t aB) { return aA / aB; }
+  static int64_t Modulo(int64_t aA, int64_t aB) { return aA % aB; }
+  static double DivideDouble(int64_t aA, int64_t aB) {
+    return static_cast<double>(aA) / static_cast<double>(aB);
+  }
+
+  // Integer-only overload; floating-point factors need a specialization.
+  template <typename T>
+  static int64_t Multiply(int64_t aA, T aB) {
+    static_assert(std::is_integral_v<T>,
+                  "Using integer multiplication routine with non-integer type."
+                  " Further specialization required");
+    return aA * static_cast<int64_t>(aB);
+  }
+};
+
+template <>
+inline int64_t TimeDurationValueCalculator::Multiply<double>(int64_t aA,
+                                                             double aB) {
+  return static_cast<int64_t>(aA * aB);  // Multiplies in double, then casts.
+}
+
+/**
+ * Specialization of BaseTimeDuration that uses TimeDurationValueCalculator for
+ * arithmetic on the mValue member.
+ *
+ * Use this class for time durations that are *not* expected to hold values of
+ * Forever (or the negative equivalent) or when such time durations are *not*
+ * expected to be used in arithmetic operations.
+ */
+typedef BaseTimeDuration<TimeDurationValueCalculator> TimeDuration;
+
+/**
+ * Instances of this class represent moments in time, or a special
+ * "null" moment. We do not use the non-monotonic system clock or
+ * local time, since they can be reset, causing apparent backward
+ * travel in time, which can confuse algorithms. Instead we measure
+ * elapsed time according to the system.  This time can never go
+ * backwards (i.e. it never wraps around, at least not in less than
+ * five million years of system elapsed time). It might not advance
+ * while the system is sleeping. If TimeStamp::SetNow() is not called
+ * at all for hours or days, we might not notice the passage of some
+ * of that time.
+ *
+ * We deliberately do not expose a way to convert TimeStamps to some
+ * particular unit. All you can do is compute a difference between two
+ * TimeStamps to get a TimeDuration. You can also add a TimeDuration
+ * to a TimeStamp to get a new TimeStamp. You can't do something
+ * meaningless like add two TimeStamps.
+ *
+ * Internally this is implemented as either a wrapper around
+ *   - high-resolution, monotonic, system clocks if they exist on this
+ *     platform
+ *   - PRIntervalTime otherwise.  We detect wraparounds of
+ *     PRIntervalTime and work around them.
+ *
+ * This class is similar to C++11's time_point, however it is
+ * explicitly nullable and provides an IsNull() method. time_point
+ * is initialized to the clock's epoch and provides a
+ * time_since_epoch() method that functions similarly. i.e.
+ * t.IsNull() is equivalent to t.time_since_epoch() ==
+ * decltype(t)::duration::zero();
+ *
+ * Note that, since TimeStamp objects are small, prefer to pass them by value
+ * unless there is a specific reason not to do so.
+ */
+class TimeStamp {
+ public:
+  /**
+   * Initialize to the "null" moment
+   */
+  constexpr TimeStamp() : mValue() {}
+  // Default copy-constructor and assignment are OK
+
+  /**
+   * The system timestamps are the same as the TimeStamp
+   * retrieved by mozilla::TimeStamp. Since we need this for
+   * vsync timestamps, we enable the creation of mozilla::TimeStamps
+   * on platforms that support vsync aligned refresh drivers / compositors
+   * Verified true as of Jan 31, 2015: B2G and OS X
+   * False on Windows 7
+   * Android's event time uses CLOCK_MONOTONIC via SystemClock.uptimeMillis,
+   * so it has the same value as the TimeStamp posix implementation.
+   * Wayland/GTK event time also uses CLOCK_MONOTONIC on Weston/Mutter
+   * compositors.
+   * UNTESTED ON OTHER PLATFORMS
+   */
+#if defined(XP_DARWIN) || defined(MOZ_WIDGET_ANDROID) || defined(MOZ_WIDGET_GTK)
+  static TimeStamp FromSystemTime(int64_t aSystemTime) {
+    static_assert(sizeof(aSystemTime) == sizeof(TimeStampValue),
+                  "System timestamp should be same units as TimeStampValue");
+    return TimeStamp(TimeStampValue(false, aSystemTime));
+  }
+#endif
+
+  /**
+   * Return true if this is the "null" moment
+   */
+  bool IsNull() const { return mValue.IsNull(); }
+
+  /**
+   * Return true if this is not the "null" moment, may be used in tests, e.g.:
+   * |if (timestamp) { ... }|
+   */
+  explicit operator bool() const { return !IsNull(); }
+
+  bool UsedCanonicalNow() const { return mValue.UsedCanonicalNow(); }
+  static MFBT_API bool GetFuzzyfoxEnabled();
+  static MFBT_API void SetFuzzyfoxEnabled(bool aValue);
+
+  /**
+   * Return a timestamp reflecting the current elapsed system time. This
+   * is monotonically increasing (i.e., does not decrease) over the
+   * lifetime of this process' XPCOM session.
+   *
+   * Now() is trying to ensure the best possible precision on each platform,
+   * at least one millisecond.
+   *
+   * NowLoRes() has been introduced to workaround performance problems of
+   * QueryPerformanceCounter on the Windows platform.  NowLoRes() is giving
+   * lower precision, usually 15.6 ms, but with very good performance benefit.
+   * Use it for measurements of longer times, like >200ms timeouts.
+   */
+  static TimeStamp Now() { return Now(true); }
+  static TimeStamp NowLoRes() { return Now(false); }
+  static TimeStamp NowUnfuzzed() { return NowUnfuzzed(true); }
+
+  static MFBT_API int64_t NowFuzzyTime();
+  /**
+   * Return a timestamp representing the time when the current process was
+   * created which will be comparable with other timestamps taken with this
+   * class. If the actual process creation time is detected to be inconsistent
+   * the @a aIsInconsistent parameter will be set to true, the returned
+   * timestamp however will still be valid though inaccurate.
+   *
+   * @param aIsInconsistent If non-null, set to true if an inconsistency was
+   * detected in the process creation time
+   * @returns A timestamp representing the time when the process was created,
+   * this timestamp is always valid even when errors are reported
+   */
+  static MFBT_API TimeStamp ProcessCreation(bool* aIsInconsistent = nullptr);
+
+  /**
+   * Records a process restart. After this call ProcessCreation() will return
+   * the time when the browser was restarted instead of the actual time when
+   * the process was created.
+   */
+  static MFBT_API void RecordProcessRestart();
+
+  /**
+   * Compute the difference between two timestamps. Both must be non-null.
+   */
+  TimeDuration operator-(const TimeStamp& aOther) const {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    MOZ_ASSERT(!aOther.IsNull(), "Cannot compute with aOther null value");
+    static_assert(-INT64_MAX > INT64_MIN, "int64_t sanity check");
+    int64_t delta = int64_t(mValue - aOther.mValue);
+    // Overflow check: the sign of the signed difference has to agree with
+    // the ordering of the two stamps. When it does not, saturate towards
+    // the end of the int64_t range that matches the ordering.
+    const bool thisIsLater = mValue > aOther.mValue;
+    if (thisIsLater && delta < 0) {
+      delta = INT64_MAX;
+    } else if (!thisIsLater && delta > 0) {
+      delta = INT64_MIN;
+    }
+
+    return TimeDuration::FromTicks(delta);
+  }
+
+  TimeStamp operator+(const TimeDuration& aOther) const {
+    // Apply the compound assignment to a copy and return that copy.
+    TimeStamp shifted(*this);
+    return shifted += aOther;
+  }
+  TimeStamp operator-(const TimeDuration& aOther) const {
+    // Apply the compound assignment to a copy and return that copy.
+    TimeStamp shifted(*this);
+    return shifted -= aOther;
+  }
+  TimeStamp& operator+=(const TimeDuration& aOther) {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    TimeStampValue shifted = mValue + aOther.mValue;
+    // Underflow: adding a negative duration produced a larger value, so fall
+    // back to a default TimeStampValue. (Overflow is deliberately unchecked.)
+    const bool underflowed = aOther.mValue < 0 && shifted > mValue;
+    if (underflowed) {
+      shifted = TimeStampValue();
+    }
+    if (mValue.UsedCanonicalNow()) {
+      shifted.SetCanonicalNow();
+    }
+    mValue = shifted;
+    return *this;
+  }
+  TimeStamp& operator-=(const TimeDuration& aOther) {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    TimeStampValue shifted = mValue - aOther.mValue;
+    // Underflow: subtracting a positive duration produced a larger value, so
+    // fall back to a default TimeStampValue. (Overflow is left unchecked.)
+    const bool underflowed = aOther.mValue > 0 && shifted > mValue;
+    if (underflowed) {
+      shifted = TimeStampValue();
+    }
+    if (mValue.UsedCanonicalNow()) {
+      shifted.SetCanonicalNow();
+    }
+    mValue = shifted;
+    return *this;
+  }
+
+  bool operator<(const TimeStamp& aOther) const {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    MOZ_ASSERT(!aOther.IsNull(), "Cannot compute with aOther null value");
+    return mValue < aOther.mValue;
+  }
+  bool operator<=(const TimeStamp& aOther) const {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    MOZ_ASSERT(!aOther.IsNull(), "Cannot compute with aOther null value");
+    return mValue <= aOther.mValue;
+  }
+  bool operator>=(const TimeStamp& aOther) const {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    MOZ_ASSERT(!aOther.IsNull(), "Cannot compute with aOther null value");
+    return mValue >= aOther.mValue;
+  }
+  bool operator>(const TimeStamp& aOther) const {
+    MOZ_ASSERT(!IsNull(), "Cannot compute with a null value");
+    MOZ_ASSERT(!aOther.IsNull(), "Cannot compute with aOther null value");
+    return mValue > aOther.mValue;
+  }
+  bool operator==(const TimeStamp& aOther) const {
+    if (IsNull() || aOther.IsNull()) { return IsNull() && aOther.IsNull(); }
+    return mValue == aOther.mValue;  // both non-null: compare the raw values
+  }
+  bool operator!=(const TimeStamp& aOther) const { return !(*this == aOther); }
+
+  // Comparing TimeStamps for equality should be discouraged. Adding
+  // two TimeStamps, or scaling TimeStamps, is nonsense and must never
+  // be allowed.
+
+  static MFBT_API void Startup();
+  static MFBT_API void Shutdown();
+
+ private:
+  friend struct IPC::ParamTraits<mozilla::TimeStamp>;
+
+  MOZ_IMPLICIT TimeStamp(TimeStampValue aValue) : mValue(aValue) {}
+
+  static MFBT_API TimeStamp Now(bool aHighResolution);
+  static MFBT_API TimeStamp NowUnfuzzed(bool aHighResolution);
+  static MFBT_API TimeStamp NowFuzzy(TimeStampValue aValue);
+
+  static MFBT_API void UpdateFuzzyTime(int64_t aValue);
+  static MFBT_API void UpdateFuzzyTimeStamp(TimeStamp aValue);
+
+  /**
+   * Computes the uptime of the current process in microseconds. The result
+   * is platform-dependent and needs to be checked against existing timestamps
+   * for consistency.
+   *
+   * @returns The number of microseconds since the calling process was started
+   *          or 0 if an error was encountered while computing the uptime
+   */
+  static MFBT_API uint64_t ComputeProcessUptime();
+
+  /**
+   * When built with PRIntervalTime, a value of 0 means this instance
+   * is "null". Otherwise, the low 32 bits represent a PRIntervalTime,
+   * and the high 32 bits represent a counter of the number of
+   * rollovers of PRIntervalTime that we've seen. This counter starts
+   * at 1 to avoid a real time colliding with the "null" value.
+   *
+   * PR_INTERVAL_MAX is set at 100,000 ticks per second. So the minimum
+   * time to wrap around is about 2^64/100000 seconds, i.e. about
+   * 5,849,424 years.
+   *
+   * When using a system clock, a value is system dependent.
+   */
+  TimeStampValue mValue;
+
+  friend class Fuzzyfox;
+};
+
+}  // namespace mozilla
+
+#endif /* mozilla_TimeStamp_h */
diff --git a/mozglue/misc/TimeStamp_darwin.cpp b/mozglue/misc/TimeStamp_darwin.cpp
new file mode 100644
index 0000000000..a0f0fb0681
--- /dev/null
+++ b/mozglue/misc/TimeStamp_darwin.cpp
@@ -0,0 +1,191 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//
+// Implement TimeStamp::Now() with mach_absolute_time
+//
+// The "tick" unit for mach_absolute_time is defined using mach_timebase_info()
+// which gives a conversion ratio to nanoseconds. For more information see
+// Apple's QA1398.
+//
+// This code is inspired by Chromium's time_mac.cc. The biggest
+// differences are that we explicitly initialize using
+// TimeStamp::Startup() instead of lazily in Now() and that
+// we store the time value in ticks and convert when needed instead
+// of storing the time value in nanoseconds.
+
+#include <mach/mach_time.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Uptime.h"
+
+// Estimate of the smallest duration of time we can measure.
+static uint64_t sResolution;
+static uint64_t sResolutionSigDigs;
+
+static const uint64_t kNsPerMs = 1000000;
+static const uint64_t kUsPerSec = 1000000;
+static const double kNsPerMsd = 1000000.0;
+static const double kNsPerSecd = 1000000000.0;
+
+static bool gInitialized = false;
+static double sNsPerTick;
+
+static uint64_t ClockTime() {
+  // mach_absolute_time is the tick source on the Mac.  Other calls with
+  // less precision (such as TickCount) just call through to
+  // mach_absolute_time.
+  //
+  // At the time of writing mach_absolute_time returns the number of nanoseconds
+  // since boot. This won't overflow 64bits for 500+ years so we aren't going
+  // to worry about that possibility.
+  return mach_absolute_time();
+}
+
+static uint64_t ClockResolutionNs() {
+  // Estimate the smallest interval the clock can measure: time pairs of
+  // back-to-back ClockTime() reads and keep the smallest difference seen.
+  uint64_t start = ClockTime();
+  uint64_t end = ClockTime();
+  uint64_t minres = (end - start);
+  // 10 total trials is arbitrary: what we're trying to avoid by
+  // looping is getting unlucky and being interrupted by a context
+  // switch or signal, or being bitten by paging/cache effects
+  for (int i = 0; i < 9; ++i) {
+    start = ClockTime();
+    end = ClockTime();
+    // Must be end - start: the reversed unsigned subtraction wraps to a huge
+    // value whenever the clock advanced, so no trial could lower minres.
+    uint64_t candidate = (end - start);
+    if (candidate < minres) {
+      minres = candidate;
+    }
+  }
+  if (0 == minres) {
+    // measurable resolution is either incredibly low, ~1ns, or very
+    // high.  fall back on NSPR's resolution assumption
+    minres = 1 * kNsPerMs;
+  }
+  return minres;
+}
+
+namespace mozilla {
+
+double BaseTimeDurationPlatformUtils::ToSeconds(int64_t aTicks) {
+  MOZ_ASSERT(gInitialized, "calling TimeDuration too early");
+  return (aTicks * sNsPerTick) / kNsPerSecd;
+}
+
+double BaseTimeDurationPlatformUtils::ToSecondsSigDigits(int64_t aTicks) {
+  MOZ_ASSERT(gInitialized, "calling TimeDuration too early");
+  // Use signed operands: dividing by the uint64_t globals directly converts a
+  // negative aTicks to a huge unsigned value, inflating small negatives.
+  const int64_t res = int64_t(sResolution), digs = int64_t(sResolutionSigDigs);
+  // Drop anything below the resolution, then chop off insignificant digits.
+  return (digs * ((res * (aTicks / res)) / digs) * sNsPerTick) / kNsPerSecd;
+}
+
+int64_t BaseTimeDurationPlatformUtils::TicksFromMilliseconds(
+    double aMilliseconds) {
+  MOZ_ASSERT(gInitialized, "calling TimeDuration too early");
+  double result = (aMilliseconds * kNsPerMsd) / sNsPerTick;
+  // >= because double(INT64_MAX) is 2^63, already outside int64_t range.
+  if (result >= double(INT64_MAX)) {
+    return INT64_MAX;
+  } else if (result < double(INT64_MIN)) {
+    return INT64_MIN;
+  }
+  return result;
+}
+
+int64_t BaseTimeDurationPlatformUtils::ResolutionInTicks() {
+  MOZ_ASSERT(gInitialized, "calling TimeDuration too early");
+  return static_cast<int64_t>(sResolution);
+}
+
+void TimeStamp::Startup() {
+  // One-time setup of the tick scale and resolution globals; once it has
+  // completed, later calls return immediately.
+  if (gInitialized) {
+    return;
+  }
+
+  // Apple's QA1398 suggests that the output from mach_timebase_info
+  // will not change while a program is running, so it should be safe
+  // to cache the result.
+  mach_timebase_info_data_t timebaseInfo;
+  if (mach_timebase_info(&timebaseInfo) != KERN_SUCCESS) {
+    MOZ_RELEASE_ASSERT(false, "mach_timebase_info failed");
+  }
+
+  sNsPerTick = double(timebaseInfo.numer) / timebaseInfo.denom;
+
+  sResolution = ClockResolutionNs();
+
+  // Find the largest power of ten not exceeding sResolution, for the
+  // sake of ToSecondsSigDigits()
+  sResolutionSigDigs = 1;
+  while (sResolutionSigDigs != sResolution &&
+         10 * sResolutionSigDigs <= sResolution) {
+    sResolutionSigDigs *= 10;
+  }
+
+  gInitialized = true;
+}
+
+void TimeStamp::Shutdown() {}
+
+TimeStamp TimeStamp::Now(bool aHighResolution) {
+  return TimeStamp::NowFuzzy(TimeStampValue(false, ClockTime()));
+}
+
+TimeStamp TimeStamp::NowUnfuzzed(bool aHighResolution) {
+  return TimeStamp(TimeStampValue(false, ClockTime()));
+}
+
+// Computes and returns the process uptime in microseconds.
+// Returns 0 if an error was encountered.
+uint64_t TimeStamp::ComputeProcessUptime() {
+  // Current wall-clock time.
+  struct timeval wallClock;
+  if (gettimeofday(&wallClock, nullptr) == -1) {
+    return 0;
+  }
+
+  // Query the kernel for this process' kinfo_proc record, which holds the
+  // time the process was started.
+  int query[] = {
+      CTL_KERN,
+      KERN_PROC,
+      KERN_PROC_PID,
+      getpid(),
+  };
+  u_int queryLen = sizeof(query) / sizeof(query[0]);
+  struct kinfo_proc info;
+  size_t infoSize = sizeof(info);
+  if (sysctl(query, queryLen, &info, &infoSize, nullptr, 0) == -1) {
+    return 0;
+  }
+
+  // Express both instants in microseconds.
+  const auto& started = info.kp_proc.p_un.__p_starttime;
+  uint64_t startUs = ((uint64_t)started.tv_sec * kUsPerSec) + started.tv_usec;
+  uint64_t nowUs = (wallClock.tv_sec * kUsPerSec) + wallClock.tv_usec;
+
+  // A start time later than "now" means the readings are inconsistent;
+  // report that as an error.
+  if (startUs > nowUs) {
+    return 0;
+  }
+
+  return nowUs - startUs;
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/TimeStamp_posix.cpp b/mozglue/misc/TimeStamp_posix.cpp
new file mode 100644
index 0000000000..b07f955505
--- /dev/null
+++ b/mozglue/misc/TimeStamp_posix.cpp
@@ -0,0 +1,336 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//
+// Implement TimeStamp::Now() with POSIX clocks.
+//
+// The "tick" unit for POSIX clocks is simply a nanosecond, as this is
+// the smallest unit of time representable by struct timespec.  That
+// doesn't mean that a nanosecond is the resolution of TimeDurations
+// obtained with this API; see TimeDuration::Resolution;
+//
+
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+
+#if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
+    defined(__OpenBSD__)
+#  include <sys/param.h>
+#  include <sys/sysctl.h>
+#endif
+
+#if defined(__DragonFly__) || defined(__FreeBSD__)
+#  include <sys/user.h>
+#endif
+
+#if defined(__NetBSD__)
+#  undef KERN_PROC
+#  define KERN_PROC KERN_PROC2
+#  define KINFO_PROC struct kinfo_proc2
+#else
+#  define KINFO_PROC struct kinfo_proc
+#endif
+
+#if defined(__DragonFly__)
+#  define KP_START_SEC kp_start.tv_sec
+#  define KP_START_USEC kp_start.tv_usec
+#elif defined(__FreeBSD__)
+#  define KP_START_SEC ki_start.tv_sec
+#  define KP_START_USEC ki_start.tv_usec
+#else
+#  define KP_START_SEC p_ustart_sec
+#  define KP_START_USEC p_ustart_usec
+#endif
+
+#include "mozilla/Sprintf.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Uptime.h"
+#include <pthread.h>
+
+// Estimate of the smallest duration of time we can measure.
+static uint64_t sResolution;
+static uint64_t sResolutionSigDigs;
+
+static const uint16_t kNsPerUs = 1000;
+static const uint64_t kNsPerMs = 1000000;
+static const uint64_t kNsPerSec = 1000000000;
+static const double kNsPerMsd = 1000000.0;
+static const double kNsPerSecd = 1000000000.0;
+
+static uint64_t TimespecToNs(const struct timespec& aTs) {
+  // Whole seconds scaled to nanoseconds, plus the sub-second remainder.
+  return uint64_t(aTs.tv_sec) * kNsPerSec + uint64_t(aTs.tv_nsec);
+}
+
+static uint64_t ClockTimeNs() {
+  struct timespec ts;
+  // the result is ignored on purpose: &ts is valid, and TimeStamp::Startup()
+  // checks that CLOCK_MONOTONIC is supported (and crashes if not)
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+
+  // tv_sec is defined to be relative to an arbitrary point in time,
+  // but it would be madness for that point in time to be earlier than
+  // the Epoch.  So we can safely assume that even if time_t is 32
+  // bits, tv_sec won't overflow while the browser is open.  Revisit
+  // this argument if we're still building with 32-bit time_t around
+  // the year 2037.
+  return TimespecToNs(ts);
+}
+
+static uint64_t ClockResolutionNs() {
+  // NB: why not rely on clock_getres()?  Two reasons: (i) it might
+  // lie, and (ii) it might return an "ideal" resolution that while
+  // theoretically true, could never be measured in practice.  Since
+  // clock_gettime() likely involves a system call on your platform,
+  // the "actual" timing resolution shouldn't be lower than syscall
+  // overhead.
+  uint64_t start = ClockTimeNs();
+  uint64_t end = ClockTimeNs();
+  uint64_t minres = (end - start);
+
+  // 10 total trials is arbitrary: what we're trying to avoid by
+  // looping is getting unlucky and being interrupted by a context
+  // switch or signal, or being bitten by paging/cache effects
+  for (int i = 0; i < 9; ++i) {
+    start = ClockTimeNs();
+    end = ClockTimeNs();
+    // Must be end - start: the reversed unsigned subtraction wraps to a huge
+    // value whenever the clock advanced, so no trial could lower minres.
+    uint64_t candidate = (end - start);
+    if (candidate < minres) {
+      minres = candidate;
+    }
+  }
+
+  if (0 == minres) {
+    // measurable resolution is either incredibly low, ~1ns, or very
+    // high.  fall back on clock_getres()
+    struct timespec ts;
+    if (0 == clock_getres(CLOCK_MONOTONIC, &ts)) {
+      minres = TimespecToNs(ts);
+    }
+  }
+
+  if (0 == minres) {
+    // clock_getres probably failed.  fall back on NSPR's resolution
+    // assumption
+    minres = 1 * kNsPerMs;
+  }
+
+  return minres;
+}
+
+namespace mozilla {
+
+double BaseTimeDurationPlatformUtils::ToSeconds(int64_t aTicks) {
+  return double(aTicks) / kNsPerSecd;
+}
+
+double BaseTimeDurationPlatformUtils::ToSecondsSigDigits(int64_t aTicks) {
+  // Use signed operands: dividing by the uint64_t globals directly converts a
+  // negative aTicks to a huge unsigned value, inflating small negatives.
+  const int64_t res = int64_t(sResolution), digs = int64_t(sResolutionSigDigs);
+  // Drop anything below the resolution, then chop off insignificant digits.
+  return double(digs * ((res * (aTicks / res)) / digs)) / kNsPerSecd;
+}
+
+int64_t BaseTimeDurationPlatformUtils::TicksFromMilliseconds(
+    double aMilliseconds) {
+  double result = aMilliseconds * kNsPerMsd;
+  // >= because double(INT64_MAX) is 2^63, already outside int64_t range.
+  if (result >= double(INT64_MAX)) {
+    return INT64_MAX;
+  }
+  if (result < double(INT64_MIN)) {
+    return INT64_MIN;
+  }
+  return result;
+}
+
+int64_t BaseTimeDurationPlatformUtils::ResolutionInTicks() {
+  return static_cast<int64_t>(sResolution);
+}
+
+static bool gInitialized = false;
+
+void TimeStamp::Startup() {
+  if (gInitialized) {
+    return;
+  }
+
+  // Probe the clock once up front; ClockTimeNs() ignores errors afterwards.
+  struct timespec probe;
+  if (0 != clock_gettime(CLOCK_MONOTONIC, &probe)) {
+    MOZ_CRASH("CLOCK_MONOTONIC is absent!");
+  }
+  sResolution = ClockResolutionNs();
+
+  // Find the largest power of ten not exceeding sResolution, for the
+  // sake of ToSecondsSigDigits()
+  sResolutionSigDigs = 1;
+  while (sResolutionSigDigs != sResolution &&
+         10 * sResolutionSigDigs <= sResolution) {
+    sResolutionSigDigs *= 10;
+  }
+  gInitialized = true;
+}
+
+void TimeStamp::Shutdown() {}
+
+TimeStamp TimeStamp::Now(bool aHighResolution) {
+  return TimeStamp::NowFuzzy(TimeStampValue(false, ClockTimeNs()));
+}
+
+TimeStamp TimeStamp::NowUnfuzzed(bool aHighResolution) {
+  return TimeStamp(TimeStampValue(false, ClockTimeNs()));
+}
+
+#if defined(XP_LINUX) || defined(ANDROID)
+
+// Calculates the amount of jiffies that have elapsed since boot and up to the
+// starttime value of a specific process as found in its /proc/*/stat file.
+// Returns 0 if an error occurred.
+
+static uint64_t JiffiesSinceBoot(const char* aFile) {
+  char stat[512];
+
+  FILE* f = fopen(aFile, "r");
+  if (!f) {
+    return 0;
+  }
+
+  int n = fread(&stat, 1, sizeof(stat) - 1, f);
+
+  fclose(f);
+
+  if (n <= 0) {
+    return 0;
+  }
+
+  stat[n] = 0;
+  long long unsigned startTime = 0;  // instead of uint64_t to keep GCC quiet
+  char* s = strrchr(stat, ')');
+  // The fields are parsed from s + 2 (past ")" and the separator after it),
+  // so require a character after ')' to keep that start inside the string.
+  if (!s || !s[1]) {
+    return 0;
+  }
+
+  int rv = sscanf(s + 2,
+                  "%*c %*d %*d %*d %*d %*d %*u %*u %*u %*u "
+                  "%*u %*u %*u %*d %*d %*d %*d %*d %*d %llu",
+                  &startTime);
+
+  if (rv != 1 || !startTime) {
+    return 0;
+  }
+
+  return startTime;
+}
+
+// Computes the interval that has elapsed between the thread creation and the
+// process creation by comparing the starttime fields in the respective
+// /proc/*/stat files. The resulting value will be a good approximation of the
+// process uptime. This value will be stored at the address pointed by aTime;
+// if an error occurred 0 will be stored instead.
+
+static void* ComputeProcessUptimeThread(void* aTime) {
+  uint64_t* uptime = static_cast<uint64_t*>(aTime);
+  long hz = sysconf(_SC_CLK_TCK);
+
+  *uptime = 0;
+  // sysconf() reports failure as -1, so reject every non-positive rate.
+  if (hz <= 0) {
+    return nullptr;
+  }
+
+  char threadStat[40];
+  SprintfLiteral(threadStat, "/proc/self/task/%d/stat",
+                 (pid_t)syscall(__NR_gettid));
+
+  uint64_t threadJiffies = JiffiesSinceBoot(threadStat);
+  uint64_t selfJiffies = JiffiesSinceBoot("/proc/self/stat");
+  // A thread older than its process would make the subtraction below wrap.
+  if (!threadJiffies || !selfJiffies || threadJiffies < selfJiffies) {
+    return nullptr;
+  }
+
+  *uptime = ((threadJiffies - selfJiffies) * kNsPerSec) / hz;
+  return nullptr;
+}
+
+// Computes and returns the process uptime in us on Linux & its derivatives.
+// Returns 0 if an error was encountered.
+
+uint64_t TimeStamp::ComputeProcessUptime() {
+  // The helper thread writes the uptime in nanoseconds (0 on error) here.
+  uint64_t uptimeNs = 0;
+  pthread_t worker;
+
+  const int createRv = pthread_create(&worker, nullptr,
+                                      ComputeProcessUptimeThread, &uptimeNs);
+  if (createRv != 0) {
+    MOZ_CRASH("Failed to create process uptime thread.");
+    return 0;
+  }
+  pthread_join(worker, nullptr);
+  return uptimeNs / kNsPerUs;
+}
+
+#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
+    defined(__OpenBSD__)
+
+// Computes and returns the process uptime in us on various BSD flavors.
+// Returns 0 if an error was encountered.
+
+uint64_t TimeStamp::ComputeProcessUptime() {
+  struct timespec ts;
+  int rv = clock_gettime(CLOCK_REALTIME, &ts);
+
+  if (rv == -1) {
+    return 0;
+  }
+  // MIB selecting this process' entry by pid (KERN_PROC / KERN_PROC_PID).
+  int mib[] = {
+    CTL_KERN,
+    KERN_PROC,
+    KERN_PROC_PID,
+    getpid(),
+#  if defined(__NetBSD__) || defined(__OpenBSD__)
+    sizeof(KINFO_PROC),
+    1,
+#  endif
+  };
+  u_int mibLen = sizeof(mib) / sizeof(mib[0]);
+
+  KINFO_PROC proc;
+  size_t bufferSize = sizeof(proc);
+  rv = sysctl(mib, mibLen, &proc, &bufferSize, nullptr, 0);
+
+  if (rv == -1) {
+    return 0;
+  }
+  // Both instants below are expressed in nanoseconds.
+  uint64_t startTime = ((uint64_t)proc.KP_START_SEC * kNsPerSec) +
+                       (proc.KP_START_USEC * kNsPerUs);
+  uint64_t now = ((uint64_t)ts.tv_sec * kNsPerSec) + ts.tv_nsec;
+
+  if (startTime > now) {
+    return 0;
+  }
+  // Convert the nanosecond difference to microseconds for the caller.
+  return (now - startTime) / kNsPerUs;
+}
+
+#else
+
+uint64_t TimeStamp::ComputeProcessUptime() { return 0; }
+
+#endif
+
+}  // namespace mozilla
diff --git a/mozglue/misc/TimeStamp_windows.cpp b/mozglue/misc/TimeStamp_windows.cpp
new file mode 100644
index 0000000000..e4739ced29
--- /dev/null
+++ b/mozglue/misc/TimeStamp_windows.cpp
@@ -0,0 +1,535 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Implement TimeStamp::Now() with QueryPerformanceCounter() controlled with
+// values of GetTickCount64().
+
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Uptime.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <intrin.h>
+#include <windows.h>
+
+// To enable logging define to your favorite logging API
+#define LOG(x)
+
+class AutoCriticalSection {
+  LPCRITICAL_SECTION mSection;  // section held for this object's lifetime
+
+ public:
+  // Enters aSection now; the destructor leaves it again.
+  explicit AutoCriticalSection(LPCRITICAL_SECTION aSection)
+      : mSection(aSection) {
+    ::EnterCriticalSection(mSection);
+  }
+  ~AutoCriticalSection() { ::LeaveCriticalSection(mSection); }
+};
+
+// Estimate of the smallest duration of time we can measure.
+static volatile ULONGLONG sResolution;
+static volatile ULONGLONG sResolutionSigDigs;
+static const double kNsPerSecd = 1000000000.0;
+static const LONGLONG kNsPerMillisec = 1000000;
+
+// ----------------------------------------------------------------------------
+// Global constants
+// ----------------------------------------------------------------------------
+
+// Tolerance to failures settings.
+//
+// What is the interval we want to have failure free.
+// in [ms]
+static const uint32_t kFailureFreeInterval = 5000;
+// How many failures we are willing to tolerate in the interval.
+static const uint32_t kMaxFailuresPerInterval = 4;
+// What is the threshold to treat fluctuations as actual failures.
+// in [ms]
+static const uint32_t kFailureThreshold = 50;
+
+// If we are not able to get the value of GTC time increment, use this value
+// which is the most usual increment.
+static const DWORD kDefaultTimeIncrement = 156001;
+
+// ----------------------------------------------------------------------------
+// Global variables, not changing at runtime
+// ----------------------------------------------------------------------------
+
+// Result of QueryPerformanceFrequency
+// We use default of 1 for the case we can't use QueryPerformanceCounter
+// to make mt/ms conversions work despite that.
+static uint64_t sFrequencyPerSec = 1;
+
+namespace mozilla {
+
+MFBT_API uint64_t GetQueryPerformanceFrequencyPerSec() {
+  return sFrequencyPerSec;
+}
+
+}  // namespace mozilla
+
+// How tolerant we are of GTC's occasional loss of resolution.
+// This number says how many multiples of the minimal GTC resolution
+// detected on the system are acceptable.  This number is empirical.
+static const LONGLONG kGTCTickLeapTolerance = 4;
+
+// Base tolerance (more: "inability of detection" range) threshold is calculated
+// dynamically, and kept in sGTCResolutionThreshold.
+//
+// Schematically, QPC worked "100%" correctly if ((GTC_now - GTC_epoch) -
+// (QPC_now - QPC_epoch)) was in  [-sGTCResolutionThreshold,
+// sGTCResolutionThreshold] interval every time we'd compared two time stamps.
+// If not, then we check the overflow behind this basic threshold
+// is in kFailureThreshold.  If not, we consider it as a QPC failure.  If too
+// many failures in short time are detected, QPC is considered faulty and
+// disabled.
+//
+// Kept in [mt]
+static LONGLONG sGTCResolutionThreshold;
+
+// If QPC is found faulty for two stamps in this interval, we engage
+// the fault detection algorithm.  For duration larger then this limit
+// we bypass using durations calculated from QPC when jitter is detected,
+// but don't touch the sUseQPC flag.
+//
+// Value is in [ms].
+static const uint32_t kHardFailureLimit = 2000;
+// Conversion to [mt]
+static LONGLONG sHardFailureLimit;
+
+// Conversion of kFailureFreeInterval and kFailureThreshold to [mt]
+static LONGLONG sFailureFreeInterval;
+static LONGLONG sFailureThreshold;
+
+// ----------------------------------------------------------------------------
+// System status flags
+// ----------------------------------------------------------------------------
+
+// Flag for stable TSC that indicates platform where QPC is stable.
+static bool sHasStableTSC = false;
+
+// ----------------------------------------------------------------------------
+// Global state variables, changing at runtime
+// ----------------------------------------------------------------------------
+
+// Initially true, set to false when QPC is found unstable and never
+// returns back to true since that time.
+static bool volatile sUseQPC = true;
+
+// ----------------------------------------------------------------------------
+// Global lock
+// ----------------------------------------------------------------------------
+
+// Thread spin count before entering the full wait state for sTimeStampLock.
+// Inspired by Rob Arnold's work on PRMJ_Now().
+static const DWORD kLockSpinCount = 4096;
+
+// Common mutex (given the relative complexity of the logic, this is better
+// than using CMPXCHG8B.)
+// It protects the globals below.
+static CRITICAL_SECTION sTimeStampLock;
+
+// ----------------------------------------------------------------------------
+// Global lock protected variables
+// ----------------------------------------------------------------------------
+
+// Timestamp in future until QPC must behave correctly.
+// Set to now + kFailureFreeInterval on first QPC failure detection.
+// Set to now + E * kFailureFreeInterval on following errors,
+//   where E is number of errors detected during last kFailureFreeInterval
+//   milliseconds, calculated simply as:
+//   E = (sFaultIntoleranceCheckpoint - now) / kFailureFreeInterval + 1.
+// When E > kMaxFailuresPerInterval -> disable QPC.
+//
+// Kept in [mt]
+static ULONGLONG sFaultIntoleranceCheckpoint = 0;
+
+namespace mozilla {
+
+// Result is in [mt]
+static inline ULONGLONG PerformanceCounter() {
+  LARGE_INTEGER counter;
+  ::QueryPerformanceCounter(&counter);
+
+  // QueryPerformanceCounter may slightly jitter (not be 100% monotonic.)
+  // Remember the largest reading handed out so far, under the lock, and
+  // never report anything smaller than it.
+  AutoCriticalSection lock(&sTimeStampLock);
+
+  static decltype(LARGE_INTEGER::QuadPart) sLargestSeen;
+  if (counter.QuadPart >= sLargestSeen) {
+    sLargestSeen = counter.QuadPart;
+  }
+  return sLargestSeen * 1000ULL;
+}
+
+static void InitThresholds() {
+  // Derive the GTC/QPC comparison threshold from the system's time
+  // increment, then convert the millisecond limits declared in this file
+  // to [mt].
+  DWORD adjustment = 0;
+  DWORD increment = 0;
+  BOOL adjustmentDisabled;
+  GetSystemTimeAdjustment(&adjustment, &increment, &adjustmentDisabled);
+
+  LOG(("TimeStamp: timeIncrement=%d [100ns]", increment));
+
+  // Nothing reported: assume the most usual increment instead.
+  if (0 == increment) {
+    increment = kDefaultTimeIncrement;
+  }
+
+  // Round the increment, given in [100ns], up to a whole millisecond.
+  // Subtracting one first keeps already-whole values from being bumped:
+  //   156001 -> 156000 -> 15 [ms] -> 16 [ms] -> 160000
+  //   210000 -> 209999 -> 20 [ms] -> 21 [ms] -> 210000
+  DWORD wholeMs = (increment - 1) / 10000 + 1;
+  DWORD incrementCeil = wholeMs * 10000;
+
+  // How many milli-ticks has the interval rounded up
+  LONGLONG ceilingInMt =
+      (int64_t(incrementCeil) * sFrequencyPerSec) / 10000LL;
+
+  // GTC may jump by 32 (2*16) ms in two steps, therefore use the ceiling
+  // value.
+  sGTCResolutionThreshold = LONGLONG(kGTCTickLeapTolerance * ceilingInMt);
+
+  // Express the millisecond limits in [mt].
+  sHardFailureLimit = ms2mt(kHardFailureLimit);
+  sFailureFreeInterval = ms2mt(kFailureFreeInterval);
+  sFailureThreshold = ms2mt(kFailureThreshold);
+}
+
+static void InitResolution() {
+  // Smallest step seen between two back-to-back counter reads, in [mt].
+  ULONGLONG smallestStep = ~0ULL;
+  if (!sUseQPC) {
+    // GetTickCount has only ~16ms known resolution
+    smallestStep = ms2mt(16);
+  } else {
+    // 10 total trials is arbitrary: what we're trying to avoid by
+    // looping is getting unlucky and being interrupted by a context
+    // switch or signal, or being bitten by paging/cache effects.
+    // A zero step ends the loop early since nothing smaller can follow.
+    for (int trial = 0; trial < 10 && smallestStep; ++trial) {
+      const ULONGLONG first = PerformanceCounter();
+      const ULONGLONG second = PerformanceCounter();
+
+      const ULONGLONG step = second - first;
+      if (step < smallestStep) {
+        smallestStep = step;
+      }
+    }
+
+    if (0 == smallestStep) {
+      smallestStep = 1;
+    }
+  }
+
+  // Converting the step that is in [mt] to nanoseconds, multiplying
+  // the argument to preserve resolution.
+  ULONGLONG resolutionNs = mt2ms(smallestStep * kNsPerMillisec);
+  if (0 == resolutionNs) {
+    resolutionNs = 1;
+  }
+
+  sResolution = resolutionNs;
+
+  // Find the largest power of ten not exceeding the resolution, for the
+  // sake of ToSecondsSigDigits()
+  ULONGLONG sigDigs = 1;
+  while (sigDigs != resolutionNs && 10 * sigDigs <= resolutionNs) {
+    sigDigs *= 10;
+  }
+
+  sResolutionSigDigs = sigDigs;
+}
+
+// ----------------------------------------------------------------------------
+// TimeStampValue implementation
+// ----------------------------------------------------------------------------
+MFBT_API
+TimeStampValue::TimeStampValue(ULONGLONG aGTC, ULONGLONG aQPC, bool aHasQPC,
+                               bool aUsedCanonicalNow)
+    : mGTC(aGTC),
+      mQPC(aQPC),
+      mUsedCanonicalNow(aUsedCanonicalNow),
+      mHasQPC(aHasQPC) {
+  mIsNull = aGTC == 0 && aQPC == 0;  // null only when both counters are zero
+}
+
+MFBT_API TimeStampValue& TimeStampValue::operator+=(const int64_t aOther) {
+  mGTC += aOther;
+  mQPC += aOther;
+  return *this;
+}
+
+MFBT_API TimeStampValue& TimeStampValue::operator-=(const int64_t aOther) {
+  mGTC -= aOther;
+  mQPC -= aOther;
+  return *this;
+}
+
+// If the duration is less than two seconds, perform check of QPC stability
+// by comparing both GTC and QPC calculated durations of this and aOther.
+MFBT_API uint64_t TimeStampValue::CheckQPC(const TimeStampValue& aOther) const {
+  uint64_t deltaGTC = mGTC - aOther.mGTC;
+
+  if (!mHasQPC || !aOther.mHasQPC) {  // At least one side has no QPC value
+    return deltaGTC;
+  }
+
+  uint64_t deltaQPC = mQPC - aOther.mQPC;
+
+  if (sHasStableTSC) {  // For stable TSC there is no need to check
+    return deltaQPC;
+  }
+
+  // Check QPC is sane before using it.
+  int64_t diff = DeprecatedAbs(int64_t(deltaQPC) - int64_t(deltaGTC));
+  if (diff <= sGTCResolutionThreshold) {
+    return deltaQPC;
+  }
+
+  // Treat absolutely for calibration purposes
+  int64_t duration = DeprecatedAbs(int64_t(deltaGTC));
+  int64_t overflow = diff - sGTCResolutionThreshold;
+
+  LOG(("TimeStamp: QPC check after %llums with overflow %1.4fms",
+       mt2ms(duration), mt2ms_f(overflow)));
+
+  if (overflow <= sFailureThreshold) {  // We are in the limit, let go.
+    return deltaQPC;
+  }
+
+  // QPC deviates, don't use it, since now this method may only return deltaGTC.
+
+  if (!sUseQPC) {  // QPC already disabled, no need to run the fault tolerance
+                   // algorithm.
+    return deltaGTC;
+  }
+
+  LOG(("TimeStamp: QPC jittered over failure threshold"));
+
+  if (duration < sHardFailureLimit) {
+    // Interval between the two time stamps is very short, consider
+    // QPC as unstable and record a failure.
+    uint64_t now = ms2mt(GetTickCount64());
+
+    AutoCriticalSection lock(&sTimeStampLock);
+
+    if (sFaultIntoleranceCheckpoint && sFaultIntoleranceCheckpoint > now) {
+      // There's already been an error in the last fault intolerant interval.
+      // Time since now to the checkpoint actually holds information on how many
+      // failures there were in the failure free interval we have defined.
+      uint64_t failureCount =
+          (sFaultIntoleranceCheckpoint - now + sFailureFreeInterval - 1) /
+          sFailureFreeInterval;
+      if (failureCount > kMaxFailuresPerInterval) {
+        sUseQPC = false;
+        LOG(("TimeStamp: QPC disabled"));
+      } else {
+        // Move the fault intolerance checkpoint more to the future, prolong it
+        // to reflect the number of detected failures.
+        ++failureCount;
+        sFaultIntoleranceCheckpoint = now + failureCount * sFailureFreeInterval;
+        LOG(("TimeStamp: recording %dth QPC failure", failureCount));
+      }
+    } else {
+      // Setup fault intolerance checkpoint in the future for first detected
+      // error.
+      sFaultIntoleranceCheckpoint = now + sFailureFreeInterval;
+      LOG(("TimeStamp: recording 1st QPC failure"));
+    }
+  }
+
+  return deltaGTC;
+}
+
+// Difference between two values in [mt]. Two null values are defined to be
+// zero ticks apart; every other pair is delegated to CheckQPC().
+MFBT_API uint64_t
+TimeStampValue::operator-(const TimeStampValue& aOther) const {
+  const bool bothNull = IsNull() && aOther.IsNull();
+  // CheckQPC() is only reached when at least one side is non-null.
+  return bothNull ? uint64_t(0) : CheckQPC(aOther);
+}
+
+// ----------------------------------------------------------------------------
+// TimeDuration and TimeStamp implementation
+// ----------------------------------------------------------------------------
+
+MFBT_API double BaseTimeDurationPlatformUtils::ToSeconds(int64_t aTicks) {
+  // Go to double before any arithmetic; that avoids a blocked store forward.
+  return double(aTicks) / (1000.0 * double(sFrequencyPerSec));
+}
+
+MFBT_API double BaseTimeDurationPlatformUtils::ToSecondsSigDigits(
+    int64_t aTicks) {
+  const LONGLONG res = sResolution;
+  const LONGLONG sigDigs = sResolutionSigDigs;
+  // Round down to a whole multiple of the resolution, so that nothing finer
+  // than the resolution is reported ...
+  const LONGLONG atResolution = res * (aTicks / res);
+  // ... then round down again to chop off the insignificant digits.
+  return double(sigDigs * (atResolution / sigDigs)) / kNsPerSecd;
+}
+
+// Converts milliseconds to [mt] ticks, saturating at the int64_t limits.
+MFBT_API int64_t
+BaseTimeDurationPlatformUtils::TicksFromMilliseconds(double aMilliseconds) {
+  double result = ms2mt(aMilliseconds);
+  // double(INT64_MAX) rounds up to 2^63, which does not fit in int64_t, so
+  // that exact value has to saturate as well (hence >= rather than >).
+  if (result >= double(INT64_MAX)) {
+    return INT64_MAX;
+  }
+  return result < double(INT64_MIN) ? INT64_MIN : int64_t(result);
+}
+
+MFBT_API int64_t BaseTimeDurationPlatformUtils::ResolutionInTicks() {
+  return int64_t(sResolution);  // sResolution, converted to a signed count
+}
+
+static bool HasStableTSC() {  // is the counter known to tick at a constant rate?
+#if defined(_M_ARM64)
+  // AArch64 defines that its system counter runs at a constant rate
+  // regardless of the current clock frequency of the system.  See "The
+  // Generic Timer", section D7, in the ARMARM for ARMv8.
+  return true;
+#else
+  union {
+    int regs[4];
+    struct {
+      int nIds;
+      char cpuString[12];
+    };
+  } cpuInfo;
+
+  __cpuid(cpuInfo.regs, 0);
+  // Only allow Intel or AMD CPUs for now.
+  // The vendor string is spread over reg[1], reg[3], reg[2] in that order, so
+  // the literals ("GenuineIntel", "AuthenticAMD") are permuted to match.
+  if (_strnicmp(cpuInfo.cpuString, "GenuntelineI", sizeof(cpuInfo.cpuString)) &&
+      _strnicmp(cpuInfo.cpuString, "AuthcAMDenti", sizeof(cpuInfo.cpuString))) {
+    return false;
+  }
+
+  int regs[4];
+
+  // detect if the Advanced Power Management feature is supported
+  __cpuid(regs, 0x80000000);
+  if ((unsigned int)regs[0] < 0x80000007) {
+    // XXX should we return true here?  If there is no APM there may be
+    // no way how TSC can run out of sync among cores.
+    return false;
+  }
+
+  __cpuid(regs, 0x80000007);
+  // if bit 8 is set then TSC will run at a constant rate
+  // in all ACPI P-states, C-states and T-states
+  return regs[3] & (1 << 8);
+#endif
+}
+
+static bool gInitialized = false;
+
+// One-time setup of the Windows time source. Chooses between QPC and
+// GetTickCount64; the MOZ_TIMESTAMP_MODE environment variable ("QPC" or
+// "GTC") overrides the automatic choice. Calls after the first do nothing.
+// gInitialized is a plain bool, so concurrent first calls are not guarded.
+MFBT_API void TimeStamp::Startup() {
+  if (gInitialized) {
+    return;
+  }
+
+  gInitialized = true;
+
+  // CheckQPC() takes this lock around its QPC failure bookkeeping.
+  InitializeCriticalSectionAndSpinCount(&sTimeStampLock, kLockSpinCount);
+
+  // Read the optional override; any other value is ignored.
+  const char* const mode = getenv("MOZ_TIMESTAMP_MODE");
+  const bool forceQPC = mode && !strcmp(mode, "QPC");
+  const bool forceGTC = mode && !strcmp(mode, "GTC");
+
+  // Decide which implementation to use for the high-performance timer.
+  LARGE_INTEGER freq;
+  sUseQPC = !forceGTC && ::QueryPerformanceFrequency(&freq);
+  if (!sUseQPC) {
+    // No Performance Counter, or GTC was forced.  Fall back to use
+    // GetTickCount64.
+    InitResolution();
+
+    LOG(("TimeStamp: using GetTickCount64"));
+    return;
+  }
+
+  // Forcing QPC also skips the CPU probe: the TSC is then taken as stable.
+  sHasStableTSC = forceQPC || HasStableTSC();
+  LOG(("TimeStamp: HasStableTSC=%d", sHasStableTSC));
+
+  sFrequencyPerSec = freq.QuadPart;
+  LOG(("TimeStamp: QPC frequency=%llu", sFrequencyPerSec));
+
+  // Finish by initializing the QPC thresholds and the timer resolution.
+  InitThresholds();
+  InitResolution();
+}
+
+// Deletes the critical section that Startup() initialized. gInitialized is
+// left set, so a later Startup() call would not re-create the lock.
+MFBT_API void TimeStamp::Shutdown() { DeleteCriticalSection(&sTimeStampLock); }
+
+// Samples the clocks: GTC always, QPC only when requested and still enabled.
+TimeStampValue NowInternal(bool aHighResolution) {
+  // sUseQPC is volatile, so it is read at most once and the result reused.
+  const bool withQPC = aHighResolution && sUseQPC;
+  // Both samples are in [mt] units; QPC is taken before GTC.
+  const ULONGLONG qpcTicks = withQPC ? PerformanceCounter() : uint64_t(0);
+  const ULONGLONG gtcTicks = ms2mt(GetTickCount64());
+  return TimeStampValue(gtcTicks, qpcTicks, withQPC, false);
+}
+
+MFBT_API TimeStamp TimeStamp::Now(bool aHighResolution) {
+  return TimeStamp::NowFuzzy(NowInternal(aHighResolution));
+}
+// Like Now(), but wraps the raw sample directly instead of using NowFuzzy().
+MFBT_API TimeStamp TimeStamp::NowUnfuzzed(bool aHighResolution) {
+  return TimeStamp(NowInternal(aHighResolution));
+}
+
+// Computes and returns the process uptime in microseconds: the span from the
+// process creation time to now. Returns 0 if an error was encountered.
+MFBT_API uint64_t TimeStamp::ComputeProcessUptime() {
+  // Only the creation time is used; the other three outputs are discarded.
+  FILETIME creation, unusedExit, unusedKernel, unusedUser;
+  if (!GetProcessTimes(GetCurrentProcess(), &creation, &unusedExit,
+                       &unusedKernel, &unusedUser)) {
+    return 0;
+  }
+
+  // Use the precise clock when kernel32 exports it, else the regular one.
+  static const StaticDynamicallyLinkedFunctionPtr<void(WINAPI*)(LPFILETIME)>
+      pGetSystemTimePreciseAsFileTime(L"kernel32.dll",
+                                      "GetSystemTimePreciseAsFileTime");
+  FILETIME current;
+  if (!pGetSystemTimePreciseAsFileTime) {
+    GetSystemTimeAsFileTime(&current);
+  } else {
+    pGetSystemTimePreciseAsFileTime(&current);
+  }
+
+  // FILETIME counts 100 ns intervals, so ten of them make one microsecond.
+  ULARGE_INTEGER from = {{creation.dwLowDateTime, creation.dwHighDateTime}};
+  ULARGE_INTEGER to = {{current.dwLowDateTime, current.dwHighDateTime}};
+  return (to.QuadPart - from.QuadPart) / 10ULL;
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/TimeStamp_windows.h b/mozglue/misc/TimeStamp_windows.h
new file mode 100644
index 0000000000..1953aca62b
--- /dev/null
+++ b/mozglue/misc/TimeStamp_windows.h
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_TimeStamp_windows_h
+#define mozilla_TimeStamp_windows_h
+
+#include "mozilla/Types.h"
+
+namespace mozilla {
+
+/**
+ * The [mt] unit:
+ *
+ * Many values are kept in ticks of the Performance Counter x 1000,
+ * hereafter referred to as [mt], meaning milli-ticks.
+ *
+ * This is needed to preserve maximum precision of the performance frequency
+ * representation.  GetTickCount64 values in milliseconds are multiplied by
+ * the frequency per second.  Therefore we need to multiply QPC values by 1000
+ * to have the same units and allow simple arithmetic with both QPC and GTC.
+ */
+#define ms2mt(x) ((x)*mozilla::GetQueryPerformanceFrequencyPerSec())
+#define mt2ms(x) ((x) / mozilla::GetQueryPerformanceFrequencyPerSec())
+#define mt2ms_f(x) (double(x) / mozilla::GetQueryPerformanceFrequencyPerSec())
+
+MFBT_API uint64_t GetQueryPerformanceFrequencyPerSec();
+
+class TimeStamp;
+class TimeStampValue;
+
+TimeStampValue NowInternal(bool aHighResolution);
+
+class TimeStampValue {
+  friend TimeStampValue NowInternal(bool);
+  friend bool IsCanonicalTimeStamp(TimeStampValue);
+  friend struct IPC::ParamTraits<mozilla::TimeStampValue>;
+  friend class TimeStamp;
+  friend class Fuzzyfox;
+
+  // Both QPC and GTC are kept in [mt] units.
+  uint64_t mGTC;
+  uint64_t mQPC;
+
+  bool mUsedCanonicalNow;
+  bool mIsNull;  // true when default-constructed or built from two zeros
+  bool mHasQPC;  // false when mQPC was not sampled (see NowInternal)
+  // Private: only this class and the friends above can construct values.
+  MFBT_API TimeStampValue(uint64_t aGTC, uint64_t aQPC, bool aHasQPC,
+                          bool aUsedCanonicalNow);
+  // Delta to aOther in [mt]; picks between the QPC and the GTC difference.
+  MFBT_API uint64_t CheckQPC(const TimeStampValue& aOther) const;
+  // Default construction yields the null value.
+  constexpr MOZ_IMPLICIT TimeStampValue()
+      : mGTC(0),
+        mQPC(0),
+        mUsedCanonicalNow(false),
+        mIsNull(true),
+        mHasQPC(false) {}
+
+ public:
+  MFBT_API uint64_t operator-(const TimeStampValue& aOther) const;
+  // Shifted copies; the constructor recomputes mIsNull for the result.
+  TimeStampValue operator+(const int64_t aOther) const {
+    return TimeStampValue(mGTC + aOther, mQPC + aOther, mHasQPC,
+                          mUsedCanonicalNow);
+  }
+  TimeStampValue operator-(const int64_t aOther) const {
+    return TimeStampValue(mGTC - aOther, mQPC - aOther, mHasQPC,
+                          mUsedCanonicalNow);
+  }
+  MFBT_API TimeStampValue& operator+=(const int64_t aOther);
+  MFBT_API TimeStampValue& operator-=(const int64_t aOther);
+  // Ordering and equality all come from the signed value of operator-().
+  bool operator<(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) < 0;
+  }
+  bool operator>(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) > 0;
+  }
+  bool operator<=(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) <= 0;
+  }
+  bool operator>=(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) >= 0;
+  }
+  bool operator==(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) == 0;
+  }
+  bool operator!=(const TimeStampValue& aOther) const {
+    return int64_t(*this - aOther) != 0;
+  }
+  bool UsedCanonicalNow() const { return mUsedCanonicalNow; }
+  void SetCanonicalNow() { mUsedCanonicalNow = true; }
+  bool IsNull() const { return mIsNull; }
+};
+
+}  // namespace mozilla
+
+#endif /* mozilla_TimeStamp_windows_h */
diff --git a/mozglue/misc/Uptime.cpp b/mozglue/misc/Uptime.cpp
new file mode 100644
index 0000000000..bded4017ec
--- /dev/null
+++ b/mozglue/misc/Uptime.cpp
@@ -0,0 +1,150 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Uptime.h"
+
+#ifdef XP_WIN
+#  include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#endif  // XP_WIN
+
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Assertions.h"
+
+using namespace mozilla;
+
+namespace {
+
+Maybe<uint64_t> NowIncludingSuspendMs();  // defined per platform below
+Maybe<uint64_t> NowExcludingSuspendMs();  // defined per platform below
+static Maybe<uint64_t> mStartExcludingSuspendMs;  // set by InitializeUptime()
+static Maybe<uint64_t> mStartIncludingSuspendMs;  // set by InitializeUptime()
+
+// Apple things
+#if defined(__APPLE__) && defined(__MACH__)
+#  include <time.h>
+#  include <sys/time.h>
+#  include <sys/types.h>
+#  include <mach/mach_time.h>
+
+const uint64_t kNSperMS = 1000000;  // nanoseconds per millisecond
+// Reads CLOCK_UPTIME_RAW, used here as the clock that skips suspended time.
+Maybe<uint64_t> NowExcludingSuspendMs() {
+  return Some(clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / kNSperMS);
+}
+// Reads CLOCK_MONOTONIC_RAW, used here as the clock that counts suspended time.
+Maybe<uint64_t> NowIncludingSuspendMs() {
+  return Some(clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW) / kNSperMS);
+}
+
+#endif  // macOS
+
+#if defined(XP_WIN)
+
+// Number of hundreds of nanoseconds in a millisecond
+static constexpr uint64_t kHNSperMS = 10000;
+
+Maybe<uint64_t> NowExcludingSuspendMs() {
+  ULONGLONG ticks100ns;  // unbiased interrupt time, in 100 ns units
+  if (QueryUnbiasedInterruptTime(&ticks100ns)) {
+    return Some(ticks100ns / kHNSperMS);
+  }
+  return Nothing();  // the query failed
+}
+
+Maybe<uint64_t> NowIncludingSuspendMs() {
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<void(WINAPI*)(
+      PULONGLONG)>
+      pQueryInterruptTime(L"KernelBase.dll", "QueryInterruptTime");
+  if (pQueryInterruptTime) {
+    ULONGLONG ticks100ns;
+    pQueryInterruptTime(&ticks100ns);
+    return Some(ticks100ns / kHNSperMS);
+  }
+  // QueryInterruptTime is unavailable. On Windows, TimeStamp does include the
+  // time the computer was suspended, so the time since process creation is
+  // an adequate fallback.
+  TimeStamp created = TimeStamp::ProcessCreation();
+  TimeStamp current = TimeStamp::Now();
+  if (created.IsNull() || current.IsNull()) {
+    return Nothing();
+  }
+  return Some(uint64_t((current - created).ToMilliseconds()));
+}
+#endif  // XP_WIN
+
+#if defined(XP_LINUX)  // including Android
+#  include <time.h>
+
+// Number of nanoseconds in a millisecond.
+static constexpr uint64_t kNSperMS = 1000000;
+// Converts aTs to whole milliseconds; tv_sec is widened before multiplying.
+uint64_t TimespecToMilliseconds(struct timespec aTs) {
+  return uint64_t(aTs.tv_sec) * 1000 + aTs.tv_nsec / kNSperMS;
+}
+
+Maybe<uint64_t> NowExcludingSuspendMs() {
+  // CLOCK_MONOTONIC serves as the clock that excludes suspended time.
+  struct timespec sample = {0};
+  if (clock_gettime(CLOCK_MONOTONIC, &sample) == 0) {
+    return Some(TimespecToMilliseconds(sample));
+  }
+  return Nothing();  // clock_gettime reported an error
+}
+
+Maybe<uint64_t> NowIncludingSuspendMs() {
+#  ifdef CLOCK_BOOTTIME
+  // CLOCK_BOOTTIME serves as the clock that includes suspended time.
+  struct timespec sample = {0};
+  if (clock_gettime(CLOCK_BOOTTIME, &sample) == 0) {
+    return Some(TimespecToMilliseconds(sample));
+  }
+  return Nothing();  // clock_gettime reported an error
+#  else
+  return Nothing();  // CLOCK_BOOTTIME is not defined on this platform
+#  endif
+}
+
+#endif  // XP_LINUX
+
+};  // anonymous namespace
+
+namespace mozilla {
+
+void InitializeUptime() {
+  MOZ_RELEASE_ASSERT(mStartIncludingSuspendMs.isNothing() &&
+                         mStartExcludingSuspendMs.isNothing(),
+                     "Must not be called more than once");
+  mStartIncludingSuspendMs = NowIncludingSuspendMs();  // may stay Nothing()
+  mStartExcludingSuspendMs = NowExcludingSuspendMs();  // may stay Nothing()
+}
+
+Maybe<uint64_t> ProcessUptimeMs() {
+  if (mStartIncludingSuspendMs.isNothing()) {
+    return Nothing();  // no start value was recorded
+  }
+  Maybe<uint64_t> current = NowIncludingSuspendMs();
+  if (current.isNothing()) {
+    return Nothing();  // the clock could not be read this time
+  }
+  return Some(current.value() - mStartIncludingSuspendMs.value());
+}
+
+Maybe<uint64_t> ProcessUptimeExcludingSuspendMs() {
+  if (mStartExcludingSuspendMs.isNothing()) {
+    return Nothing();  // no start value was recorded
+  }
+  Maybe<uint64_t> current = NowExcludingSuspendMs();
+  if (current.isNothing()) {
+    return Nothing();  // the clock could not be read this time
+  }
+  return Some(current.value() - mStartExcludingSuspendMs.value());
+}
+
+};  // namespace mozilla
diff --git a/mozglue/misc/Uptime.h b/mozglue/misc/Uptime.h
new file mode 100644
index 0000000000..4438e0d6d1
--- /dev/null
+++ b/mozglue/misc/Uptime.h
@@ -0,0 +1,26 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_Uptime_h
+#define mozilla_Uptime_h
+
+#include <stdint.h>
+
+#include "mozilla/Maybe.h"
+
+namespace mozilla {
+
+// Called at the beginning of the process from TimeStamp::Startup.
+MFBT_API void InitializeUptime();
+// Returns the number of milliseconds the calling process has lived for.
+MFBT_API Maybe<uint64_t> ProcessUptimeMs();
+// Returns the number of milliseconds the calling process has lived for,
+// excluding the time period the system was suspended.
+MFBT_API Maybe<uint64_t> ProcessUptimeExcludingSuspendMs();
+
+};  // namespace mozilla
+
+#endif  // mozilla_Uptime_h
diff --git a/mozglue/misc/WinUtils.h b/mozglue/misc/WinUtils.h
new file mode 100644
index 0000000000..2291a352a5
--- /dev/null
+++ b/mozglue/misc/WinUtils.h
@@ -0,0 +1,140 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_MozglueUtils_h
+#define mozilla_glue_MozglueUtils_h
+
+#include <windows.h>
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+
+namespace mozilla {
+namespace glue {
+
+#ifdef DEBUG
+
+class MOZ_STATIC_CLASS Win32SRWLock final {  // DEBUG: tracks exclusive owner
+ public:
+  // Microsoft guarantees that '0' is never a valid thread id
+  // https://docs.microsoft.com/en-ca/windows/desktop/ProcThread/thread-handles-and-identifiers
+  static const DWORD kInvalidThreadId = 0;
+
+  constexpr Win32SRWLock()
+      : mExclusiveThreadId(kInvalidThreadId), mLock(SRWLOCK_INIT) {}
+  // Asserts that no exclusive owner is recorded when the lock is destroyed.
+  ~Win32SRWLock() { MOZ_ASSERT(mExclusiveThreadId == kInvalidThreadId); }
+
+  void LockShared() {
+    MOZ_ASSERT(
+        mExclusiveThreadId != GetCurrentThreadId(),
+        "Deadlock detected - A thread attempted to acquire a shared lock on "
+        "a SRWLOCK when it already owns the exclusive lock on it.");
+
+    ::AcquireSRWLockShared(&mLock);
+  }
+
+  void UnlockShared() { ::ReleaseSRWLockShared(&mLock); }
+
+  void LockExclusive() {
+    MOZ_ASSERT(
+        mExclusiveThreadId != GetCurrentThreadId(),
+        "Deadlock detected - A thread attempted to acquire an exclusive lock "
+        "on a SRWLOCK when it already owns the exclusive lock on it.");
+
+    ::AcquireSRWLockExclusive(&mLock);
+    mExclusiveThreadId = GetCurrentThreadId();  // record the new owner
+  }
+
+  void UnlockExclusive() {
+    MOZ_ASSERT(mExclusiveThreadId == GetCurrentThreadId());
+    // Clear the recorded owner first, while the lock is still held.
+    mExclusiveThreadId = kInvalidThreadId;
+    ::ReleaseSRWLockExclusive(&mLock);
+  }
+
+  Win32SRWLock(const Win32SRWLock&) = delete;
+  Win32SRWLock(Win32SRWLock&&) = delete;
+  Win32SRWLock& operator=(const Win32SRWLock&) = delete;
+  Win32SRWLock& operator=(Win32SRWLock&&) = delete;
+
+ private:
+  // "Relaxed" memory ordering is fine. Threads will see other thread IDs
+  // appear here in some non-deterministic ordering (or not at all) and simply
+  // ignore them.
+  //
+  // But a thread will only read its own ID if it previously wrote it, and a
+  // single thread doesn't need a memory barrier to read its own write.
+
+  Atomic<DWORD, Relaxed> mExclusiveThreadId;
+  SRWLOCK mLock;
+};
+
+#else  // DEBUG
+
+// Non-DEBUG variant: each method forwards straight to the SRW lock API.
+class MOZ_STATIC_CLASS Win32SRWLock final {
+ public:
+  constexpr Win32SRWLock() : mLock(SRWLOCK_INIT) {}
+  ~Win32SRWLock() = default;
+
+  Win32SRWLock(const Win32SRWLock&) = delete;
+  Win32SRWLock(Win32SRWLock&&) = delete;
+  Win32SRWLock& operator=(const Win32SRWLock&) = delete;
+  Win32SRWLock& operator=(Win32SRWLock&&) = delete;
+
+  // Shared mode.
+  void LockShared() { ::AcquireSRWLockShared(&mLock); }
+  void UnlockShared() { ::ReleaseSRWLockShared(&mLock); }
+
+  // Exclusive mode.
+  void LockExclusive() { ::AcquireSRWLockExclusive(&mLock); }
+  void UnlockExclusive() { ::ReleaseSRWLockExclusive(&mLock); }
+
+ private:
+  SRWLOCK mLock;
+};
+
+#endif
+
+// Scoped guard: holds aLock in shared mode for the guard's whole lifetime.
+class MOZ_RAII AutoSharedLock final {
+ public:
+  AutoSharedLock(const AutoSharedLock&) = delete;
+  AutoSharedLock(AutoSharedLock&&) = delete;
+  AutoSharedLock& operator=(const AutoSharedLock&) = delete;
+  AutoSharedLock& operator=(AutoSharedLock&&) = delete;
+
+  explicit AutoSharedLock(Win32SRWLock& aLock) : mLock(aLock) {
+    aLock.LockShared();
+  }
+  ~AutoSharedLock() { mLock.UnlockShared(); }
+
+ private:
+  Win32SRWLock& mLock;  // held by reference, so it must outlive the guard
+};
+
+// Scoped guard: holds aLock in exclusive mode for the guard's whole lifetime.
+class MOZ_RAII AutoExclusiveLock final {
+ public:
+  AutoExclusiveLock(const AutoExclusiveLock&) = delete;
+  AutoExclusiveLock(AutoExclusiveLock&&) = delete;
+  AutoExclusiveLock& operator=(const AutoExclusiveLock&) = delete;
+  AutoExclusiveLock& operator=(AutoExclusiveLock&&) = delete;
+
+  explicit AutoExclusiveLock(Win32SRWLock& aLock) : mLock(aLock) {
+    aLock.LockExclusive();
+  }
+  ~AutoExclusiveLock() { mLock.UnlockExclusive(); }
+
+ private:
+  Win32SRWLock& mLock;  // held by reference, so it must outlive the guard
+};
+
+}  // namespace glue
+}  // namespace mozilla
+
+#endif  //  mozilla_glue_MozglueUtils_h
diff --git a/mozglue/misc/WindowsDpiAwareness.h b/mozglue/misc/WindowsDpiAwareness.h
new file mode 100644
index 0000000000..104b135536
--- /dev/null
+++ b/mozglue/misc/WindowsDpiAwareness.h
@@ -0,0 +1,41 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef WindowsDpiAwareness_h_
+#define WindowsDpiAwareness_h_
+
+#include <windows.h>
+
+#if !defined(DPI_AWARENESS_CONTEXT_DECLARED) && \
+    !defined(DPI_AWARENESS_CONTEXT_UNAWARE)
+// Declared here only when the headers included above did not provide them.
+DECLARE_HANDLE(DPI_AWARENESS_CONTEXT);
+
+typedef enum DPI_AWARENESS {
+  DPI_AWARENESS_INVALID = -1,
+  DPI_AWARENESS_UNAWARE = 0,
+  DPI_AWARENESS_SYSTEM_AWARE = 1,
+  DPI_AWARENESS_PER_MONITOR_AWARE = 2
+} DPI_AWARENESS;
+// Context constants, encoded as small negative handle values.
+#  define DPI_AWARENESS_CONTEXT_UNAWARE ((DPI_AWARENESS_CONTEXT)-1)
+#  define DPI_AWARENESS_CONTEXT_SYSTEM_AWARE ((DPI_AWARENESS_CONTEXT)-2)
+#  define DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE ((DPI_AWARENESS_CONTEXT)-3)
+// Marker tested by the guard above, so the declarations are not repeated.
+#  define DPI_AWARENESS_CONTEXT_DECLARED
+#endif  // !DPI_AWARENESS_CONTEXT_DECLARED && !DPI_AWARENESS_CONTEXT_UNAWARE
+
+#if WINVER < 0x0605
+WINUSERAPI DPI_AWARENESS_CONTEXT WINAPI GetThreadDpiAwarenessContext();
+WINUSERAPI BOOL WINAPI AreDpiAwarenessContextsEqual(DPI_AWARENESS_CONTEXT,
+                                                    DPI_AWARENESS_CONTEXT);
+#endif /* WINVER < 0x0605 */
+typedef DPI_AWARENESS_CONTEXT(WINAPI* SetThreadDpiAwarenessContextProc)(
+    DPI_AWARENESS_CONTEXT);
+typedef BOOL(WINAPI* EnableNonClientDpiScalingProc)(HWND);
+typedef int(WINAPI* GetSystemMetricsForDpiProc)(int, UINT);
+
+#endif
diff --git a/mozglue/misc/WindowsMapRemoteView.cpp b/mozglue/misc/WindowsMapRemoteView.cpp
new file mode 100644
index 0000000000..4cd60ba7f1
--- /dev/null
+++ b/mozglue/misc/WindowsMapRemoteView.cpp
@@ -0,0 +1,124 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/WindowsMapRemoteView.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+
+#include <winternl.h>
+
+#if (NTDDI_VERSION < NTDDI_WIN10_RS2)
+
+// MapViewOfFile2 is just an inline function that calls MapViewOfFileNuma2 with
+// its preferred node set to NUMA_NO_PREFERRED_NODE
+WINBASEAPI PVOID WINAPI MapViewOfFileNuma2(HANDLE aFileMapping, HANDLE aProcess,
+                                           ULONG64 aOffset, PVOID aBaseAddress,
+                                           SIZE_T aViewSize,
+                                           ULONG aAllocationType,
+                                           ULONG aPageProtection,
+                                           ULONG aPreferredNode);
+
+WINBASEAPI BOOL WINAPI UnmapViewOfFile2(HANDLE aProcess, PVOID aBaseAddress,
+                                        ULONG aUnmapFlags);
+
+#endif  // (NTDDI_VERSION < NTDDI_WIN10_RS2)
+
+enum SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 };
+
+NTSTATUS NTAPI NtMapViewOfSection(
+    HANDLE aSection, HANDLE aProcess, PVOID* aBaseAddress, ULONG_PTR aZeroBits,
+    SIZE_T aCommitSize, PLARGE_INTEGER aSectionOffset, PSIZE_T aViewSize,
+    SECTION_INHERIT aInheritDisposition, ULONG aAllocationType,
+    ULONG aProtectionFlags);
+
+NTSTATUS NTAPI NtUnmapViewOfSection(HANDLE aProcess, PVOID aBaseAddress);
+
+// Translates an NTSTATUS into a Win32 error code via ntdll's
+// RtlNtStatusToDosError, or ERROR_GEN_FAILURE if that cannot be resolved.
+static DWORD GetWin32ErrorCode(NTSTATUS aNtStatus) {
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &RtlNtStatusToDosError)>
+      pRtlNtStatusToDosError(L"ntdll.dll", "RtlNtStatusToDosError");
+  MOZ_ASSERT(!!pRtlNtStatusToDosError);
+  if (pRtlNtStatusToDosError) {
+    return pRtlNtStatusToDosError(aNtStatus);
+  }
+  return ERROR_GEN_FAILURE;
+}
+
+namespace mozilla {
+
+// Maps a view of aFileMapping into aProcess and returns its base address, or
+// nullptr on failure. MapViewOfFileNuma2 is used when available; otherwise
+// NtMapViewOfSection is called and its status becomes the last-error value.
+MFBT_API void* MapRemoteViewOfFile(HANDLE aFileMapping, HANDLE aProcess,
+                                   ULONG64 aOffset, PVOID aBaseAddress,
+                                   SIZE_T aViewSize, ULONG aAllocationType,
+                                   ULONG aProtectionFlags) {
+  static const StaticDynamicallyLinkedFunctionPtr<decltype(&MapViewOfFileNuma2)>
+      pMapViewOfFileNuma2(L"Api-ms-win-core-memory-l1-1-5.dll",
+                          "MapViewOfFileNuma2");
+  if (!!pMapViewOfFileNuma2) {
+    return pMapViewOfFileNuma2(aFileMapping, aProcess, aOffset, aBaseAddress,
+                               aViewSize, aAllocationType, aProtectionFlags,
+                               NUMA_NO_PREFERRED_NODE);
+  }
+
+  // Fallback path: the native API exported by ntdll.
+  static const StaticDynamicallyLinkedFunctionPtr<decltype(&NtMapViewOfSection)>
+      pNtMapViewOfSection(L"ntdll.dll", "NtMapViewOfSection");
+  MOZ_ASSERT(!!pNtMapViewOfSection);
+  if (!pNtMapViewOfSection) {
+    return nullptr;  // note: the last-error value is not set on this path
+  }
+
+  // For the sake of consistency, we only permit the same flags that
+  // MapViewOfFileNuma2 allows
+  const bool allocationTypeOk = aAllocationType == 0 ||
+                                aAllocationType == MEM_RESERVE ||
+                                aAllocationType == MEM_LARGE_PAGES;
+  if (!allocationTypeOk) {
+    ::SetLastError(ERROR_INVALID_PARAMETER);
+    return nullptr;
+  }
+
+  LARGE_INTEGER sectionOffset;
+  sectionOffset.QuadPart = aOffset;
+  const NTSTATUS status = pNtMapViewOfSection(
+      aFileMapping, aProcess, &aBaseAddress, 0, 0, &sectionOffset, &aViewSize,
+      ViewUnmap, aAllocationType, aProtectionFlags);
+  if (!NT_SUCCESS(status)) {
+    ::SetLastError(GetWin32ErrorCode(status));
+    return nullptr;
+  }
+  ::SetLastError(ERROR_SUCCESS);
+  return aBaseAddress;
+}
+
+// Unmaps the view at aBaseAddress from aProcess; returns true on success.
+MFBT_API bool UnmapRemoteViewOfFile(HANDLE aProcess, PVOID aBaseAddress) {
+  static const StaticDynamicallyLinkedFunctionPtr<decltype(&UnmapViewOfFile2)>
+      pUnmapViewOfFile2(L"kernel32.dll", "UnmapViewOfFile2");
+  if (!!pUnmapViewOfFile2) {
+    return pUnmapViewOfFile2(aProcess, aBaseAddress, 0) != FALSE;
+  }
+
+  // Fallback path: the native API exported by ntdll.
+  static const StaticDynamicallyLinkedFunctionPtr<decltype(
+      &NtUnmapViewOfSection)>
+      pNtUnmapViewOfSection(L"ntdll.dll", "NtUnmapViewOfSection");
+  MOZ_ASSERT(!!pNtUnmapViewOfSection);
+  if (!pNtUnmapViewOfSection) {
+    return false;
+  }
+
+  const NTSTATUS status = pNtUnmapViewOfSection(aProcess, aBaseAddress);
+  ::SetLastError(GetWin32ErrorCode(status));  // set on success as well
+  return NT_SUCCESS(status);
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/WindowsMapRemoteView.h b/mozglue/misc/WindowsMapRemoteView.h
new file mode 100644
index 0000000000..6ab88074b5
--- /dev/null
+++ b/mozglue/misc/WindowsMapRemoteView.h
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_WindowsMapRemoteView_h
+#define mozilla_WindowsMapRemoteView_h
+
+#include "mozilla/Types.h"
+
+#include <windows.h>
+
+namespace mozilla {
+
+MFBT_API PVOID MapRemoteViewOfFile(HANDLE aFileMapping, HANDLE aProcess,
+                                   ULONG64 aOffset, PVOID aBaseAddress,
+                                   SIZE_T aViewSize, ULONG aAllocationType,
+                                   ULONG aProtectionFlags);
+
+MFBT_API bool UnmapRemoteViewOfFile(HANDLE aProcess, PVOID aBaseAddress);
+
+}  // namespace mozilla
+
+#endif  // mozilla_WindowsMapRemoteView_h
diff --git a/mozglue/misc/WindowsProcessMitigations.cpp b/mozglue/misc/WindowsProcessMitigations.cpp
new file mode 100644
index 0000000000..647def7217
--- /dev/null
+++ b/mozglue/misc/WindowsProcessMitigations.cpp
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "mozilla/WindowsProcessMitigations.h"
+
+#include <processthreadsapi.h>
+
+#if (_WIN32_WINNT < 0x0602)
+BOOL WINAPI GetProcessMitigationPolicy(
+    HANDLE hProcess, PROCESS_MITIGATION_POLICY MitigationPolicy, PVOID lpBuffer,
+    SIZE_T dwLength);
+#endif  // (_WIN32_WINNT < 0x0602)
+
+namespace mozilla {
+
+static decltype(&::GetProcessMitigationPolicy)
+FetchGetProcessMitigationPolicyFunc() {
+  // A function-local static, so the lookup object is constructed only once.
+  using PolicyFn = decltype(&::GetProcessMitigationPolicy);
+  static const StaticDynamicallyLinkedFunctionPtr<PolicyFn> sGetPolicy(
+      L"kernel32.dll", "GetProcessMitigationPolicy");
+  return sGetPolicy;
+}
+
+// Reports whether the DisallowWin32kSystemCalls mitigation is set for this
+// process. Returns false when the policy cannot be queried.
+MFBT_API bool IsWin32kLockedDown() {
+  const auto getPolicy = FetchGetProcessMitigationPolicyFunc();
+  if (!getPolicy) {
+    return false;
+  }
+
+  PROCESS_MITIGATION_SYSTEM_CALL_DISABLE_POLICY policy;
+  const BOOL queried = getPolicy(::GetCurrentProcess(),
+                                 ProcessSystemCallDisablePolicy, &policy,
+                                 sizeof(policy));
+  // The flag is only read when the query itself succeeded.
+  return queried && policy.DisallowWin32kSystemCalls;
+}
+
+// Reports whether the ProhibitDynamicCode mitigation is set for this
+// process. Returns false when the policy cannot be queried.
+MFBT_API bool IsDynamicCodeDisabled() {
+  const auto getPolicy = FetchGetProcessMitigationPolicyFunc();
+  if (!getPolicy) {
+    return false;
+  }
+
+  PROCESS_MITIGATION_DYNAMIC_CODE_POLICY policy;
+  const BOOL queried = getPolicy(::GetCurrentProcess(),
+                                 ProcessDynamicCodePolicy, &policy,
+                                 sizeof(policy));
+  // The flag is only read when the query itself succeeded.
+  return queried && policy.ProhibitDynamicCode;
+}
+
+// Reports whether the EnableExportAddressFilterPlus mitigation is set for
+// this process. Returns false when the policy cannot be queried.
+MFBT_API bool IsEafPlusEnabled() {
+  const auto getPolicy = FetchGetProcessMitigationPolicyFunc();
+  if (!getPolicy) {
+    return false;
+  }
+
+  PROCESS_MITIGATION_PAYLOAD_RESTRICTION_POLICY policy;
+  const BOOL queried = getPolicy(::GetCurrentProcess(),
+                                 ProcessPayloadRestrictionPolicy, &policy,
+                                 sizeof(policy));
+  // The flag is only read when the query itself succeeded.
+  return queried && policy.EnableExportAddressFilterPlus;
+}
+
+}  // namespace mozilla
diff --git a/mozglue/misc/WindowsProcessMitigations.h b/mozglue/misc/WindowsProcessMitigations.h
new file mode 100644
index 0000000000..31a93f9b69
--- /dev/null
+++ b/mozglue/misc/WindowsProcessMitigations.h
@@ -0,0 +1,20 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_WindowsProcessMitigations_h
+#define mozilla_WindowsProcessMitigations_h
+
+#include "mozilla/Types.h"
+
+namespace mozilla {
+
+MFBT_API bool IsWin32kLockedDown();
+MFBT_API bool IsDynamicCodeDisabled();
+MFBT_API bool IsEafPlusEnabled();
+
+}  // namespace mozilla
+
+#endif  // mozilla_WindowsProcessMitigations_h
diff --git a/mozglue/misc/WindowsUnicode.cpp b/mozglue/misc/WindowsUnicode.cpp
new file mode 100644
index 0000000000..464380b6da
--- /dev/null
+++ b/mozglue/misc/WindowsUnicode.cpp
@@ -0,0 +1,59 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "WindowsUnicode.h"
+
+#include <windows.h>
+// For UNICODE_STRING
+#include <winternl.h>
+
+#include <string.h>
+
+namespace mozilla {
+namespace glue {
+
+mozilla::UniquePtr<char[]> WideToUTF8(const wchar_t* aStr,
+                                      const size_t aStrLenExclNul) {
+  // WideCharToMultiByte takes an int length; reject sizes that don't fit.
+  const int wideLen = static_cast<int>(aStrLenExclNul);
+  if (wideLen < 0 || static_cast<size_t>(wideLen) != aStrLenExclNul) {
+    return nullptr;  // Narrowing would have changed the length.
+  }
+  int numConv = ::WideCharToMultiByte(CP_UTF8, 0, aStr, wideLen, nullptr, 0,
+                                      nullptr, nullptr);  // Size query only.
+  if (!numConv) {
+    return nullptr;  // A zero result is how the API reports failure.
+  }
+
+  // One extra char for the NUL terminator, which numConv does not count.
+  auto buf = mozilla::MakeUnique<char[]>(numConv + 1);
+  numConv = ::WideCharToMultiByte(CP_UTF8, 0, aStr, wideLen, buf.get(),
+                                  numConv, nullptr, nullptr);
+  if (!numConv) {
+    return nullptr;
+  }
+  buf[numConv] = 0;  // Terminate; the result is a NUL-terminated UTF-8 string.
+  return buf;
+}
+
+mozilla::UniquePtr<char[]> WideToUTF8(const wchar_t* aStr) {
+  return WideToUTF8(aStr, wcslen(aStr));  // aStr must be NUL-terminated.
+}
+
+mozilla::UniquePtr<char[]> WideToUTF8(const std::wstring& aStr) {
+  return WideToUTF8(aStr.data(), aStr.length());  // Explicit length; no scan.
+}
+
+mozilla::UniquePtr<char[]> WideToUTF8(PCUNICODE_STRING aStr) {
+  if (aStr) {
+    // Length is treated as a size in bytes, so divide to count WCHARs.
+    return WideToUTF8(aStr->Buffer, aStr->Length / sizeof(WCHAR));
+  }
+  return nullptr;  // A null UNICODE_STRING pointer converts to null.
+}
+
+}  // namespace glue
+}  // namespace mozilla
diff --git a/mozglue/misc/WindowsUnicode.h b/mozglue/misc/WindowsUnicode.h
new file mode 100644
index 0000000000..77fc376b92
--- /dev/null
+++ b/mozglue/misc/WindowsUnicode.h
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_glue_WindowsUnicode_h
+#define mozilla_glue_WindowsUnicode_h
+
+#include "mozilla/UniquePtr.h"
+
+#include <string>
+
+struct _UNICODE_STRING;
+
+namespace mozilla {
+namespace glue {
+
+mozilla::UniquePtr<char[]> WideToUTF8(const wchar_t* aStr,
+                                      const size_t aStrLenExclNul);
+
+mozilla::UniquePtr<char[]> WideToUTF8(const wchar_t* aStr);
+mozilla::UniquePtr<char[]> WideToUTF8(const std::wstring& aStr);
+mozilla::UniquePtr<char[]> WideToUTF8(const _UNICODE_STRING* aStr);
+
+#if defined(bstr_t)  // NOTE(review): tests a macro named bstr_t, while _bstr_t below is used as a type -- confirm this is ever true.
+inline mozilla::UniquePtr<char[]> WideToUTF8(const _bstr_t& aStr) {
+  return WideToUTF8(static_cast<const wchar_t*>(aStr), aStr.length());
+}
+#endif  // defined(bstr_t)
+
+}  // namespace glue
+}  // namespace mozilla
+
+#endif  // mozilla_glue_WindowsUnicode_h
diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
new file mode 100644
index 0000000000..cc828e2843
--- /dev/null
+++ b/mozglue/misc/decimal/Decimal.cpp
@@ -0,0 +1,1063 @@
+/*
+ * Copyright (C) 2012 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "Decimal.h"
+#include "moz-decimal-utils.h"
+#include "DoubleConversion.h"
+
+using namespace moz_decimal_utils;
+
+#include <algorithm>
+#include <float.h>
+
+namespace blink {
+
+namespace DecimalPrivate {
+
+static int const ExponentMax = 1023; // EncodedData turns larger exponents into infinity
+static int const ExponentMin = -1023; // EncodedData turns smaller exponents into zero
+static int const Precision = 18; // decimal digits a coefficient may hold (see MaxCoefficient)
+
+static const uint64_t MaxCoefficient = UINT64_C(0xDE0B6B3A763FFFF); // 999999999999999999 == 18 9's
+
+// Classifies a pair of Decimal operands (finite / infinity / NaN) for the arithmetic operators.
+class SpecialValueHandler {
+    STACK_ALLOCATED();
+    WTF_MAKE_NONCOPYABLE(SpecialValueHandler);
+public:
+    enum HandleResult {
+        BothFinite, // both operands report isFinite()
+        BothInfinity,
+        EitherNaN, // at least one NaN; value() then returns that operand
+        LHSIsInfinity, // left operand infinite, right operand not
+        RHSIsInfinity, // right operand infinite, left operand not
+    };
+
+    SpecialValueHandler(const Decimal& lhs, const Decimal& rhs);
+    HandleResult handle();
+    Decimal value() const; // only meaningful after handle() returned EitherNaN
+
+private:
+    enum Result {
+        ResultIsLHS,
+        ResultIsRHS,
+        ResultIsUnknown, // initial state; value() asserts while still unknown
+    };
+
+    const Decimal& m_lhs; // operands are held by reference, not copied
+    const Decimal& m_rhs;
+    Result m_result;
+};
+
+SpecialValueHandler::SpecialValueHandler(const Decimal& lhs, const Decimal& rhs)
+    : m_lhs(lhs), m_rhs(rhs), m_result(ResultIsUnknown) // references are bound; nothing is classified yet
+{
+}
+
+// Classifies the operand pair. NaN wins over infinity, and a NaN on the left
+// wins over one on the right; the NaN operand is remembered for value().
+SpecialValueHandler::HandleResult SpecialValueHandler::handle()
+{
+    if (m_lhs.isFinite() && m_rhs.isFinite())
+        return BothFinite;
+
+    typedef Decimal::EncodedData Data;
+    const Data::FormatClass leftClass = m_lhs.value().formatClass();
+    const Data::FormatClass rightClass = m_rhs.value().formatClass();
+
+    if (leftClass == Data::ClassNaN || rightClass == Data::ClassNaN) {
+        m_result = leftClass == Data::ClassNaN ? ResultIsLHS : ResultIsRHS;
+        return EitherNaN;
+    }
+
+    const bool leftInfinite = leftClass == Data::ClassInfinity;
+    const bool rightInfinite = rightClass == Data::ClassInfinity;
+    if (leftInfinite)
+        return rightInfinite ? BothInfinity : LHSIsInfinity;
+    if (rightInfinite)
+        return RHSIsInfinity;
+
+    ASSERT_NOT_REACHED(); // non-finite, yet neither NaN nor infinity
+    return BothFinite;
+}
+
+// Returns the operand that handle() recorded (the NaN one). Asking before a
+// result was recorded trips an assertion and falls back to the left operand.
+Decimal SpecialValueHandler::value() const
+{
+    if (m_result == ResultIsLHS)
+        return m_lhs;
+    if (m_result == ResultIsRHS)
+        return m_rhs;
+
+    // ResultIsUnknown, or any out-of-range value of the enum.
+    ASSERT_NOT_REACHED();
+    return m_lhs;
+}
+
+// Minimal unsigned 128-bit integer, stored as two 64-bit halves.
+class UInt128 {
+public:
+    // Note the argument order: low half first, then high half.
+    UInt128(uint64_t low, uint64_t high) : m_high(high), m_low(low) { }
+
+    UInt128& operator/=(uint32_t);
+
+    uint64_t high() const { return m_high; }
+    uint64_t low() const { return m_low; }
+
+    // Full 64x64 -> 128 bit product; u * v wraps to the low 64 bits.
+    static UInt128 multiply(uint64_t u, uint64_t v) { return UInt128(u * v, multiplyHigh(u, v)); }
+
+private:
+    // Helpers for splitting a 64-bit word into 32-bit halves and rejoining them.
+    static uint32_t highUInt32(uint64_t x) { return static_cast<uint32_t>(x >> 32); }
+    static uint32_t lowUInt32(uint64_t x) { return static_cast<uint32_t>(x & UINT64_C(0xFFFFFFFF)); }
+    static uint64_t makeUInt64(uint32_t low, uint32_t high) { return (static_cast<uint64_t>(high) << 32) | low; }
+
+    static uint64_t multiplyHigh(uint64_t, uint64_t);
+
+    uint64_t m_high;
+    uint64_t m_low;
+};
+
+// Divides in place by a non-zero 32-bit divisor; the remainder is discarded.
+UInt128& UInt128::operator/=(const uint32_t divisor)
+{
+    ASSERT(divisor);
+
+    // Fast path: the value fits in 64 bits, so native division suffices.
+    if (!m_high) {
+        m_low /= divisor;
+        return *this;
+    }
+
+    // Schoolbook long division over 32-bit limbs, most significant limb
+    // first. limbs[] holds the dividend and is overwritten with the quotient.
+    uint32_t limbs[4] = { lowUInt32(m_low), highUInt32(m_low), lowUInt32(m_high), highUInt32(m_high) };
+    uint32_t carry = 0;
+
+    for (int index = 3; index >= 0; --index) {
+        const uint64_t partial = makeUInt64(limbs[index], carry);
+        limbs[index] = static_cast<uint32_t>(partial / divisor);
+        carry = static_cast<uint32_t>(partial % divisor);
+    }
+
+    m_low = makeUInt64(limbs[0], limbs[1]);
+    m_high = makeUInt64(limbs[2], limbs[3]);
+    return *this;
+}
+
+// Returns the upper 64 bits of the 128-bit product u * v, built from the
+// 32x32 partial products with the carries propagated by hand.
+uint64_t UInt128::multiplyHigh(uint64_t u, uint64_t v)
+{
+    const uint64_t u0 = lowUInt32(u), u1 = highUInt32(u);
+    const uint64_t v0 = lowUInt32(v), v1 = highUInt32(v);
+    const uint64_t middle = u1 * v0 + highUInt32(u0 * v0);
+    const uint64_t cross = u0 * v1 + lowUInt32(middle);
+    return u1 * v1 + highUInt32(middle) + highUInt32(cross);
+}
+
+// Returns the number of decimal digits in x. Zero is reported as having
+// zero digits, which callers use to detect an all-zero coefficient.
+static int countDigits(uint64_t x)
+{
+    int digits = 0;
+    // Strip one decimal digit per iteration until nothing is left.
+    for (; x; x /= 10)
+        ++digits;
+    return digits;
+}
+
+// Divides x by 10^n, truncating; stops early once x has become zero.
+static uint64_t scaleDown(uint64_t x, int n)
+{
+    ASSERT(n >= 0);
+
+    for (int remaining = n; remaining > 0 && x; --remaining)
+        x /= 10;
+    return x;
+}
+
+// Multiplies x by 10^n (0 <= n <= Precision is asserted). The power of ten
+// is built by square-and-multiply over the bits of n.
+static uint64_t scaleUp(uint64_t x, int n)
+{
+    ASSERT(n >= 0);
+    ASSERT(n <= Precision);
+
+    uint64_t powerOfTen = 1;
+
+    // square runs through 10^1, 10^2, 10^4, ... and is multiplied into
+    // powerOfTen for every set bit of n, lowest bit first.
+    for (uint64_t square = 10; n; n >>= 1, square *= square) {
+        if (n & 1)
+            powerOfTen *= square;
+    }
+
+    return x * powerOfTen;
+}
+
+} // namespace DecimalPrivate
+
+using namespace DecimalPrivate;
+
+Decimal::EncodedData::EncodedData(Sign sign, FormatClass formatClass)
+    : m_coefficient(0) // only sign and class are taken from the caller
+    , m_exponent(0)
+    , m_formatClass(formatClass)
+    , m_sign(sign)
+{
+}
+
+// Builds a finite value. A coefficient above MaxCoefficient is scaled down
+// (with the exponent raised to match); an exponent that ends up outside
+// [ExponentMin, ExponentMax] turns the value into infinity or zero.
+Decimal::EncodedData::EncodedData(Sign sign, int exponent, uint64_t coefficient)
+    : m_formatClass(coefficient ? ClassNormal : ClassZero)
+    , m_sign(sign)
+{
+    // The scaling loop is skipped entirely when the incoming exponent is
+    // already out of range; the checks below deal with that case.
+    const bool exponentInRange =
+        exponent >= ExponentMin && exponent <= ExponentMax;
+    for (; exponentInRange && coefficient > MaxCoefficient; ++exponent)
+        coefficient /= 10;
+
+    const bool tooLarge = exponent > ExponentMax;
+    const bool tooSmall = exponent < ExponentMin;
+    if (tooLarge || tooSmall) {
+        // Saturate: the sign is kept, coefficient and exponent are cleared.
+        m_coefficient = 0;
+        m_exponent = 0;
+        m_formatClass = tooLarge ? ClassInfinity : ClassZero;
+        return;
+    }
+
+    // In range, so the exponent fits the int16_t it is narrowed to.
+    m_coefficient = coefficient;
+    m_exponent = static_cast<int16_t>(exponent);
+}
+
+// Strict field-by-field comparison of two encodings (not numeric equality).
+bool Decimal::EncodedData::operator==(const EncodedData& another) const
+{
+    if (m_sign != another.m_sign || m_formatClass != another.m_formatClass)
+        return false;
+    return m_exponent == another.m_exponent && m_coefficient == another.m_coefficient;
+}
+
+// Sign-magnitude split; widening to int64_t first keeps negating INT32_MIN well defined.
+Decimal::Decimal(int32_t i32)
+    : m_data(i32 < 0 ? Negative : Positive, 0, static_cast<uint64_t>(i32 < 0 ? -static_cast<int64_t>(i32) : static_cast<int64_t>(i32)))
+{ }
+
+// A zero coefficient is stored with exponent 0 whatever exponent was passed.
+Decimal::Decimal(Sign sign, int exponent, uint64_t coefficient)
+    : m_data(sign, !coefficient ? 0 : exponent, coefficient)
+{ }
+
+// Wraps an already-encoded value as is.
+Decimal::Decimal(const EncodedData& data)
+    : m_data(data)
+{ }
+
+// Copy constructor: duplicates the encoded representation.
+Decimal::Decimal(const Decimal& other)
+    : m_data(other.m_data)
+{ }
+
+Decimal& Decimal::operator=(const Decimal& other)
+{
+    m_data = other.m_data; // copies the encoded representation
+    return *this;
+}
+
+// In-place addition, defined through the binary operator+.
+Decimal& Decimal::operator+=(const Decimal& other)
+{
+    return *this = *this + other;
+}
+
+// In-place subtraction, defined through the binary operator-.
+Decimal& Decimal::operator-=(const Decimal& other)
+{
+    return *this = *this - other;
+}
+
+// In-place multiplication, defined through the binary operator*.
+Decimal& Decimal::operator*=(const Decimal& other)
+{
+    return *this = *this * other;
+}
+
+// In-place division, defined through the binary operator/.
+Decimal& Decimal::operator/=(const Decimal& other)
+{
+    return *this = *this / other;
+}
+
+// Negation flips the sign of every value except NaN, which is returned as is.
+Decimal Decimal::operator-() const
+{
+    Decimal negated(*this);
+    if (!isNaN())
+        negated.m_data.setSign(invertSign(m_data.sign()));
+
+    return negated;
+}
+
+// Addition. Special operands are settled first (infinities of opposite sign
+// give NaN); finite operands are aligned to a common exponent and combined.
+Decimal Decimal::operator+(const Decimal& rhs) const
+{
+    const Decimal& lhs = *this;
+    const Sign lhsSign = lhs.sign();
+    const Sign rhsSign = rhs.sign();
+    const bool sameSign = lhsSign == rhsSign;
+
+    SpecialValueHandler handler(lhs, rhs);
+    switch (handler.handle()) {
+    case SpecialValueHandler::EitherNaN:
+        return handler.value();
+    case SpecialValueHandler::BothInfinity:
+        return sameSign ? lhs : nan();
+    case SpecialValueHandler::LHSIsInfinity:
+        return lhs;
+    case SpecialValueHandler::RHSIsInfinity:
+        return rhs;
+    case SpecialValueHandler::BothFinite:
+        break;
+    }
+
+    const AlignedOperands aligned = alignOperands(lhs, rhs);
+    // Unsigned arithmetic: a negative difference wraps around and is detected
+    // below by reinterpreting the result as a signed value.
+    const uint64_t magnitude = sameSign
+        ? aligned.lhsCoefficient + aligned.rhsCoefficient
+        : aligned.lhsCoefficient - aligned.rhsCoefficient;
+
+    // (-x) + x is normalised to +0 rather than -0.
+    if (!magnitude && lhsSign == Negative && rhsSign == Positive)
+        return Decimal(Positive, aligned.exponent, 0);
+    if (static_cast<int64_t>(magnitude) < 0)
+        return Decimal(invertSign(lhsSign), aligned.exponent, -static_cast<int64_t>(magnitude));
+    return Decimal(lhsSign, aligned.exponent, magnitude);
+}
+
+// Subtraction. Special operands are settled first (infinities of equal sign
+// give NaN); finite operands are aligned to a common exponent and combined.
+Decimal Decimal::operator-(const Decimal& rhs) const
+{
+    const Decimal& lhs = *this;
+    const Sign lhsSign = lhs.sign();
+    const Sign rhsSign = rhs.sign();
+    const bool sameSign = lhsSign == rhsSign;
+
+    SpecialValueHandler handler(lhs, rhs);
+    switch (handler.handle()) {
+    case SpecialValueHandler::EitherNaN:
+        return handler.value();
+    case SpecialValueHandler::BothInfinity:
+        return sameSign ? nan() : lhs;
+    case SpecialValueHandler::LHSIsInfinity:
+        return lhs;
+    case SpecialValueHandler::RHSIsInfinity:
+        return infinity(invertSign(rhsSign));
+    case SpecialValueHandler::BothFinite:
+        break;
+    }
+
+    const AlignedOperands aligned = alignOperands(lhs, rhs);
+    // Unsigned arithmetic: a negative difference wraps around and is detected
+    // below by reinterpreting the result as a signed value.
+    const uint64_t magnitude = sameSign
+        ? aligned.lhsCoefficient - aligned.rhsCoefficient
+        : aligned.lhsCoefficient + aligned.rhsCoefficient;
+
+    // (-x) - (-x) is normalised to +0 rather than -0.
+    if (!magnitude && lhsSign == Negative && rhsSign == Negative)
+        return Decimal(Positive, aligned.exponent, 0);
+    if (static_cast<int64_t>(magnitude) < 0)
+        return Decimal(invertSign(lhsSign), aligned.exponent, -static_cast<int64_t>(magnitude));
+    return Decimal(lhsSign, aligned.exponent, magnitude);
+}
+
+// Multiplication. The result is negative exactly when the operand signs
+// differ. Finite coefficients are multiplied into 128 bits and the product
+// is divided by ten until it fits into 64 bits again.
+Decimal Decimal::operator*(const Decimal& rhs) const
+{
+    const Decimal& lhs = *this;
+    const Sign resultSign = lhs.sign() == rhs.sign() ? Positive : Negative;
+
+    SpecialValueHandler handler(lhs, rhs);
+    switch (handler.handle()) {
+    case SpecialValueHandler::EitherNaN:
+        return handler.value();
+
+    case SpecialValueHandler::BothInfinity:
+        return infinity(resultSign);
+
+    case SpecialValueHandler::LHSIsInfinity:
+        // infinity * 0 has no meaningful value.
+        return rhs.isZero() ? nan() : infinity(resultSign);
+
+    case SpecialValueHandler::RHSIsInfinity:
+        // 0 * infinity has no meaningful value.
+        return lhs.isZero() ? nan() : infinity(resultSign);
+
+    case SpecialValueHandler::BothFinite: {
+        UInt128 product(UInt128::multiply(lhs.m_data.coefficient(), rhs.m_data.coefficient()));
+        int resultExponent = lhs.exponent() + rhs.exponent();
+        // Every division by ten is compensated by bumping the exponent.
+        for (; product.high(); ++resultExponent)
+            product /= 10;
+        return Decimal(resultSign, resultExponent, product.low());
+    }
+    }
+
+    ASSERT_NOT_REACHED();
+    return nan();
+}
+
+Decimal Decimal::operator/(const Decimal& rhs) const
+{ // Division: special operands first, then long division on the finite coefficients.
+    const Decimal& lhs = *this;
+    const Sign lhsSign = lhs.sign();
+    const Sign rhsSign = rhs.sign();
+    const Sign resultSign = lhsSign == rhsSign ? Positive : Negative;
+
+    SpecialValueHandler handler(lhs, rhs);
+    switch (handler.handle()) {
+    case SpecialValueHandler::BothFinite:
+        break;
+
+    case SpecialValueHandler::BothInfinity:
+        return nan(); // infinity / infinity
+
+    case SpecialValueHandler::EitherNaN:
+        return handler.value(); // hand back the NaN operand
+
+    case SpecialValueHandler::LHSIsInfinity:
+        return infinity(resultSign);
+
+    case SpecialValueHandler::RHSIsInfinity:
+        return zero(resultSign); // finite / infinity
+    }
+
+    ASSERT(lhs.isFinite());
+    ASSERT(rhs.isFinite());
+
+    if (rhs.isZero())
+        return lhs.isZero() ? nan() : infinity(resultSign); // 0 / 0 is NaN, x / 0 is infinity
+
+    int resultExponent = lhs.exponent() - rhs.exponent();
+
+    if (lhs.isZero())
+        return Decimal(resultSign, resultExponent, 0);
+
+    uint64_t remainder = lhs.m_data.coefficient();
+    const uint64_t divisor = rhs.m_data.coefficient();
+    uint64_t result = 0;
+    for (;;) {
+        while (remainder < divisor && result < MaxCoefficient / 10) { // shift in digits while result has room
+            remainder *= 10;
+            result *= 10;
+            --resultExponent;
+        }
+        if (remainder < divisor)
+            break; // result has no room left for another digit
+        uint64_t quotient = remainder / divisor;
+        if (result > MaxCoefficient - quotient)
+            break; // adding quotient would exceed MaxCoefficient
+        result += quotient;
+        remainder %= divisor;
+        if (!remainder)
+            break; // the division came out exact
+    }
+
+    if (remainder > divisor / 2)
+        ++result; // round up when more than half a divisor is left over
+
+    return Decimal(resultSign, resultExponent, result);
+}
+
+// Numeric equality; NaN equals nothing, itself included.
+bool Decimal::operator==(const Decimal& rhs) const
+{
+    const bool unordered = isNaN() || rhs.isNaN();
+    return !unordered && (m_data == rhs.m_data || compareTo(rhs).isZero());
+}
+
+// Numeric inequality; any comparison involving NaN is "not equal".
+bool Decimal::operator!=(const Decimal& rhs) const
+{
+    if (isNaN() || rhs.isNaN())
+        return true;
+    if (m_data == rhs.m_data)
+        return false;
+
+    const Decimal ordering = compareTo(rhs);
+    return !ordering.isNaN() && !ordering.isZero();
+}
+
+// True only when the operands can be ordered (no NaN) and *this is smaller.
+bool Decimal::operator<(const Decimal& rhs) const
+{
+    const Decimal ordering = compareTo(rhs);
+    const bool comparable = !ordering.isNaN();
+    return comparable && !ordering.isZero() && ordering.isNegative();
+}
+
+// Less-than-or-equal; false whenever a NaN is involved.
+bool Decimal::operator<=(const Decimal& rhs) const
+{
+    if (isNaN() || rhs.isNaN())
+        return false;
+    if (m_data == rhs.m_data)
+        return true;
+
+    const Decimal ordering = compareTo(rhs);
+    return !ordering.isNaN() && (ordering.isZero() || ordering.isNegative());
+}
+
+// True only when the operands can be ordered (no NaN) and *this is larger.
+bool Decimal::operator>(const Decimal& rhs) const
+{
+    const Decimal ordering = compareTo(rhs);
+    const bool comparable = !ordering.isNaN();
+    return comparable && !ordering.isZero() && ordering.isPositive();
+}
+
+// Greater-than-or-equal; false whenever a NaN is involved.
+bool Decimal::operator>=(const Decimal& rhs) const
+{
+    if (isNaN() || rhs.isNaN())
+        return false;
+    if (m_data == rhs.m_data)
+        return true;
+
+    const Decimal ordering = compareTo(rhs);
+    return !ordering.isNaN() && (ordering.isZero() || !ordering.isNegative());
+}
+
+Decimal Decimal::abs() const
+{ // Returns a copy of this value with its sign set to Positive.
+    Decimal result(*this);
+    result.m_data.setSign(Positive);
+    return result;
+}
+
+Decimal::AlignedOperands Decimal::alignOperands(const Decimal& lhs, const Decimal& rhs)
+{ // Rewrites two finite operands so that both coefficients refer to one common exponent.
+    ASSERT(lhs.isFinite());
+    ASSERT(rhs.isFinite());
+
+    const int lhsExponent = lhs.exponent();
+    const int rhsExponent = rhs.exponent();
+    int exponent = std::min(lhsExponent, rhsExponent); // start from the smaller exponent
+    uint64_t lhsCoefficient = lhs.m_data.coefficient();
+    uint64_t rhsCoefficient = rhs.m_data.coefficient();
+
+    if (lhsExponent > rhsExponent) {
+        const int numberOfLHSDigits = countDigits(lhsCoefficient);
+        if (numberOfLHSDigits) { // a zero coefficient needs no scaling
+            const int lhsShiftAmount = lhsExponent - rhsExponent;
+            const int overflow = numberOfLHSDigits + lhsShiftAmount - Precision; // digits beyond Precision after the shift
+            if (overflow <= 0) {
+                lhsCoefficient = scaleUp(lhsCoefficient, lhsShiftAmount);
+            } else { // scale lhs up as far as Precision allows and drop rhs's low digits for the rest
+                lhsCoefficient = scaleUp(lhsCoefficient, lhsShiftAmount - overflow);
+                rhsCoefficient = scaleDown(rhsCoefficient, overflow);
+                exponent += overflow;
+            }
+        }
+
+    } else if (lhsExponent < rhsExponent) { // mirror image of the branch above
+        const int numberOfRHSDigits = countDigits(rhsCoefficient);
+        if (numberOfRHSDigits) {
+            const int rhsShiftAmount = rhsExponent - lhsExponent;
+            const int overflow = numberOfRHSDigits + rhsShiftAmount - Precision;
+            if (overflow <= 0) {
+                rhsCoefficient = scaleUp(rhsCoefficient, rhsShiftAmount);
+            } else {
+                rhsCoefficient = scaleUp(rhsCoefficient, rhsShiftAmount - overflow);
+                lhsCoefficient = scaleDown(lhsCoefficient, overflow);
+                exponent += overflow;
+            }
+        }
+    }
+
+    AlignedOperands alignedOperands;
+    alignedOperands.exponent = exponent;
+    alignedOperands.lhsCoefficient = lhsCoefficient;
+    alignedOperands.rhsCoefficient = rhsCoefficient;
+    return alignedOperands;
+}
+
+// True when coefficient is divisible by 10^n (zero counts as divisible).
+static bool isMultiplePowersOfTen(uint64_t coefficient, int n) {
+    return coefficient == 0 || coefficient % scaleUp(1, n) == 0;
+}
+
+// Rounds toward positive infinity. Special values and values without a
+// fractional part (exponent >= 0) are returned unchanged.
+Decimal Decimal::ceil() const
+{
+    if (isSpecial() || exponent() >= 0)
+        return *this;
+
+    const uint64_t coefficient = m_data.coefficient();
+    const int fractionDigits = -exponent();
+    // Magnitude below one: everything is fraction, so the answer is 1 or 0.
+    if (countDigits(coefficient) <= fractionDigits)
+        return isPositive() ? Decimal(1) : zero(Positive);
+
+    uint64_t integerPart = scaleDown(coefficient, fractionDigits);
+    // Truncation already rounds negatives upward; positives need a bump
+    // whenever any non-zero fraction digit was dropped.
+    if (isPositive() && !isMultiplePowersOfTen(coefficient, fractionDigits))
+        ++integerPart;
+    return Decimal(sign(), 0, integerPart);
+}
+
+// Three-way comparison returned as a Decimal: negative, zero or positive as
+// *this is below, equal to or above rhs. A NaN result of the subtraction is
+// passed through unchanged.
+Decimal Decimal::compareTo(const Decimal& rhs) const
+{
+    const Decimal difference(*this - rhs);
+    const EncodedData::FormatClass kind = difference.m_data.formatClass();
+
+    if (kind == EncodedData::ClassNaN || kind == EncodedData::ClassNormal)
+        return difference;
+    if (kind == EncodedData::ClassZero)
+        return zero(Positive); // always +0, whatever sign the difference had
+    // An infinite difference is collapsed to a finite +/-1.
+    if (kind == EncodedData::ClassInfinity)
+        return difference.isNegative() ? Decimal(-1) : Decimal(1);
+
+    ASSERT_NOT_REACHED();
+    return nan();
+}
+
+// Rounds toward negative infinity. Special values and values without a
+// fractional part (exponent >= 0) are returned unchanged.
+Decimal Decimal::floor() const
+{
+    if (isSpecial() || exponent() >= 0)
+        return *this;
+
+    const uint64_t coefficient = m_data.coefficient();
+    const int fractionDigits = -exponent();
+    // More places are dropped than there are digits: the answer is 0 or -1.
+    if (countDigits(coefficient) < fractionDigits)
+        return isPositive() ? zero(Positive) : Decimal(-1);
+
+    uint64_t integerPart = scaleDown(coefficient, fractionDigits);
+    // Truncation already rounds positives downward; negatives move one
+    // further from zero whenever a non-zero fraction digit was dropped.
+    if (isNegative() && !isMultiplePowersOfTen(coefficient, fractionDigits))
+        ++integerPart;
+    return Decimal(sign(), 0, integerPart);
+}
+
+// Finite doubles are parsed from their mozToString() text; infinities keep their sign; NaN maps to nan().
+Decimal Decimal::fromDouble(double doubleValue)
+{
+    if (!std::isfinite(doubleValue)) {
+        if (!std::isinf(doubleValue))
+            return nan();
+        return infinity(doubleValue < 0 ? Negative : Positive);
+    }
+    return fromString(mozToString(doubleValue));
+}
+
+Decimal Decimal::fromString(const String& str)
+{ // State-machine parser for decimal literals such as "-12.5e+3"; input the machine rejects yields nan().
+    int exponent = 0;
+    Sign exponentSign = Positive;
+    int numberOfDigits = 0; // digits folded into accumulator so far (at most Precision)
+    int numberOfDigitsAfterDot = 0; // how many of those came after the '.'
+    int numberOfExtraDigits = 0; // integer digits seen after accumulator was full
+    Sign sign = Positive;
+
+    enum {
+        StateDigit, // inside the integer digits
+        StateDot, // just consumed '.'
+        StateDotDigit, // inside the fraction digits
+        StateE, // just consumed 'E' or 'e'
+        StateEDigit, // inside the exponent digits
+        StateESign, // just consumed the exponent's sign
+        StateSign, // just consumed the leading sign
+        StateStart, // nothing consumed yet
+        StateZero, // only '0' characters consumed so far
+    } state = StateStart;
+
+#define HandleCharAndBreak(expected, nextState) \
+    if (ch == expected) { \
+        state = nextState; \
+        break; \
+    }
+
+#define HandleTwoCharsAndBreak(expected1, expected2, nextState) \
+    if (ch == expected1 || ch == expected2) { \
+        state = nextState; \
+        break; \
+    }
+
+    uint64_t accumulator = 0;
+    for (unsigned index = 0; index < str.length(); ++index) {
+        const int ch = str[index];
+        switch (state) {
+        case StateDigit:
+            if (ch >= '0' && ch <= '9') {
+                if (numberOfDigits < Precision) {
+                    ++numberOfDigits;
+                    accumulator *= 10;
+                    accumulator += ch - '0';
+                } else {
+                    ++numberOfExtraDigits; // dropped digit still counts towards the exponent
+                }
+                break;
+            }
+
+            HandleCharAndBreak('.', StateDot);
+            HandleTwoCharsAndBreak('E', 'e', StateE);
+            return nan();
+
+        case StateDot:
+        case StateDotDigit:
+            if (ch >= '0' && ch <= '9') {
+                if (numberOfDigits < Precision) { // fraction digits beyond Precision are ignored
+                    ++numberOfDigits;
+                    ++numberOfDigitsAfterDot;
+                    accumulator *= 10;
+                    accumulator += ch - '0';
+                }
+                state = StateDotDigit;
+                break;
+            }
+
+            HandleTwoCharsAndBreak('E', 'e', StateE);
+            return nan();
+
+        case StateE:
+            if (ch == '+') {
+                exponentSign = Positive;
+                state = StateESign;
+                break;
+            }
+
+            if (ch == '-') {
+                exponentSign = Negative;
+                state = StateESign;
+                break;
+            }
+
+            if (ch >= '0' && ch <= '9') {
+                exponent = ch - '0';
+                state = StateEDigit;
+                break;
+            }
+
+            return nan();
+
+        case StateEDigit:
+            if (ch >= '0' && ch <= '9') {
+                exponent *= 10;
+                exponent += ch - '0';
+                if (exponent > ExponentMax + Precision) { // stop early: the result is already zero or infinity
+                    if (accumulator)
+                        return exponentSign == Negative ? zero(Positive) : infinity(sign);
+                    return zero(sign);
+                }
+                state = StateEDigit;
+                break;
+            }
+
+            return nan();
+
+        case StateESign:
+            if (ch >= '0' && ch <= '9') {
+                exponent = ch - '0';
+                state = StateEDigit;
+                break;
+            }
+
+            return nan();
+
+        case StateSign:
+            if (ch >= '1' && ch <= '9') {
+                accumulator = ch - '0';
+                numberOfDigits = 1;
+                state = StateDigit;
+                break;
+            }
+
+            HandleCharAndBreak('0', StateZero);
+            return nan(); // note: '.' directly after a sign is rejected here
+
+        case StateStart:
+            if (ch >= '1' && ch <= '9') {
+                accumulator = ch - '0';
+                numberOfDigits = 1;
+                state = StateDigit;
+                break;
+            }
+
+            if (ch == '-') {
+                sign = Negative;
+                state = StateSign;
+                break;
+            }
+
+            if (ch == '+') {
+                sign = Positive;
+                state = StateSign;
+                break;
+            }
+
+            HandleCharAndBreak('0', StateZero);
+            HandleCharAndBreak('.', StateDot);
+            return nan();
+
+        case StateZero:
+            if (ch == '0')
+                break; // further leading zeros are skipped
+
+            if (ch >= '1' && ch <= '9') {
+                accumulator = ch - '0';
+                numberOfDigits = 1;
+                state = StateDigit;
+                break;
+            }
+
+            HandleCharAndBreak('.', StateDot);
+            HandleTwoCharsAndBreak('E', 'e', StateE);
+            return nan();
+
+        default:
+            ASSERT_NOT_REACHED();
+            return nan();
+        }
+    }
+
+    if (state == StateZero)
+        return zero(sign);
+
+    if (state == StateDigit || state == StateEDigit || state == StateDotDigit) { // the other accepting end states
+        int resultExponent = exponent * (exponentSign == Negative ? -1 : 1) - numberOfDigitsAfterDot + numberOfExtraDigits;
+        if (resultExponent < ExponentMin)
+            return zero(Positive); // underflow
+
+        const int overflow = resultExponent - ExponentMax + 1;
+        if (overflow > 0) {
+            if (overflow + numberOfDigits - numberOfDigitsAfterDot > Precision)
+                return infinity(sign);
+            accumulator = scaleUp(accumulator, overflow); // move the excess exponent into the coefficient
+            resultExponent -= overflow;
+        }
+
+        return Decimal(sign, resultExponent, accumulator);
+    }
+
+    return nan(); // input ended in a non-accepting state
+}
+
+Decimal Decimal::infinity(const Sign sign)
+{ // Returns an infinity carrying the requested sign.
+    return Decimal(EncodedData(sign, EncodedData::ClassInfinity));
+}
+
+Decimal Decimal::nan()
+{ // Returns a NaN; it is always encoded with a Positive sign.
+    return Decimal(EncodedData(Positive, EncodedData::ClassNaN));
+}
+
+Decimal Decimal::remainder(const Decimal& rhs) const
+{ // Computes this - trunc(this / rhs) * rhs; a special (non-finite) quotient is returned unchanged.
+    const Decimal quotient = *this / rhs;
+    return quotient.isSpecial() ? quotient : *this - (quotient.isNegative() ? quotient.ceil() : quotient.floor()) * rhs; // ceil/floor by sign == truncation toward zero
+}
+
+// Rounds to the nearest integer, with ties going away from zero. Special
+// values and values with exponent >= 0 are returned unchanged.
+Decimal Decimal::round() const
+{
+    if (isSpecial() || exponent() >= 0)
+        return *this;
+
+    const int fractionDigits = -exponent();
+    // More places are dropped than there are digits: magnitude below 0.1.
+    if (countDigits(m_data.coefficient()) < fractionDigits)
+        return zero(Positive);
+
+    // Keep exactly one fraction digit; it alone decides the rounding
+    // direction, after which it is dropped as well.
+    const uint64_t withGuardDigit = scaleDown(m_data.coefficient(), fractionDigits - 1);
+    const bool roundUp = withGuardDigit % 10 >= 5;
+    const uint64_t integerPart = withGuardDigit / 10 + (roundUp ? 1 : 0);
+
+    return Decimal(sign(), 0, integerPart);
+}
+
+// Finite values are converted via toString() and mozToDouble(); NaN, or text the parser rejects, gives a quiet NaN.
+double Decimal::toDouble() const
+{
+    typedef std::numeric_limits<double> Limits;
+    if (!isFinite()) {
+        if (!isInfinity())
+            return Limits::quiet_NaN();
+        return isNegative() ? -Limits::infinity() : Limits::infinity();
+    }
+    bool valid;
+    const double parsed = mozToDouble(toString(), &valid);
+    return valid ? parsed : Limits::quiet_NaN();
+}
+
+String Decimal::toString() const
+{ // Formats the value as "NaN", "[-]Infinity", plain decimal text, or exponent notation.
+    switch (m_data.formatClass()) {
+    case EncodedData::ClassInfinity:
+        return sign() ? "-Infinity" : "Infinity";
+
+    case EncodedData::ClassNaN:
+        return "NaN";
+
+    case EncodedData::ClassNormal:
+    case EncodedData::ClassZero:
+        break;
+
+    default:
+        ASSERT_NOT_REACHED();
+        return "";
+    }
+
+    StringBuilder builder;
+    if (sign())
+        builder.append('-');
+
+    int originalExponent = exponent();
+    uint64_t coefficient = m_data.coefficient();
+
+    if (originalExponent < 0) { // only values with fraction digits are trimmed
+        const int maxDigits = DBL_DIG; // cap on the significant digits kept
+        uint64_t lastDigit = 0;
+        while (countDigits(coefficient) > maxDigits) {
+            lastDigit = coefficient % 10;
+            coefficient /= 10;
+            ++originalExponent;
+        }
+
+        if (lastDigit >= 5)
+            ++coefficient; // round on the last digit that was removed
+
+        while (originalExponent < 0 && coefficient && !(coefficient % 10)) { // strip trailing zeros of the fraction
+            coefficient /= 10;
+            ++originalExponent;
+        }
+    }
+
+    const String digits = mozToString(coefficient);
+    int coefficientLength = static_cast<int>(digits.length());
+    const int adjustedExponent = originalExponent + coefficientLength - 1; // exponent of the leading digit
+    if (originalExponent <= 0 && adjustedExponent >= -6) { // plain notation; everything else uses an exponent
+        if (!originalExponent) { // integer: digits only
+            builder.append(digits);
+            return builder.toString();
+        }
+
+        if (adjustedExponent >= 0) { // digits on both sides of the point
+            for (int i = 0; i < coefficientLength; ++i) {
+                builder.append(digits[i]);
+                if (i == adjustedExponent)
+                    builder.append('.');
+            }
+            return builder.toString();
+        }
+
+        builder.appendLiteral("0."); // pure fraction: pad with zeros after the point
+        for (int i = adjustedExponent + 1; i < 0; ++i)
+            builder.append('0');
+
+        builder.append(digits);
+
+    } else { // exponent notation: d[.ddd][e[+]x]
+        builder.append(digits[0]);
+        while (coefficientLength >= 2 && digits[coefficientLength - 1] == '0')
+            --coefficientLength; // trailing zeros are not printed
+        if (coefficientLength >= 2) {
+            builder.append('.');
+            for (int i = 1; i < coefficientLength; ++i)
+                builder.append(digits[i]);
+        }
+
+        if (adjustedExponent) {
+            builder.append(adjustedExponent < 0 ? "e" : "e+");
+            builder.appendNumber(adjustedExponent);
+        }
+    }
+    return builder.toString();
+}
+
+// Copies toString() into strBuf, truncating if needed and always writing a
+// terminating NUL. Returns true only if the whole string fitted.
+bool Decimal::toString(char* strBuf, size_t bufLength) const
+{
+  ASSERT(bufLength > 0);
+  if (!bufLength)
+    return false; // Nothing (not even the NUL) can be written to an empty buffer.
+  const String str = toString();
+  const size_t length = str.copy(strBuf, bufLength - 1); // leave room for the NUL
+  strBuf[length] = '\0';
+  return length == str.length();
+}
+
+Decimal Decimal::zero(Sign sign)
+{ // Returns a zero carrying the requested sign.
+    return Decimal(EncodedData(sign, EncodedData::ClassZero));
+}
+
+} // namespace blink
+
+// Implementation of DoubleConversion.h:
+
+namespace mozilla {
+
+Maybe<double> StringToDouble(Span<const char> aStringSpan) {
+    bool valid = false;
+    double result = mozToDouble(aStringSpan, &valid); // valid is set by the parser
+    return valid ? Some(result) : Nothing(); // Nothing() when the input was reported invalid
+}
+
+}
diff --git a/mozglue/misc/decimal/Decimal.h b/mozglue/misc/decimal/Decimal.h
new file mode 100644
index 0000000000..10d0e2c7ce
--- /dev/null
+++ b/mozglue/misc/decimal/Decimal.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2012 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Imported from:
+ * https://chromium.googlesource.com/chromium/src.git/+/master/third_party/WebKit/Source/platform/Decimal.h
+ * Check UPSTREAM-GIT-SHA for the commit ID of the last update from Blink core.
+ */
+
+#ifndef Decimal_h
+#define Decimal_h
+
+#include "mozilla/Assertions.h"
+#include <stdint.h>
+#include "mozilla/Types.h"
+
+#include <string>
+
+#ifndef ASSERT
+#define DEFINED_ASSERT_FOR_DECIMAL_H 1
+#define ASSERT MOZ_ASSERT
+#endif
+
+#define PLATFORM_EXPORT
+
+// To use USING_FAST_MALLOC we'd need:
+// https://chromium.googlesource.com/chromium/src.git/+/master/third_party/WebKit/Source/wtf/Allocator.h
+// Since we don't allocate Decimal objects, no need.
+#define USING_FAST_MALLOC(type) \
+  void ignore_this_dummy_method() = delete
+
+#define DISALLOW_NEW()                                          \
+    private:                                                    \
+        void* operator new(size_t) = delete;                    \
+        void* operator new(size_t, void*) = delete;             \
+    public:
+
+namespace blink {
+
+namespace DecimalPrivate {
+class SpecialValueHandler;
+}
+
+// Decimal is a base-10 floating-point number: a sign, a 64-bit unsigned
+// coefficient and an exponent, plus the special values zero, infinity and NaN.
+//
+// FIXME: Once all C++ compilers support a decimal type, replace this class with
+// it. Status: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n1977.html
+class PLATFORM_EXPORT Decimal {
+    USING_FAST_MALLOC(Decimal);
+public:
+    enum Sign {
+        Positive,
+        Negative,
+    };
+
+    // Do not use EncodedData for anything other than unit testing.
+    class EncodedData {
+        DISALLOW_NEW();
+        // For accessing FormatClass.
+        friend class Decimal;
+        friend class DecimalPrivate::SpecialValueHandler;
+    public:
+        EncodedData(Sign, int exponent, uint64_t coefficient);
+
+        bool operator==(const EncodedData&) const;
+        bool operator!=(const EncodedData& another) const { return !operator==(another); }
+
+        uint64_t coefficient() const { return m_coefficient; }
+        int countDigits() const;
+        int exponent() const { return m_exponent; }
+        bool isFinite() const { return !isSpecial(); }
+        bool isInfinity() const { return m_formatClass == ClassInfinity; }
+        bool isNaN() const { return m_formatClass == ClassNaN; }
+        bool isSpecial() const { return m_formatClass == ClassInfinity || m_formatClass == ClassNaN; }
+        bool isZero() const { return m_formatClass == ClassZero; }
+        Sign sign() const { return m_sign; }
+        void setSign(Sign sign) { m_sign = sign; }
+
+    private:
+        enum FormatClass {
+            ClassInfinity,
+            ClassNormal,
+            ClassNaN,
+            ClassZero,
+        };
+
+        EncodedData(Sign, FormatClass);
+        FormatClass formatClass() const { return m_formatClass; }
+
+        uint64_t m_coefficient;
+        int16_t m_exponent;  // exponent() widens this back to int
+        FormatClass m_formatClass;  // tells normal values apart from zero, infinity and NaN
+        Sign m_sign;
+    };
+
+    MFBT_API explicit Decimal(int32_t = 0);
+    MFBT_API Decimal(Sign, int exponent, uint64_t coefficient);
+    MFBT_API Decimal(const Decimal&);
+
+    MFBT_API Decimal& operator=(const Decimal&);
+    MFBT_API Decimal& operator+=(const Decimal&);
+    MFBT_API Decimal& operator-=(const Decimal&);
+    MFBT_API Decimal& operator*=(const Decimal&);
+    MFBT_API Decimal& operator/=(const Decimal&);
+
+    MFBT_API Decimal operator-() const;
+
+    MFBT_API bool operator==(const Decimal&) const;
+    MFBT_API bool operator!=(const Decimal&) const;
+    MFBT_API bool operator<(const Decimal&) const;
+    MFBT_API bool operator<=(const Decimal&) const;
+    MFBT_API bool operator>(const Decimal&) const;
+    MFBT_API bool operator>=(const Decimal&) const;
+
+    MFBT_API Decimal operator+(const Decimal&) const;
+    MFBT_API Decimal operator-(const Decimal&) const;
+    MFBT_API Decimal operator*(const Decimal&) const;
+    MFBT_API Decimal operator/(const Decimal&) const;
+
+    int exponent() const
+    {
+        ASSERT(isFinite());
+        return m_data.exponent();
+    }
+
+    bool isFinite() const { return m_data.isFinite(); }
+    bool isInfinity() const { return m_data.isInfinity(); }
+    bool isNaN() const { return m_data.isNaN(); }
+    bool isNegative() const { return sign() == Negative; }
+    bool isPositive() const { return sign() == Positive; }
+    bool isSpecial() const { return m_data.isSpecial(); }
+    bool isZero() const { return m_data.isZero(); }
+
+    MFBT_API Decimal abs() const;
+    MFBT_API Decimal ceil() const;
+    MFBT_API Decimal floor() const;
+    MFBT_API Decimal remainder(const Decimal&) const;
+    MFBT_API Decimal round() const;
+
+    MFBT_API double toDouble() const;
+    // Note: toString() supports infinity and NaN, but fromString() does not.
+    MFBT_API std::string toString() const;
+    MFBT_API bool toString(char* strBuf, size_t bufLength) const;  // NUL-terminates strBuf; false if the text did not fit
+
+    static MFBT_API Decimal fromDouble(double);
+    // fromString supports following syntax EBNF:
+    //  number ::= sign? digit+ ('.' digit*) (exponent-marker sign? digit+)?
+    //          | sign? '.' digit+ (exponent-marker sign? digit+)?
+    //  sign ::= '+' | '-'
+    //  exponent-marker ::= 'e' | 'E'
+    //  digit ::= '0' | '1' | ... | '9'
+    // Note: fromString doesn't support "infinity" and "nan".
+    static MFBT_API Decimal fromString(const std::string& aValue);
+    static MFBT_API Decimal infinity(Sign);
+    static MFBT_API Decimal nan();
+    static MFBT_API Decimal zero(Sign);
+
+    // Do not use the methods below; they are exposed for unit testing only.
+    MFBT_API explicit Decimal(const EncodedData&);
+    const EncodedData& value() const { return m_data; }
+
+private:
+    struct AlignedOperands {
+        uint64_t lhsCoefficient;
+        uint64_t rhsCoefficient;
+        int exponent;
+    };
+
+    MFBT_API explicit Decimal(double);
+    MFBT_API Decimal compareTo(const Decimal&) const;
+
+    static MFBT_API AlignedOperands alignOperands(const Decimal& lhs, const Decimal& rhs);
+    static inline Sign invertSign(Sign sign) { return sign == Negative ? Positive : Negative; }
+
+    Sign sign() const { return m_data.sign(); }
+
+    EncodedData m_data;
+};
+
+} // namespace blink
+
+namespace mozilla {
+typedef blink::Decimal Decimal;
+} // namespace mozilla
+
+#undef USING_FAST_MALLOC
+
+#ifdef DEFINED_ASSERT_FOR_DECIMAL_H
+#undef DEFINED_ASSERT_FOR_DECIMAL_H
+#undef ASSERT
+#endif
+
+#endif // Decimal_h
diff --git a/mozglue/misc/decimal/DoubleConversion.h b/mozglue/misc/decimal/DoubleConversion.h
new file mode 100644
index 0000000000..14c19e2540
--- /dev/null
+++ b/mozglue/misc/decimal/DoubleConversion.h
@@ -0,0 +1,27 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* A utility function that converts a string to a double independent of OS locale. */
+
+#ifndef MOZILLA_DOUBLECONVERSION_H
+#define MOZILLA_DOUBLECONVERSION_H
+
+#include "mozilla/Maybe.h"
+#include "mozilla/Span.h"
+
+#include <string>
+
+namespace mozilla {
+
+// Parses aStringSpan into a double floating point value. Always treats . as the
+// decimal separator, regardless of OS locale. Consumes the entire string;
+// trailing garbage is invalid. Returns Nothing() for invalid input.
+// The implementation uses double_conversion::StringToDoubleConverter with
+// NO_FLAGS, see double-conversion/string-to-double.h for more documentation.
+Maybe<double> StringToDouble(Span<const char> aStringSpan);
+
+}
+
+#endif // MOZILLA_DOUBLECONVERSION_H
diff --git a/mozglue/misc/decimal/UPSTREAM-GIT-SHA b/mozglue/misc/decimal/UPSTREAM-GIT-SHA
new file mode 100644
index 0000000000..ed86150b28
--- /dev/null
+++ b/mozglue/misc/decimal/UPSTREAM-GIT-SHA
@@ -0,0 +1 @@
+cad4c9e3b3c9e80bb189059373db528272bca96f
diff --git a/mozglue/misc/decimal/add-doubleconversion-impl.patch b/mozglue/misc/decimal/add-doubleconversion-impl.patch
new file mode 100644
index 0000000000..1cf0fb6ff1
--- /dev/null
+++ b/mozglue/misc/decimal/add-doubleconversion-impl.patch
@@ -0,0 +1,42 @@
+diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
+--- a/mozglue/misc/decimal/Decimal.cpp
++++ b/mozglue/misc/decimal/Decimal.cpp
+@@ -25,16 +25,17 @@
+  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+ #include "Decimal.h"
+ #include "moz-decimal-utils.h"
++#include "DoubleConversion.h"
+ 
+ using namespace moz_decimal_utils;
+ 
+ #include <algorithm>
+ #include <float.h>
+ 
+ namespace blink {
+ 
+@@ -1043,8 +1044,20 @@ bool Decimal::toString(char* strBuf, siz
+ }
+ 
+ Decimal Decimal::zero(Sign sign)
+ {
+     return Decimal(EncodedData(sign, EncodedData::ClassZero));
+ }
+ 
+ } // namespace blink
++
++// Implementation of DoubleConversion.h:
++
++namespace mozilla {
++
++Maybe<double> StringToDouble(Span<const char> aStringSpan) {
++    bool valid = false;
++    double result = mozToDouble(aStringSpan, &valid);
++    return valid ? Some(result) : Nothing();
++}
++
++}
diff --git a/mozglue/misc/decimal/comparison-with-nan.patch b/mozglue/misc/decimal/comparison-with-nan.patch
new file mode 100644
index 0000000000..0e274ce033
--- /dev/null
+++ b/mozglue/misc/decimal/comparison-with-nan.patch
@@ -0,0 +1,67 @@
+diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
+--- a/mozglue/misc/decimal/Decimal.cpp
++++ b/mozglue/misc/decimal/Decimal.cpp
+@@ -509,21 +509,25 @@ Decimal Decimal::operator/(const Decimal
+     if (remainder > divisor / 2)
+         ++result;
+ 
+     return Decimal(resultSign, resultExponent, result);
+ }
+ 
+ bool Decimal::operator==(const Decimal& rhs) const
+ {
++    if (isNaN() || rhs.isNaN())
++        return false;
+     return m_data == rhs.m_data || compareTo(rhs).isZero();
+ }
+ 
+ bool Decimal::operator!=(const Decimal& rhs) const
+ {
++    if (isNaN() || rhs.isNaN())
++        return true;
+     if (m_data == rhs.m_data)
+         return false;
+     const Decimal result = compareTo(rhs);
+     if (result.isNaN())
+         return false;
+     return !result.isZero();
+ }
+ 
+@@ -532,16 +536,18 @@ bool Decimal::operator<(const Decimal& r
+     const Decimal result = compareTo(rhs);
+     if (result.isNaN())
+         return false;
+     return !result.isZero() && result.isNegative();
+ }
+ 
+ bool Decimal::operator<=(const Decimal& rhs) const
+ {
++    if (isNaN() || rhs.isNaN())
++        return false;
+     if (m_data == rhs.m_data)
+         return true;
+     const Decimal result = compareTo(rhs);
+     if (result.isNaN())
+         return false;
+     return result.isZero() || result.isNegative();
+ }
+ 
+@@ -550,16 +556,18 @@ bool Decimal::operator>(const Decimal& r
+     const Decimal result = compareTo(rhs);
+     if (result.isNaN())
+         return false;
+     return !result.isZero() && result.isPositive();
+ }
+ 
+ bool Decimal::operator>=(const Decimal& rhs) const
+ {
++    if (isNaN() || rhs.isNaN())
++        return false;
+     if (m_data == rhs.m_data)
+         return true;
+     const Decimal result = compareTo(rhs);
+     if (result.isNaN())
+         return false;
+     return result.isZero() || !result.isNegative();
+ }
+ 
diff --git a/mozglue/misc/decimal/fix-wshadow-warnings.patch b/mozglue/misc/decimal/fix-wshadow-warnings.patch
new file mode 100644
index 0000000000..465c61a223
--- /dev/null
+++ b/mozglue/misc/decimal/fix-wshadow-warnings.patch
@@ -0,0 +1,171 @@
+diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
+--- a/mozglue/misc/decimal/Decimal.cpp
++++ b/mozglue/misc/decimal/Decimal.cpp
+@@ -118,18 +118,18 @@ Decimal SpecialValueHandler::value() con
+         ASSERT_NOT_REACHED();
+         return m_lhs;
+     }
+ }
+ 
+ // This class is used for 128 bit unsigned integer arithmetic.
+ class UInt128 {
+ public:
+-    UInt128(uint64_t low, uint64_t high)
+-        : m_high(high), m_low(low)
++    UInt128(uint64_t aLow, uint64_t aHigh)
++        : m_high(aHigh), m_low(aLow)
+     {
+     }
+ 
+     UInt128& operator/=(uint32_t);
+ 
+     uint64_t high() const { return m_high; }
+     uint64_t low() const { return m_low; }
+ 
+@@ -224,68 +224,68 @@ static uint64_t scaleUp(uint64_t x, int 
+         z = z * z;
+     }
+ }
+ 
+ } // namespace DecimalPrivate
+ 
+ using namespace DecimalPrivate;
+ 
+-Decimal::EncodedData::EncodedData(Sign sign, FormatClass formatClass)
++Decimal::EncodedData::EncodedData(Sign aSign, FormatClass aFormatClass)
+     : m_coefficient(0)
+     , m_exponent(0)
+-    , m_formatClass(formatClass)
+-    , m_sign(sign)
++    , m_formatClass(aFormatClass)
++    , m_sign(aSign)
+ {
+ }
+ 
+-Decimal::EncodedData::EncodedData(Sign sign, int exponent, uint64_t coefficient)
+-    : m_formatClass(coefficient ? ClassNormal : ClassZero)
+-    , m_sign(sign)
++Decimal::EncodedData::EncodedData(Sign aSign, int aExponent, uint64_t aCoefficient)
++    : m_formatClass(aCoefficient ? ClassNormal : ClassZero)
++    , m_sign(aSign)
+ {
+-    if (exponent >= ExponentMin && exponent <= ExponentMax) {
+-        while (coefficient > MaxCoefficient) {
+-            coefficient /= 10;
+-            ++exponent;
++    if (aExponent >= ExponentMin && aExponent <= ExponentMax) {
++        while (aCoefficient > MaxCoefficient) {
++            aCoefficient /= 10;
++            ++aExponent;
+         }
+     }
+ 
+-    if (exponent > ExponentMax) {
++    if (aExponent > ExponentMax) {
+         m_coefficient = 0;
+         m_exponent = 0;
+         m_formatClass = ClassInfinity;
+         return;
+     }
+ 
+-    if (exponent < ExponentMin) {
++    if (aExponent < ExponentMin) {
+         m_coefficient = 0;
+         m_exponent = 0;
+         m_formatClass = ClassZero;
+         return;
+     }
+ 
+-    m_coefficient = coefficient;
+-    m_exponent = static_cast<int16_t>(exponent);
++    m_coefficient = aCoefficient;
++    m_exponent = static_cast<int16_t>(aExponent);
+ }
+ 
+ bool Decimal::EncodedData::operator==(const EncodedData& another) const
+ {
+     return m_sign == another.m_sign
+         && m_formatClass == another.m_formatClass
+         && m_exponent == another.m_exponent
+         && m_coefficient == another.m_coefficient;
+ }
+ 
+ Decimal::Decimal(int32_t i32)
+     : m_data(i32 < 0 ? Negative : Positive, 0, i32 < 0 ? static_cast<uint64_t>(-static_cast<int64_t>(i32)) : static_cast<uint64_t>(i32))
+ {
+ }
+ 
+-Decimal::Decimal(Sign sign, int exponent, uint64_t coefficient)
+-    : m_data(sign, coefficient ? exponent : 0, coefficient)
++Decimal::Decimal(Sign aSign, int aExponent, uint64_t aCoefficient)
++    : m_data(aSign, aCoefficient ? aExponent : 0, aCoefficient)
+ {
+ }
+ 
+ Decimal::Decimal(const EncodedData& data)
+     : m_data(data)
+ {
+ }
+ 
+@@ -479,32 +479,32 @@ Decimal Decimal::operator/(const Decimal
+     if (rhs.isZero())
+         return lhs.isZero() ? nan() : infinity(resultSign);
+ 
+     int resultExponent = lhs.exponent() - rhs.exponent();
+ 
+     if (lhs.isZero())
+         return Decimal(resultSign, resultExponent, 0);
+ 
+-    uint64_t remainder = lhs.m_data.coefficient();
++    uint64_t lhsRemainder = lhs.m_data.coefficient();
+     const uint64_t divisor = rhs.m_data.coefficient();
+     uint64_t result = 0;
+     while (result < MaxCoefficient / 100) {
+-        while (remainder < divisor) {
+-            remainder *= 10;
++        while (lhsRemainder < divisor) {
++            lhsRemainder *= 10;
+             result *= 10;
+             --resultExponent;
+         }
+-        result += remainder / divisor;
+-        remainder %= divisor;
+-        if (!remainder)
++        result += lhsRemainder / divisor;
++        lhsRemainder %= divisor;
++        if (!lhsRemainder)
+             break;
+     }
+ 
+-    if (remainder > divisor / 2)
++    if (lhsRemainder > divisor / 2)
+         ++result;
+ 
+     return Decimal(resultSign, resultExponent, result);
+ }
+ 
+ bool Decimal::operator==(const Decimal& rhs) const
+ {
+     if (isNaN() || rhs.isNaN())
+diff --git a/mozglue/misc/decimal/Decimal.h b/mozglue/misc/decimal/Decimal.h
+--- a/mozglue/misc/decimal/Decimal.h
++++ b/mozglue/misc/decimal/Decimal.h
+@@ -88,17 +88,17 @@ public:
+         int countDigits() const;
+         int exponent() const { return m_exponent; }
+         bool isFinite() const { return !isSpecial(); }
+         bool isInfinity() const { return m_formatClass == ClassInfinity; }
+         bool isNaN() const { return m_formatClass == ClassNaN; }
+         bool isSpecial() const { return m_formatClass == ClassInfinity || m_formatClass == ClassNaN; }
+         bool isZero() const { return m_formatClass == ClassZero; }
+         Sign sign() const { return m_sign; }
+-        void setSign(Sign sign) { m_sign = sign; }
++        void setSign(Sign aSign) { m_sign = aSign; }
+ 
+     private:
+         enum FormatClass {
+             ClassInfinity,
+             ClassNormal,
+             ClassNaN,
+             ClassZero,
+         };
diff --git a/mozglue/misc/decimal/mfbt-abi-markers.patch b/mozglue/misc/decimal/mfbt-abi-markers.patch
new file mode 100644
index 0000000000..1d50d3d643
--- /dev/null
+++ b/mozglue/misc/decimal/mfbt-abi-markers.patch
@@ -0,0 +1,150 @@
+diff --git a/mozglue/misc/decimal/Decimal.h b/mozglue/misc/decimal/Decimal.h
+--- a/mozglue/misc/decimal/Decimal.h
++++ b/mozglue/misc/decimal/Decimal.h
+@@ -26,16 +26,18 @@
+  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+ #ifndef Decimal_h
+ #define Decimal_h
+ 
++#include "mozilla/Types.h"
++
+ #include "platform/PlatformExport.h"
+ #include "wtf/Allocator.h"
+ #include "wtf/Assertions.h"
+ #include "wtf/text/WTFString.h"
+ #include <stdint.h>
+ 
+ namespace blink {
+ 
+@@ -91,92 +93,92 @@ public:
+         FormatClass formatClass() const { return m_formatClass; }
+ 
+         uint64_t m_coefficient;
+         int16_t m_exponent;
+         FormatClass m_formatClass;
+         Sign m_sign;
+     };
+ 
+-    Decimal(int32_t = 0);
+-    Decimal(Sign, int exponent, uint64_t coefficient);
+-    Decimal(const Decimal&);
++    MFBT_API explicit Decimal(int32_t = 0);
++    MFBT_API Decimal(Sign, int exponent, uint64_t coefficient);
++    MFBT_API Decimal(const Decimal&);
+ 
+-    Decimal& operator=(const Decimal&);
+-    Decimal& operator+=(const Decimal&);
+-    Decimal& operator-=(const Decimal&);
+-    Decimal& operator*=(const Decimal&);
+-    Decimal& operator/=(const Decimal&);
++    MFBT_API Decimal& operator=(const Decimal&);
++    MFBT_API Decimal& operator+=(const Decimal&);
++    MFBT_API Decimal& operator-=(const Decimal&);
++    MFBT_API Decimal& operator*=(const Decimal&);
++    MFBT_API Decimal& operator/=(const Decimal&);
+ 
+-    Decimal operator-() const;
++    MFBT_API Decimal operator-() const;
+ 
+-    bool operator==(const Decimal&) const;
+-    bool operator!=(const Decimal&) const;
+-    bool operator<(const Decimal&) const;
+-    bool operator<=(const Decimal&) const;
+-    bool operator>(const Decimal&) const;
+-    bool operator>=(const Decimal&) const;
++    MFBT_API bool operator==(const Decimal&) const;
++    MFBT_API bool operator!=(const Decimal&) const;
++    MFBT_API bool operator<(const Decimal&) const;
++    MFBT_API bool operator<=(const Decimal&) const;
++    MFBT_API bool operator>(const Decimal&) const;
++    MFBT_API bool operator>=(const Decimal&) const;
+ 
+-    Decimal operator+(const Decimal&) const;
+-    Decimal operator-(const Decimal&) const;
+-    Decimal operator*(const Decimal&) const;
+-    Decimal operator/(const Decimal&) const;
++    MFBT_API Decimal operator+(const Decimal&) const;
++    MFBT_API Decimal operator-(const Decimal&) const;
++    MFBT_API Decimal operator*(const Decimal&) const;
++    MFBT_API Decimal operator/(const Decimal&) const;
+ 
+     int exponent() const
+     {
+         ASSERT(isFinite());
+         return m_data.exponent();
+     }
+ 
+     bool isFinite() const { return m_data.isFinite(); }
+     bool isInfinity() const { return m_data.isInfinity(); }
+     bool isNaN() const { return m_data.isNaN(); }
+     bool isNegative() const { return sign() == Negative; }
+     bool isPositive() const { return sign() == Positive; }
+     bool isSpecial() const { return m_data.isSpecial(); }
+     bool isZero() const { return m_data.isZero(); }
+ 
+-    Decimal abs() const;
+-    Decimal ceil() const;
+-    Decimal floor() const;
+-    Decimal remainder(const Decimal&) const;
+-    Decimal round() const;
++    MFBT_API Decimal abs() const;
++    MFBT_API Decimal ceil() const;
++    MFBT_API Decimal floor() const;
++    MFBT_API Decimal remainder(const Decimal&) const;
++    MFBT_API Decimal round() const;
+ 
+-    double toDouble() const;
++    MFBT_API double toDouble() const;
+     // Note: toString method supports infinity and nan but fromString not.
+-    String toString() const;
++    MFBT_API String toString() const;
+ 
+-    static Decimal fromDouble(double);
++    static MFBT_API Decimal fromDouble(double);
+     // fromString supports following syntax EBNF:
+     //  number ::= sign? digit+ ('.' digit*) (exponent-marker sign? digit+)?
+     //          | sign? '.' digit+ (exponent-marker sign? digit+)?
+     //  sign ::= '+' | '-'
+     //  exponent-marker ::= 'e' | 'E'
+     //  digit ::= '0' | '1' | ... | '9'
+     // Note: fromString doesn't support "infinity" and "nan".
+-    static Decimal fromString(const String&);
+-    static Decimal infinity(Sign);
+-    static Decimal nan();
+-    static Decimal zero(Sign);
++    static MFBT_API Decimal fromString(const String&);
++    static MFBT_API Decimal infinity(Sign);
++    static MFBT_API Decimal nan();
++    static MFBT_API Decimal zero(Sign);
+ 
+     // You should not use below methods. We expose them for unit testing.
+-    explicit Decimal(const EncodedData&);
++    MFBT_API explicit Decimal(const EncodedData&);
+     const EncodedData& value() const { return m_data; }
+ 
+ private:
+     struct AlignedOperands {
+         uint64_t lhsCoefficient;
+         uint64_t rhsCoefficient;
+         int exponent;
+     };
+ 
+-    Decimal(double);
+-    Decimal compareTo(const Decimal&) const;
++    MFBT_API explicit Decimal(double);
++    MFBT_API Decimal compareTo(const Decimal&) const;
+ 
+-    static AlignedOperands alignOperands(const Decimal& lhs, const Decimal& rhs);
++    static MFBT_API AlignedOperands alignOperands(const Decimal& lhs, const Decimal& rhs);
+     static inline Sign invertSign(Sign sign) { return sign == Negative ? Positive : Negative; }
+ 
+     Sign sign() const { return m_data.sign(); }
+ 
+     EncodedData m_data;
+ };
+ 
+ } // namespace blink
diff --git a/mozglue/misc/decimal/moz-decimal-utils.h b/mozglue/misc/decimal/moz-decimal-utils.h
new file mode 100644
index 0000000000..390bdaf02d
--- /dev/null
+++ b/mozglue/misc/decimal/moz-decimal-utils.h
@@ -0,0 +1,111 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_DECIMAL_UTILS_H
+#define MOZ_DECIMAL_UTILS_H
+
+// This file contains extra includes, defines and typedefs to allow compilation
+// of Decimal.cpp under the Mozilla source without blink core dependencies. Do
+// not include it into any file other than Decimal.cpp.
+
+#include "double-conversion/double-conversion.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Casting.h"
+#include "mozilla/FloatingPoint.h"
+#include "mozilla/Span.h"
+
+#include <cmath>
+#include <cstring>
+#include <iomanip>
+#include <limits>
+#include <sstream>
+
+#ifndef UINT64_C
+// For Android toolchain
+#define UINT64_C(c) (c ## ULL)
+#endif
+
+#ifdef ASSERT
+#undef ASSERT
+#endif
+#define ASSERT MOZ_ASSERT
+
+#define ASSERT_NOT_REACHED() MOZ_ASSERT_UNREACHABLE("moz-decimal-utils.h")
+
+#define STACK_ALLOCATED() DISALLOW_NEW()
+
+#define WTF_MAKE_NONCOPYABLE(ClassName) \
+  private: \
+    ClassName(const ClassName&) = delete; \
+    void operator=(const ClassName&) = delete;
+
+typedef std::string String;
+
+double mozToDouble(mozilla::Span<const char> aStr, bool *valid) {
+  using double_conversion::StringToDoubleConverter;
+  const double nanValue = mozilla::UnspecifiedNaN<double>();
+  StringToDoubleConverter parser(StringToDoubleConverter::NO_FLAGS, nanValue, nanValue, nullptr, nullptr);
+  int consumed; // unused - NO_FLAGS requires the whole string to parse
+  const double parsed = parser.StringToDouble(
+    aStr.Elements(), mozilla::AssertedCast<int>(aStr.Length()), &consumed);
+  *valid = mozilla::IsFinite(parsed);  // NaN/infinite results count as a failed parse
+  return parsed;
+}
+
+// Spans size() bytes instead of strlen(c_str()), so an embedded NUL cannot hide trailing garbage.
+double mozToDouble(const String &aStr, bool *valid) {
+  return mozToDouble(mozilla::Span<const char>(aStr.data(), aStr.size()), valid);
+}
+
+String mozToString(double aNum) {
+  // Formats aNum with the double-conversion ECMAScript converter's ToShortest()
+  // into a 64-byte stack buffer and returns a copy of the finalized text.
+  using double_conversion::DoubleToStringConverter;
+  char storage[64];
+  double_conversion::StringBuilder out(storage, mozilla::ArrayLength(storage));
+  DoubleToStringConverter::EcmaScriptConverter().ToShortest(aNum, &out);
+  return String(out.Finalize());
+}
+
+String mozToString(int64_t aNum) {
+  // std::to_string yields plain decimal digits and, unlike an ostringstream,
+  // does not consult the global C++ locale (precision never applied to integers).
+  return std::to_string(aNum);
+}
+
+String mozToString(uint64_t aNum) {
+  // std::to_string yields plain decimal digits and, unlike an ostringstream,
+  // does not consult the global C++ locale (precision never applied to integers).
+  return std::to_string(aNum);
+}
+
+namespace moz_decimal_utils {
+
+// Minimal replacement for the wtf/text/StringBuilder.h builder that Decimal.cpp
+// was written against; it simply accumulates text in a std::string.
+class StringBuilder
+{
+public:
+  void append(char c) { mText.push_back(c); }
+  void append(const String& aStr) { mText.append(aStr); }
+  void appendLiteral(const char *aStr) { mText.append(aStr); }
+
+  // Appends aNum in decimal; the int64_t cast selects that mozToString overload.
+  void appendNumber(int aNum) {
+    mText.append(mozToString(int64_t(aNum)));
+  }
+
+  // Returns a copy of everything appended so far.
+  std::string toString() const { return mText; }
+
+private:
+  // Accumulated output.
+  std::string mText;
+};
+
+} // namespace moz_decimal_utils
+
+#endif
+
diff --git a/mozglue/misc/decimal/to-moz-dependencies.patch b/mozglue/misc/decimal/to-moz-dependencies.patch
new file mode 100644
index 0000000000..bf19a6da96
--- /dev/null
+++ b/mozglue/misc/decimal/to-moz-dependencies.patch
@@ -0,0 +1,224 @@
+diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
+--- a/mozglue/misc/decimal/Decimal.cpp
++++ b/mozglue/misc/decimal/Decimal.cpp
+@@ -23,22 +23,20 @@
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+-#include "platform/Decimal.h"
++#include "Decimal.h"
++#include "moz-decimal-utils.h"
+ 
+-#include "wtf/Allocator.h"
+-#include "wtf/MathExtras.h"
+-#include "wtf/Noncopyable.h"
+-#include "wtf/text/StringBuilder.h"
++using namespace moz_decimal_utils;
+ 
+ #include <algorithm>
+ #include <float.h>
+ 
+ namespace blink {
+ 
+ namespace DecimalPrivate {
+ 
+@@ -690,17 +688,17 @@ Decimal Decimal::floor() const
+     if (isNegative() && !isMultiplePowersOfTen(m_data.coefficient(), numberOfDropDigits))
+         ++result;
+     return Decimal(sign(), 0, result);
+ }
+ 
+ Decimal Decimal::fromDouble(double doubleValue)
+ {
+     if (std::isfinite(doubleValue))
+-        return fromString(String::numberToStringECMAScript(doubleValue));
++        return fromString(mozToString(doubleValue));
+ 
+     if (std::isinf(doubleValue))
+         return infinity(doubleValue < 0 ? Negative : Positive);
+ 
+     return nan();
+ }
+ 
+ Decimal Decimal::fromString(const String& str)
+@@ -931,17 +929,17 @@ Decimal Decimal::round() const
+     result /= 10;
+     return Decimal(sign(), 0, result);
+ }
+ 
+ double Decimal::toDouble() const
+ {
+     if (isFinite()) {
+         bool valid;
+-        const double doubleValue = toString().toDouble(&valid);
++        const double doubleValue = mozToDouble(toString(), &valid);
+         return valid ? doubleValue : std::numeric_limits<double>::quiet_NaN();
+     }
+ 
+     if (isInfinity())
+         return isNegative() ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity();
+ 
+     return std::numeric_limits<double>::quiet_NaN();
+ }
+@@ -984,17 +982,17 @@ String Decimal::toString() const
+             ++coefficient;
+ 
+         while (originalExponent < 0 && coefficient && !(coefficient % 10)) {
+             coefficient /= 10;
+             ++originalExponent;
+         }
+     }
+ 
+-    const String digits = String::number(coefficient);
++    const String digits = mozToString(coefficient);
+     int coefficientLength = static_cast<int>(digits.length());
+     const int adjustedExponent = originalExponent + coefficientLength - 1;
+     if (originalExponent <= 0 && adjustedExponent >= -6) {
+         if (!originalExponent) {
+             builder.append(digits);
+             return builder.toString();
+         }
+ 
+@@ -1026,14 +1024,27 @@ String Decimal::toString() const
+         if (adjustedExponent) {
+             builder.append(adjustedExponent < 0 ? "e" : "e+");
+             builder.appendNumber(adjustedExponent);
+         }
+     }
+     return builder.toString();
+ }
+ 
++bool Decimal::toString(char* strBuf, size_t bufLength) const
++{
++  ASSERT(bufLength > 0);
++  String str = toString();
++  size_t length = str.copy(strBuf, bufLength);
++  if (length < bufLength) {
++    strBuf[length] = '\0';
++    return true;
++  }
++  strBuf[bufLength - 1] = '\0';
++  return false;
++}
++
+ Decimal Decimal::zero(Sign sign)
+ {
+     return Decimal(EncodedData(sign, EncodedData::ClassZero));
+ }
+ 
+ } // namespace blink
+diff --git a/mozglue/misc/decimal/Decimal.h b/mozglue/misc/decimal/Decimal.h
+--- a/mozglue/misc/decimal/Decimal.h
++++ b/mozglue/misc/decimal/Decimal.h
+@@ -23,26 +23,49 @@
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
++/**
++ * Imported from:
++ * https://chromium.googlesource.com/chromium/src.git/+/master/third_party/WebKit/Source/platform/Decimal.h
++ * Check UPSTREAM-GIT-SHA for the commit ID of the last update from Blink core.
++ */
++
+ #ifndef Decimal_h
+ #define Decimal_h
+ 
++#include "mozilla/Assertions.h"
++#include <stdint.h>
+ #include "mozilla/Types.h"
+ 
+-#include "platform/PlatformExport.h"
+-#include "wtf/Allocator.h"
+-#include "wtf/Assertions.h"
+-#include "wtf/text/WTFString.h"
+-#include <stdint.h>
++#include <string>
++
++#ifndef ASSERT
++#define DEFINED_ASSERT_FOR_DECIMAL_H 1
++#define ASSERT MOZ_ASSERT
++#endif
++
++#define PLATFORM_EXPORT
++
++// To use USING_FAST_MALLOC we'd need:
++// https://chromium.googlesource.com/chromium/src.git/+/master/third_party/WebKit/Source/wtf/Allocator.h
++// Since we don't allocate Decimal objects, no need.
++#define USING_FAST_MALLOC(type) \
++  void ignore_this_dummy_method() = delete
++
++#define DISALLOW_NEW()                                          \
++    private:                                                    \
++        void* operator new(size_t) = delete;                    \
++        void* operator new(size_t, void*) = delete;             \
++    public:
+ 
+ namespace blink {
+ 
+ namespace DecimalPrivate {
+ class SpecialValueHandler;
+ }
+ 
+ // This class represents decimal base floating point number.
+@@ -139,27 +162,28 @@ public:
+     MFBT_API Decimal abs() const;
+     MFBT_API Decimal ceil() const;
+     MFBT_API Decimal floor() const;
+     MFBT_API Decimal remainder(const Decimal&) const;
+     MFBT_API Decimal round() const;
+ 
+     MFBT_API double toDouble() const;
+     // Note: toString method supports infinity and nan but fromString not.
+-    MFBT_API String toString() const;
++    MFBT_API std::string toString() const;
++    MFBT_API bool toString(char* strBuf, size_t bufLength) const;
+ 
+     static MFBT_API Decimal fromDouble(double);
+     // fromString supports following syntax EBNF:
+     //  number ::= sign? digit+ ('.' digit*) (exponent-marker sign? digit+)?
+     //          | sign? '.' digit+ (exponent-marker sign? digit+)?
+     //  sign ::= '+' | '-'
+     //  exponent-marker ::= 'e' | 'E'
+     //  digit ::= '0' | '1' | ... | '9'
+     // Note: fromString doesn't support "infinity" and "nan".
+-    static MFBT_API Decimal fromString(const String&);
++    static MFBT_API Decimal fromString(const std::string& aValue);
+     static MFBT_API Decimal infinity(Sign);
+     static MFBT_API Decimal nan();
+     static MFBT_API Decimal zero(Sign);
+ 
+     // You should not use below methods. We expose them for unit testing.
+     MFBT_API explicit Decimal(const EncodedData&);
+     const EncodedData& value() const { return m_data; }
+ 
+@@ -178,9 +202,20 @@ private:
+ 
+     Sign sign() const { return m_data.sign(); }
+ 
+     EncodedData m_data;
+ };
+ 
+ } // namespace blink
+ 
++namespace mozilla {
++typedef blink::Decimal Decimal;
++} // namespace mozilla
++
++#undef USING_FAST_MALLOC
++
++#ifdef DEFINED_ASSERT_FOR_DECIMAL_H
++#undef DEFINED_ASSERT_FOR_DECIMAL_H
++#undef ASSERT
++#endif
++
+ #endif // Decimal_h
diff --git a/mozglue/misc/decimal/update.sh b/mozglue/misc/decimal/update.sh
new file mode 100755
index 0000000000..23748ebe2c
--- /dev/null
+++ b/mozglue/misc/decimal/update.sh
@@ -0,0 +1,60 @@
+# Usage: ./update.sh [blink-core-source-directory]
+#
+# Copies the needed files from a directory containing the original
+# Decimal.h and Decimal.cpp source that we need.
+# If [blink-core-source-directory] is not specified, this script will
+# attempt to download the latest versions using git.
+
+set -e
+
+FILES=(
+  "Decimal.h"
+  "Decimal.cpp"
+)
+
+OWN_NAME=`basename $0`
+
+if [ $# -gt 1 ]; then
+  echo "$OWN_NAME: Too many arguments">&2
+  exit 1
+fi
+
+if [ $# -eq 1 ]; then
+  BLINK_CORE_DIR="$1"
+  for F in "${FILES[@]}"
+  do
+    P="$BLINK_CORE_DIR/$F"
+    if [ ! -f "$P" ]; then
+      echo "$OWN_NAME: Couldn't find file: $P">&2
+      exit 1
+    fi
+  done
+  for F in "${FILES[@]}"
+  do
+    P="$BLINK_CORE_DIR/$F"
+    cp "$P" .
+  done
+else
+  #LATEST_SHA=$(cat UPSTREAM-GIT-SHA)
+  LATEST_SHA=$(git ls-remote https://chromium.googlesource.com/chromium/src.git/ | awk "/refs\/heads\/master/ {print \$1}")
+  REPO_PATH="https://chromium.googlesource.com/chromium/src.git/+/$LATEST_SHA/third_party/WebKit/Source/platform"
+  #REPO_PATH="https://github.com/WebKit/webkit/tree/master/Source/WebCore/platform"
+  for F in "${FILES[@]}"
+  do
+    printf "Downloading `basename $F`..."
+    curl "$REPO_PATH/${F}?format=TEXT" | base64 -D > "$F"
+    echo done.
+  done
+  echo $LATEST_SHA > UPSTREAM-GIT-SHA
+fi
+
+# Apply patches:
+
+patch -p4 < zero-serialization.patch
+patch -p4 < comparison-with-nan.patch
+patch -p4 < mfbt-abi-markers.patch
+patch -p4 < to-moz-dependencies.patch
+patch -p4 < add-doubleconversion-impl.patch
+# The following is disabled. See
+# https://bugzilla.mozilla.org/show_bug.cgi?id=1208357#c7
+#patch -p4 < fix-wshadow-warnings.patch
diff --git a/mozglue/misc/decimal/zero-serialization.patch b/mozglue/misc/decimal/zero-serialization.patch
new file mode 100644
index 0000000000..b8de9241bd
--- /dev/null
+++ b/mozglue/misc/decimal/zero-serialization.patch
@@ -0,0 +1,22 @@
+diff --git a/mozglue/misc/decimal/Decimal.cpp b/mozglue/misc/decimal/Decimal.cpp
+--- a/mozglue/misc/decimal/Decimal.cpp
++++ b/mozglue/misc/decimal/Decimal.cpp
+@@ -277,17 +277,17 @@ bool Decimal::EncodedData::operator==(co
+ }
+ 
+ Decimal::Decimal(int32_t i32)
+     : m_data(i32 < 0 ? Negative : Positive, 0, i32 < 0 ? static_cast<uint64_t>(-static_cast<int64_t>(i32)) : static_cast<uint64_t>(i32))
+ {
+ }
+ 
+ Decimal::Decimal(Sign sign, int exponent, uint64_t coefficient)
+-    : m_data(sign, exponent, coefficient)
++    : m_data(sign, coefficient ? exponent : 0, coefficient)
+ {
+ }
+ 
+ Decimal::Decimal(const EncodedData& data)
+     : m_data(data)
+ {
+ }
+ 
diff --git a/mozglue/misc/interceptor/Arm64.cpp b/mozglue/misc/interceptor/Arm64.cpp
new file mode 100644
index 0000000000..81d8e6d09b
--- /dev/null
+++ b/mozglue/misc/interceptor/Arm64.cpp
@@ -0,0 +1,89 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "Arm64.h"
+
+#include "mozilla/ResultVariant.h"
+
+namespace mozilla {
+namespace interceptor {
+namespace arm64 {
+
+struct PCRelativeLoadTest {
+  // Bitmask to be ANDed with the instruction to isolate the bits that this
+  // instance is interested in
+  uint32_t mTestMask;
+  // The desired bits that we want to see after masking
+  uint32_t mMatchBits;
+  // Decoder to call on a match; nullptr when no decoder is available.
+  LoadOrBranch (*mDecodeFn)(const uintptr_t aPC, const uint32_t aInst);
+};
+
+static LoadOrBranch ADRPDecode(const uintptr_t aPC, const uint32_t aInst) {
+  // Keep in mind that on Windows aarch64, uint32_t is little-endian
+  const uint32_t kImmLoMask = 0x60000000;  // immlo: bits 30:29
+  const uint32_t kImmHiMask = 0x00FFFFE0;  // immhi: bits 23:5
+
+  // Reassemble immhi:immlo into one 21-bit value before sign extension.
+  const uint32_t imm21 =
+      ((aInst & kImmHiMask) >> 3) | ((aInst & kImmLoMask) >> 29);
+  intptr_t pageDelta = SignExtend<intptr_t>(imm21, 21);
+  // The immediate counts 4KB pages, so scale it to a byte offset.
+  pageDelta <<= 12;
+  // The result is relative to the 4KB page containing aPC, not aPC itself.
+  const uintptr_t pageBase = aPC & ~0xFFFULL;
+
+  // The destination register lives in the low five bits.
+  const uint8_t destReg = aInst & 0x1F;
+  return LoadOrBranch(pageBase + pageDelta, destReg);
+}
+
+MFBT_API LoadOrBranch BUncondImmDecode(const uintptr_t aPC,  // decodes B imm26
+                                       const uint32_t aInst) {
+  int32_t offset = SignExtend<int32_t>(aInst & 0x03FFFFFFU, 26);
+  return LoadOrBranch(aPC + offset * 4);  // imm26 counts 4-byte instructions
+}
+
+// Order is important here; more specific encoding tests must be placed before
+// less specific ones. Masks must only cover fixed opcode bits, not immediates.
+static const PCRelativeLoadTest gPCRelTests[] = {
+    {0x9F000000, 0x10000000, nullptr},      // ADR
+    {0x9F000000, 0x90000000, &ADRPDecode},  // ADRP
+    {0xFF000000, 0x58000000, nullptr},      // LDR (literal) 64-bit GPR
+    {0x3B000000, 0x18000000, nullptr},      // LDR (literal) (remaining forms)
+    {0x7C000000, 0x14000000, nullptr},      // B (unconditional immediate)
+    {0xFE000000, 0x54000000, nullptr},      // B.Cond
+    {0x7E000000, 0x34000000, nullptr},      // Compare and branch (imm)
+    {0x7E000000, 0x36000000, nullptr},      // Test and branch (imm)
+    {0xFE000000, 0xD6000000, nullptr}       // Unconditional branch (reg)
+};
+
+/**
+ * Classifies |aInst| by walking |gPCRelTests| in order. Each entry's
+ * |mTestMask| is ANDed with |aInst| and the result is compared against the
+ * entry's |mMatchBits|; the first entry that matches decides the outcome. A
+ * matching entry with a |mDecodeFn| has that function decode the instruction,
+ * while a matching entry without one is reported as NoDecoderAvailable. When
+ * no entry matches at all, the result is InstructionNotPCRel.
+ */
+MFBT_API Result<LoadOrBranch, PCRelCheckError> CheckForPCRel(
+    const uintptr_t aPC, const uint32_t aInst) {
+  for (const PCRelativeLoadTest& entry : gPCRelTests) {
+    if ((aInst & entry.mTestMask) != entry.mMatchBits) {
+      continue;
+    }
+    if (entry.mDecodeFn) {
+      return entry.mDecodeFn(aPC, aInst);
+    }
+    return Err(PCRelCheckError::NoDecoderAvailable);
+  }
+
+  return Err(PCRelCheckError::InstructionNotPCRel);
+}
+
+}  // namespace arm64
+}  // namespace interceptor
+}  // namespace mozilla
diff --git a/mozglue/misc/interceptor/Arm64.h b/mozglue/misc/interceptor/Arm64.h
new file mode 100644
index 0000000000..ebb30ecd6b
--- /dev/null
+++ b/mozglue/misc/interceptor/Arm64.h
@@ -0,0 +1,221 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_Arm64_h
+#define mozilla_interceptor_Arm64_h
+
+#include <type_traits>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Result.h"
+#include "mozilla/Saturate.h"
+#include "mozilla/Types.h"
+
+namespace mozilla {
+namespace interceptor {
+namespace arm64 {
+
+// clang-format off
+enum class IntegerConditionCode : uint8_t {
+  // From the ARMv8 Architecture Reference Manual, Section C1.2.4
+  //               Description           Condition Flags
+  EQ = 0b0000,  // ==                    Z == 1
+  NE = 0b0001,  // !=                    Z == 0
+  CS = 0b0010,  // carry set             C == 1
+  HS = 0b0010,  // carry set (alias)     C == 1
+  CC = 0b0011,  // carry clear           C == 0
+  LO = 0b0011,  // carry clear (alias)   C == 0
+  MI = 0b0100,  // < 0                   N == 1
+  PL = 0b0101,  // >= 0                  N == 0
+  VS = 0b0110,  // overflow              V == 1
+  VC = 0b0111,  // no overflow           V == 0
+  HI = 0b1000,  // unsigned >            C == 1 && Z == 0
+  LS = 0b1001,  // unsigned <=           !(C == 1 && Z == 0)
+  GE = 0b1010,  // signed >=             N == V
+  LT = 0b1011,  // signed <              N != V
+  GT = 0b1100,  // signed >              Z == 0 && N == V
+  LE = 0b1101,  // signed <=             !(Z == 0 && N == V)
+  AL = 0b1110,  // unconditional         <Any>
+  NV = 0b1111   // unconditional (but AL is the preferred encoding)
+};
+// clang-format on
+
+struct LoadOrBranch {  // Decoded form of a PC-relative load or branch
+  enum class Type {
+    Load,
+    Branch,
+  };
+
+  // Load constructor; |aDestReg| is asserted to be a register number below 32
+  LoadOrBranch(const uintptr_t aAbsAddress, const uint8_t aDestReg)
+      : mType(Type::Load), mAbsAddress(aAbsAddress), mDestReg(aDestReg) {
+    MOZ_ASSERT(aDestReg < 32);
+  }
+
+  // Unconditional branch constructor; records the AL condition code
+  explicit LoadOrBranch(const uintptr_t aAbsAddress)
+      : mType(Type::Branch),
+        mAbsAddress(aAbsAddress),
+        mCond(IntegerConditionCode::AL) {}
+
+  // Conditional branch constructor
+  LoadOrBranch(const uintptr_t aAbsAddress, const IntegerConditionCode aCond)
+      : mType(Type::Branch), mAbsAddress(aAbsAddress), mCond(aCond) {}
+
+  Type mType;  // Tells which member of the union below was initialized
+
+  // The absolute address to be loaded into a register, or branched to
+  uintptr_t mAbsAddress;
+
+  union {
+    // The destination register for the load (set when mType is Type::Load)
+    uint8_t mDestReg;
+
+    // The condition code for the branch (set when mType is Type::Branch)
+    IntegerConditionCode mCond;
+  };
+};
+
+enum class PCRelCheckError {  // Failure reasons returned by CheckForPCRel
+  InstructionNotPCRel,  // no known PC-relative encoding matched
+  NoDecoderAvailable,   // matched an encoding that has no decode function
+};
+
+MFBT_API Result<LoadOrBranch, PCRelCheckError> CheckForPCRel(
+    const uintptr_t aPC, const uint32_t aInst);
+
+/**
+ * Casts |aValue| to a |ResultT| via sign extension. This function should be
+ * used when extracting signed immediate values from an instruction.
+ *
+ * @tparam ResultT A signed integral type (enforced by static_assert).
+ * @param aValue The value to be sign extended. This value should already be
+ *               isolated from the remainder of the instruction's bits and
+ *               shifted all the way to the right.
+ * @param aNumValidBits The number of bits in |aValue| that contain the
+ *                      immediate signed value, including the sign bit.
+ * @return |aValue| with bit (aNumValidBits - 1) replicated into the upper bits.
+ */
+template <typename ResultT>
+inline ResultT SignExtend(const uint32_t aValue, const uint8_t aNumValidBits) {
+  static_assert(std::is_integral_v<ResultT> && std::is_signed_v<ResultT>,
+                "ResultT must be a signed integral type");
+  MOZ_ASSERT(aNumValidBits < 32U && aNumValidBits > 1);
+
+  using UnsignedResultT = std::decay_t<std::make_unsigned_t<ResultT>>;
+
+  const uint8_t kResultWidthBits = sizeof(ResultT) * 8;
+
+  // Shift left (as unsigned) so the immediate's sign bit becomes the top bit
+  const uint8_t shiftAmt = kResultWidthBits - aNumValidBits;
+  UnsignedResultT shiftedLeft = static_cast<UnsignedResultT>(aValue)
+                                << shiftAmt;
+
+  // Shift back as signed; relies on >> of a signed value being arithmetic
+  auto result = static_cast<ResultT>(shiftedLeft);
+  result >>= shiftAmt;
+
+  return result;
+}
+
+inline static uint32_t BuildUnconditionalBranchToRegister(const uint32_t aReg) {
+  MOZ_ASSERT(aReg < 32);
+  // BR aReg: base opcode 0xD61F0000 with the register number in bits 9:5
+  return 0xD61F0000 | (aReg << 5);
+}
+
+MFBT_API LoadOrBranch BUncondImmDecode(const uintptr_t aPC,
+                                       const uint32_t aInst);
+
+/**
+ * If |aTarget| is 128MB or more away from |aPC|, we need to use a veneer.
+ */
+inline static bool IsVeneerRequired(const uintptr_t aPC,
+                                    const uintptr_t aTarget) {
+  detail::Saturate<intptr_t> saturated(aTarget);
+  saturated -= aPC;  // presumably a saturating subtraction — see Saturate.h
+
+  uintptr_t absDiff = Abs(saturated.value());
+
+  return absDiff >= 0x08000000U;  // 0x08000000 bytes == 128MB
+}
+
+inline static bool IsUnconditionalBranchImm(const uint32_t aInst) {
+  return (aInst & 0xFC000000U) == 0x14000000U;  // top six bits must be 000101
+}
+
+inline static Maybe<uint32_t> BuildUnconditionalBranchImm(
+    const uintptr_t aPC, const uintptr_t aTarget) {  // Nothing() if unencodable
+  detail::Saturate<intptr_t> saturated(aTarget);
+  saturated -= aPC;
+  // The byte distance must first fit in an int32_t.
+  CheckedInt<int32_t> offset(saturated.value());
+  if (!offset.isValid()) {
+    return Nothing();
+  }
+
+  // offset should be a multiple of 4, because it is divided by 4 below
+  MOZ_ASSERT(offset.value() % 4 == 0);
+  if (offset.value() % 4) {
+    return Nothing();
+  }
+
+  offset /= 4;
+  if (!offset.isValid()) {
+    return Nothing();
+  }
+
+  int32_t signbits = offset.value() & 0xFE000000;
+  // Ensure that offset is small enough to fit into the 26 bit region.
+  // We check that the sign bits are either all ones or all zeros.
+  MOZ_ASSERT(signbits == 0xFE000000 || !signbits);
+  if (signbits && signbits != 0xFE000000) {
+    return Nothing();
+  }
+
+  int32_t masked = offset.value() & 0x03FFFFFF;
+
+  // B imm26: opcode 0x14000000 with the offset in the low 26 bits
+  return Some(0x14000000U | masked);
+}
+
+/**
+ * Allocate and construct a veneer that provides an absolute 64-bit branch to
+ * the hook function. Returns 0 if |aTrampPool| has no trampoline available.
+ */
+template <typename TrampPoolT>
+inline static uintptr_t MakeVeneer(TrampPoolT& aTrampPool, void* aPrimaryTramp,
+                                   const uintptr_t aDestAddress) {
+  auto maybeVeneer = aTrampPool.GetNextTrampoline();
+  if (!maybeVeneer) {
+    return 0;
+  }
+
+  Trampoline<typename TrampPoolT::MMPolicyT> veneer(
+      std::move(maybeVeneer.ref()));
+
+  // Write the same header information that is used for trampolines
+  veneer.WriteEncodedPointer(nullptr);
+  veneer.WriteEncodedPointer(aPrimaryTramp);
+
+  veneer.StartExecutableCode();
+
+  // Register 16 is explicitly intended for veneers in ARM64, so we use that
+  // register without fear of clobbering anything important.
+  veneer.WriteLoadLiteral(aDestAddress, 16);
+  veneer.WriteInstruction(BuildUnconditionalBranchToRegister(16));  // BR x16
+
+  return reinterpret_cast<uintptr_t>(veneer.EndExecutableCode());
+}
+
+}  // namespace arm64
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_Arm64_h
diff --git a/mozglue/misc/interceptor/MMPolicies.h b/mozglue/misc/interceptor/MMPolicies.h
new file mode 100644
index 0000000000..9eacc1896f
--- /dev/null
+++ b/mozglue/misc/interceptor/MMPolicies.h
@@ -0,0 +1,981 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_MMPolicies_h
+#define mozilla_interceptor_MMPolicies_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Span.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/Types.h"
+#include "mozilla/WindowsMapRemoteView.h"
+
+#include <windows.h>
+
+#if (NTDDI_VERSION < NTDDI_WIN10_RS4) || defined(__MINGW32__)
+PVOID WINAPI VirtualAlloc2(HANDLE Process, PVOID BaseAddress, SIZE_T Size,
+                           ULONG AllocationType, ULONG PageProtection,
+                           MEM_EXTENDED_PARAMETER* ExtendedParameters,
+                           ULONG ParameterCount);
+PVOID WINAPI MapViewOfFile3(HANDLE FileMapping, HANDLE Process,
+                            PVOID BaseAddress, ULONG64 Offset, SIZE_T ViewSize,
+                            ULONG AllocationType, ULONG PageProtection,
+                            MEM_EXTENDED_PARAMETER* ExtendedParameters,
+                            ULONG ParameterCount);
+#endif  // (NTDDI_VERSION < NTDDI_WIN10_RS4) || defined(__MINGW32__)
+
+// _CRT_RAND_S is not defined everywhere, but we need it.
+#if !defined(_CRT_RAND_S)
+extern "C" errno_t rand_s(unsigned int* randomValue);
+#endif  // !defined(_CRT_RAND_S)
+
+// Declare only the functions we need from NativeNt.h here; including the
+// entire NativeNt.h would cause a circular dependency.
+namespace mozilla {
+namespace nt {
+SIZE_T WINAPI VirtualQueryEx(HANDLE aProcess, LPCVOID aAddress,
+                             PMEMORY_BASIC_INFORMATION aMemInfo,
+                             SIZE_T aMemInfoLen);
+
+SIZE_T WINAPI VirtualQuery(LPCVOID aAddress, PMEMORY_BASIC_INFORMATION aMemInfo,
+                           SIZE_T aMemInfoLen);
+}  // namespace nt
+}  // namespace mozilla
+
+namespace mozilla {
+namespace interceptor {
+
+// This class implements memory operations without calling kernel32 functions
+// directly (VirtualProtect is passed in) so derived classes can use them.
+class MOZ_TRIVIAL_CTOR_DTOR MMPolicyInProcessPrimitive {
+ protected:
+  bool ProtectInternal(decltype(&::VirtualProtect) aVirtualProtect,
+                       void* aVAddress, size_t aSize, uint32_t aProtFlags,
+                       uint32_t* aPrevProtFlags) const {
+    MOZ_ASSERT(aPrevProtFlags);  // out-param receiving the old protection
+    BOOL ok = aVirtualProtect(aVAddress, aSize, aProtFlags,
+                              reinterpret_cast<PDWORD>(aPrevProtFlags));
+    if (!ok && aPrevProtFlags) {
+      // VirtualProtect can fail but still set valid protection flags.
+      // Let's clear those upon failure.
+      *aPrevProtFlags = 0;
+    }
+
+    return !!ok;  // BOOL -> bool
+  }
+
+ public:
+  bool Read(void* aToPtr, const void* aFromPtr, size_t aLen) const {
+    ::memcpy(aToPtr, aFromPtr, aLen);  // plain copy; no failure is detected
+    return true;
+  }
+
+  bool Write(void* aToPtr, const void* aFromPtr, size_t aLen) const {
+    ::memcpy(aToPtr, aFromPtr, aLen);  // plain copy; no failure is detected
+    return true;
+  }
+
+  /**
+   * @return true if the page that hosts aVAddress is committed and accessible.
+   */
+  bool IsPageAccessible(uintptr_t aVAddress) const {
+    MEMORY_BASIC_INFORMATION mbi;
+    SIZE_T result = nt::VirtualQuery(reinterpret_cast<LPCVOID>(aVAddress), &mbi,
+                                     sizeof(mbi));
+    // The query must succeed, and the page be committed and not PAGE_NOACCESS.
+    return result && mbi.AllocationProtect && mbi.State == MEM_COMMIT &&
+           mbi.Protect != PAGE_NOACCESS;
+  }
+};
+
+class MOZ_TRIVIAL_CTOR_DTOR MMPolicyBase {
+ protected:
+  static uintptr_t AlignDown(const uintptr_t aUnaligned,
+                             const uintptr_t aAlignTo) {
+    MOZ_ASSERT(IsPowerOfTwo(aAlignTo));  // the mask trick below requires this
+#pragma warning(suppress : 4146)
+    return aUnaligned & (-aAlignTo);  // -aAlignTo == ~(aAlignTo - 1)
+  }
+
+  static uintptr_t AlignUp(const uintptr_t aUnaligned,
+                           const uintptr_t aAlignTo) {
+    MOZ_ASSERT(IsPowerOfTwo(aAlignTo));  // the mask trick below requires this
+#pragma warning(suppress : 4146)
+    return aUnaligned + ((-aUnaligned) & (aAlignTo - 1));  // add gap to next
+  }
+
+  static PVOID AlignUpToRegion(PVOID aUnaligned, uintptr_t aAlignTo,
+                               size_t aLen, size_t aDesiredLen) {
+    const uintptr_t start = reinterpret_cast<uintptr_t>(aUnaligned);
+    const uintptr_t alignedStart = AlignUp(start, aAlignTo);
+    MOZ_ASSERT(alignedStart >= start);
+
+    // Bytes lost to alignment padding at the front of the region.
+    const uintptr_t padding = alignedStart - start;
+    if (aLen < padding || aLen - padding < aDesiredLen) {
+      return nullptr;
+    }
+    return reinterpret_cast<PVOID>(alignedStart);
+  }
+
+ public:
+#if defined(NIGHTLY_BUILD)
+  Maybe<DetourError> mLastError;  // most recent error recorded, if any
+  const Maybe<DetourError>& GetLastDetourError() const { return mLastError; }
+  template <typename... Args>
+  void SetLastDetourError(Args&&... aArgs) {  // builds a DetourError from aArgs
+    mLastError = Some(DetourError(std::forward<Args>(aArgs)...));
+  }
+#else
+  template <typename... Args>
+  void SetLastDetourError(Args&&... aArgs) {}  // no-op without NIGHTLY_BUILD
+#endif  // defined(NIGHTLY_BUILD)
+
+  DWORD ComputeAllocationSize(const uint32_t aRequestedSize) const {
+    MOZ_ASSERT(aRequestedSize);
+
+    // Round up to the next multiple of the allocation granularity.
+    const uint32_t granularity = GetAllocGranularity();
+    const uint32_t remainder = aRequestedSize % granularity;
+    if (!remainder) {
+      return aRequestedSize;
+    }
+
+    const DWORD padding = granularity - remainder;
+    return static_cast<DWORD>(aRequestedSize) + padding;
+  }
+
+  DWORD GetAllocGranularity() const {
+    static const DWORD kAllocGranularity = []() -> DWORD {  // runs only once
+      SYSTEM_INFO sysInfo;
+      ::GetSystemInfo(&sysInfo);
+      return sysInfo.dwAllocationGranularity;
+    }();
+
+    return kAllocGranularity;  // value captured on the first call
+  }
+
+  DWORD GetPageSize() const {
+    static const DWORD kPageSize = []() -> DWORD {  // runs only once
+      SYSTEM_INFO sysInfo;
+      ::GetSystemInfo(&sysInfo);
+      return sysInfo.dwPageSize;
+    }();
+
+    return kPageSize;  // value captured on the first call
+  }
+
+  uintptr_t GetMaxUserModeAddress() const {
+    static const uintptr_t kMaxUserModeAddr = []() -> uintptr_t {  // runs once
+      SYSTEM_INFO sysInfo;
+      ::GetSystemInfo(&sysInfo);
+      return reinterpret_cast<uintptr_t>(sysInfo.lpMaximumApplicationAddress);
+    }();
+
+    return kMaxUserModeAddr;  // value captured on the first call
+  }
+
+  static const uint8_t* GetLowerBound(const Span<const uint8_t>& aBounds) {
+    return &(*aBounds.cbegin());  // first byte; NOTE(review): assumes non-empty
+  }
+
+  static const uint8_t* GetUpperBoundIncl(const Span<const uint8_t>& aBounds) {
+    // We return an upper bound that is inclusive: the address of the last byte.
+    return &(*(aBounds.cend() - 1));  // NOTE(review): assumes non-empty span
+  }
+
+  static const uint8_t* GetUpperBoundExcl(const Span<const uint8_t>& aBounds) {
+    // We return an upper bound that is exclusive by adding 1 to the inclusive
+    // upper bound, i.e. the address one past the last byte.
+    return GetUpperBoundIncl(aBounds) + 1;
+  }
+
+  /**
+   * Converts a "pivot" address plus a maximum distance (convenient because
+   * branch instructions operate within a range of the program counter) into
+   * lower and upper bounds, which are easier to manage regions of memory with.
+   * The bounds are clamped to [1MB, max user-mode address], aligned to the
+   * allocation granularity, and the upper bound is reduced by |aSize|.
+   * Returns Nothing() for a zero pivot/distance or an empty resulting range.
+   */
+  Maybe<Span<const uint8_t>> SpanFromPivotAndDistance(
+      const uint32_t aSize, const uintptr_t aPivotAddr,
+      const uint32_t aMaxDistanceFromPivot) const {
+    if (!aPivotAddr || !aMaxDistanceFromPivot) {
+      return Nothing();
+    }
+
+    // We don't allow regions below 1MB so that we're not allocating near any
+    // sensitive areas in our address space.
+    const uintptr_t kMinAllowableAddress = 0x100000;
+
+    const uintptr_t kGranularity(GetAllocGranularity());
+
+    // We subtract the max distance from the pivot to determine our lower bound.
+    CheckedInt<uintptr_t> lowerBound(aPivotAddr);
+    lowerBound -= aMaxDistanceFromPivot;
+    if (lowerBound.isValid()) {
+      // In this case, the subtraction has not underflowed, but we still want
+      // the lower bound to be at least kMinAllowableAddress.
+      lowerBound = std::max(lowerBound.value(), kMinAllowableAddress);
+    } else {
+      // In this case, we underflowed. Forcibly set the lower bound to
+      // kMinAllowableAddress.
+      lowerBound = CheckedInt<uintptr_t>(kMinAllowableAddress);
+    }
+
+    // Align up to the next unit of allocation granularity when necessary.
+    lowerBound = AlignUp(lowerBound.value(), kGranularity);
+    MOZ_ASSERT(lowerBound.isValid());
+    if (!lowerBound.isValid()) {
+      return Nothing();
+    }
+
+    // We must ensure that our region is below the maximum allowable user-mode
+    // address, or our reservation will fail.
+    const uintptr_t kMaxUserModeAddr = GetMaxUserModeAddress();
+
+    // We add the max distance from the pivot to determine our upper bound.
+    CheckedInt<uintptr_t> upperBound(aPivotAddr);
+    upperBound += aMaxDistanceFromPivot;
+    if (upperBound.isValid()) {
+      // In this case, the addition has not overflowed, but we still want
+      // the upper bound to be at most kMaxUserModeAddr.
+      upperBound = std::min(upperBound.value(), kMaxUserModeAddr);
+    } else {
+      // In this case, we overflowed. Forcibly set the upper bound to
+      // kMaxUserModeAddr.
+      upperBound = CheckedInt<uintptr_t>(kMaxUserModeAddr);
+    }
+
+    // Subtract the desired allocation size so that any chunk allocated in the
+    // region will be reachable.
+    upperBound -= aSize;
+    if (!upperBound.isValid()) {
+      return Nothing();
+    }
+
+    // Align down to the next unit of allocation granularity when necessary.
+    upperBound = AlignDown(upperBound.value(), kGranularity);
+    if (!upperBound.isValid()) {
+      return Nothing();
+    }
+
+    MOZ_ASSERT(lowerBound.value() < upperBound.value());
+    if (lowerBound.value() >= upperBound.value()) {
+      return Nothing();
+    }
+
+    // Return the result as a Span starting at lowerBound, ending at upperBound
+    return Some(Span(reinterpret_cast<const uint8_t*>(lowerBound.value()),
+                     upperBound.value() - lowerBound.value()));
+  }
+
+  /**
+   * This function locates a virtual memory region of |aDesiredBytesLen| that
+   * resides in the interval [aRangeMin, aRangeMax). We do this by scanning the
+   * virtual memory space for a block of unallocated memory that is sufficiently
+   * large.
+   */
+  PVOID FindRegion(HANDLE aProcess, const size_t aDesiredBytesLen,
+                   const uint8_t* aRangeMin, const uint8_t* aRangeMax) {
+    // Convert the given pointers to uintptr_t because we should not
+    // compare two pointers unless they are from the same array or object.
+    uintptr_t rangeMin = reinterpret_cast<uintptr_t>(aRangeMin);
+    uintptr_t rangeMax = reinterpret_cast<uintptr_t>(aRangeMax);
+
+    const DWORD kGranularity = GetAllocGranularity();
+    if (!aDesiredBytesLen) {
+      SetLastDetourError(MMPOLICY_RESERVE_FINDREGION_INVALIDLEN);
+      return nullptr;
+    }
+
+    MOZ_ASSERT(rangeMin < rangeMax);
+    // A range smaller than the request would make the subtractions below wrap.
+    if (rangeMin >= rangeMax || rangeMax - rangeMin < aDesiredBytesLen) {
+      SetLastDetourError(MMPOLICY_RESERVE_FINDREGION_INVALIDRANGE);
+      return nullptr;
+    }
+
+    // Generate a randomized base address that falls within the interval
+    // [aRangeMin, aRangeMax - aDesiredBytesLen]
+    unsigned int rnd = 0;
+    rand_s(&rnd);  // Result unchecked; rnd == 0 simply means offset 0.
+
+    // Reduce rnd to a value that falls within the acceptable range
+    uintptr_t maxOffset =
+        (rangeMax - rangeMin - aDesiredBytesLen) / kGranularity;
+    // Divide by maxOffset + 1 because maxOffset * kGranularity is acceptable.
+    uintptr_t offset = (uintptr_t(rnd) % (maxOffset + 1)) * kGranularity;
+
+    // Start searching at this address
+    const uintptr_t searchStart = rangeMin + offset;
+    // The max address needs to incorporate the desired length
+    const uintptr_t kMaxPtr = rangeMax - aDesiredBytesLen;
+
+    MOZ_DIAGNOSTIC_ASSERT(searchStart <= kMaxPtr);
+
+    MEMORY_BASIC_INFORMATION mbi;
+    SIZE_T len = sizeof(mbi);
+
+    // First pass: scan [searchStart, kMaxPtr] for a free chunk that is at
+    // least as large as aDesiredBytesLen.
+    for (uintptr_t address = searchStart; address <= kMaxPtr;) {
+      if (nt::VirtualQueryEx(aProcess, reinterpret_cast<uint8_t*>(address),
+                             &mbi, len) != len) {
+        SetLastDetourError(MMPOLICY_RESERVE_FINDREGION_VIRTUALQUERY_ERROR,
+                           ::GetLastError());
+        return nullptr;
+      }
+
+      if (mbi.State == MEM_FREE) {
+        // |mbi.BaseAddress| is aligned with the page granularity, but may not
+        // be aligned with the allocation granularity.  VirtualAlloc does not
+        // accept such a non-aligned address unless the corresponding allocation
+        // region is free.  So we get the next boundary's start address.
+        PVOID regionStart = AlignUpToRegion(mbi.BaseAddress, kGranularity,
+                                            mbi.RegionSize, aDesiredBytesLen);
+        if (regionStart) {
+          return regionStart;
+        }
+      }
+
+      address = reinterpret_cast<uintptr_t>(mbi.BaseAddress) + mbi.RegionSize;
+    }
+
+    // Second pass (wrap-around): scan [aRangeMin, searchStart)
+    for (uintptr_t address = rangeMin; address < searchStart;) {
+      if (nt::VirtualQueryEx(aProcess, reinterpret_cast<uint8_t*>(address),
+                             &mbi, len) != len) {
+        SetLastDetourError(MMPOLICY_RESERVE_FINDREGION_VIRTUALQUERY_ERROR,
+                           ::GetLastError());
+        return nullptr;
+      }
+
+      if (mbi.State == MEM_FREE) {
+        PVOID regionStart = AlignUpToRegion(mbi.BaseAddress, kGranularity,
+                                            mbi.RegionSize, aDesiredBytesLen);
+        if (regionStart) {
+          return regionStart;
+        }
+      }
+
+      address = reinterpret_cast<uintptr_t>(mbi.BaseAddress) + mbi.RegionSize;
+    }
+
+    SetLastDetourError(MMPOLICY_RESERVE_FINDREGION_NO_FREE_REGION,
+                       ::GetLastError());
+    return nullptr;
+  }
+
+  /**
+   * This function reserves a |aSize| block of virtual memory.
+   *
+   * When |aBounds| is Nothing, it just calls |aReserveFn| and lets Windows
+   * choose the base address.
+   *
+   * Otherwise, it tries to call |aReserveRangeFn| to reserve the memory within
+   * the bounds provided by |aBounds|. It is advantageous to use this function
+   * because the OS's VM manager has better information as to which base
+   * addresses are the best to use.
+   *
+   * If |aReserveRangeFn| returns Nothing, this means that the platform support
+   * is not available. In that case, we fall back to manually computing a region
+   * to use for reserving the memory by calling |FindRegion|.
+   */
+  template <typename ReserveFnT, typename ReserveRangeFnT>
+  PVOID Reserve(HANDLE aProcess, const uint32_t aSize,
+                const ReserveFnT& aReserveFn,
+                const ReserveRangeFnT& aReserveRangeFn,
+                const Maybe<Span<const uint8_t>>& aBounds) {
+    // Strategy, in order: (1) no bounds -> plain reservation; (2) bounded
+    // reservation through the OS range API; (3) manual search via FindRegion
+    // with retries; (4) unconstrained reservation as a last resort.
+
+    if (!aBounds) {
+      // Unconstrained request: the OS picks the base address.
+      PVOID anywhere = aReserveFn(aProcess, nullptr, aSize);
+      if (!anywhere) {
+        SetLastDetourError(MMPOLICY_RESERVE_NOBOUND_RESERVE_ERROR,
+                           ::GetLastError());
+      }
+      return anywhere;
+    }
+
+    const uint8_t* const rangeMin = GetLowerBound(aBounds.ref());
+    const uint8_t* const rangeMaxExcl = GetUpperBoundExcl(aBounds.ref());
+
+    // Preferred path: let the OS pick an address inside the bounds. Any value,
+    // even a null one, means the range API exists and its answer is final.
+    Maybe<PVOID> ranged =
+        aReserveRangeFn(aProcess, aSize, rangeMin, rangeMaxExcl);
+    if (ranged) {
+      return ranged.value();
+    }
+
+    // The range API is not available on this machine, so search manually.
+    // |FindRegion| only reports an address that was free when it looked; it
+    // may get reserved elsewhere before we claim it, hence the retries.
+    const size_t kMaxAttempts = 8;
+
+    for (size_t attempt = 0; attempt < kMaxAttempts; ++attempt) {
+      PVOID candidate = FindRegion(aProcess, aSize, rangeMin, rangeMaxExcl);
+      if (!candidate) {
+        return nullptr;
+      }
+
+      PVOID reserved = aReserveFn(aProcess, candidate, aSize);
+      if (reserved) {
+        return reserved;
+      }
+    }
+
+    // Every attempt failed. Fall through to the default case where the
+    // system chooses any base address it wants; the hook will then be set on
+    // a best-effort basis.
+    PVOID fallback = aReserveFn(aProcess, nullptr, aSize);
+    if (!fallback) {
+      SetLastDetourError(MMPOLICY_RESERVE_FINAL_RESERVE_ERROR,
+                         ::GetLastError());
+    }
+    return fallback;
+  }
+};
+
+class MOZ_TRIVIAL_CTOR_DTOR MMPolicyInProcess
+    : public MMPolicyInProcessPrimitive,
+      public MMPolicyBase {
+ public:
+  typedef MMPolicyInProcess MMPolicyT;
+
+  constexpr MMPolicyInProcess()
+      : mBase(nullptr), mReservationSize(0), mCommitOffset(0) {}
+
+  MMPolicyInProcess(const MMPolicyInProcess&) = delete;
+  MMPolicyInProcess& operator=(const MMPolicyInProcess&) = delete;
+
+  MMPolicyInProcess(MMPolicyInProcess&& aOther)
+      : mBase(nullptr), mReservationSize(0), mCommitOffset(0) {
+    *this = std::move(aOther);
+  }
+
+  MMPolicyInProcess& operator=(MMPolicyInProcess&& aOther) {
+    // Self-move must be a no-op; otherwise we would null out our own state.
+    if (this != &aOther) {
+      mBase = aOther.mBase;
+      aOther.mBase = nullptr;
+      mCommitOffset = aOther.mCommitOffset;
+      aOther.mCommitOffset = 0;
+      mReservationSize = aOther.mReservationSize;
+      aOther.mReservationSize = 0;
+    }
+    return *this;
+  }
+
+  explicit operator bool() const { return !!mBase; }
+
+  /**
+   * Should we unhook everything upon destruction?
+   */
+  bool ShouldUnhookUponDestruction() const { return true; }
+
+#if defined(_M_IX86)
+  bool WriteAtomic(void* aDestPtr, const uint16_t aValue) const {
+    *static_cast<uint16_t*>(aDestPtr) = aValue;  // Plain 16-bit store.
+    return true;
+  }
+#endif  // defined(_M_IX86)
+
+  bool Protect(void* aVAddress, size_t aSize, uint32_t aProtFlags,
+               uint32_t* aPrevProtFlags) const {
+    return ProtectInternal(::VirtualProtect, aVAddress, aSize, aProtFlags,
+                           aPrevProtFlags);
+  }
+
+  bool FlushInstructionCache() const {
+    return !!::FlushInstructionCache(::GetCurrentProcess(), nullptr, 0);
+  }
+
+  static DWORD GetTrampWriteProtFlags() { return PAGE_EXECUTE_READWRITE; }
+
+#if defined(_M_X64)
+  bool IsTrampolineSpaceInLowest2GB() const {
+    return (mBase + mReservationSize) <=
+           reinterpret_cast<uint8_t*>(0x0000000080000000ULL);
+  }
+#endif  // defined(_M_X64)
+
+ protected:
+  uint8_t* GetLocalView() const { return mBase; }
+
+  uintptr_t GetRemoteView() const {
+    // Same as local view for in-process
+    return reinterpret_cast<uintptr_t>(mBase);
+  }
+
+  /**
+   * @return the effective number of bytes reserved, or 0 on failure
+   */
+  uint32_t Reserve(const uint32_t aSize,
+                   const Maybe<Span<const uint8_t>>& aBounds) {
+    if (!aSize) {
+      return 0;
+    }
+
+    if (mBase) {  // Already reserved; report the existing size.
+      MOZ_ASSERT(mReservationSize >= aSize);
+      return mReservationSize;
+    }
+
+    mReservationSize = ComputeAllocationSize(aSize);
+
+    auto reserveFn = [](HANDLE aProcess, PVOID aBase, uint32_t aSize) -> PVOID {
+      return ::VirtualAlloc(aBase, aSize, MEM_RESERVE, PAGE_NOACCESS);
+    };
+
+    auto reserveWithinRangeFn =
+        [](HANDLE aProcess, uint32_t aSize, const uint8_t* aRangeMin,
+           const uint8_t* aRangeMaxExcl) -> Maybe<PVOID> {
+      static const StaticDynamicallyLinkedFunctionPtr<decltype(
+          &::VirtualAlloc2)>
+          pVirtualAlloc2(L"kernelbase.dll", "VirtualAlloc2");
+      if (!pVirtualAlloc2) {
+        return Nothing();  // API missing: caller falls back to FindRegion.
+      }
+
+      // NB: MEM_ADDRESS_REQUIREMENTS::HighestEndingAddress is *inclusive*
+      MEM_ADDRESS_REQUIREMENTS memReq = {
+          const_cast<uint8_t*>(aRangeMin),
+          const_cast<uint8_t*>(aRangeMaxExcl - 1)};
+
+      MEM_EXTENDED_PARAMETER memParam = {};
+      memParam.Type = MemExtendedParameterAddressRequirements;
+      memParam.Pointer = &memReq;
+
+      return Some(pVirtualAlloc2(aProcess, nullptr, aSize, MEM_RESERVE,
+                                 PAGE_NOACCESS, &memParam, 1));
+    };
+
+    mBase = static_cast<uint8_t*>(
+        MMPolicyBase::Reserve(::GetCurrentProcess(), mReservationSize,
+                              reserveFn, reserveWithinRangeFn, aBounds));
+
+    if (!mBase) {
+      return 0;
+    }
+
+    return mReservationSize;
+  }
+
+  bool MaybeCommitNextPage(const uint32_t aRequestedOffset,
+                           const uint32_t aRequestedLength) {
+    if (!(*this)) {
+      return false;
+    }
+
+    uint32_t limit = aRequestedOffset + aRequestedLength - 1;  // last byte
+    if (limit < mCommitOffset) {
+      // No commit required
+      return true;
+    }
+
+    MOZ_DIAGNOSTIC_ASSERT(mCommitOffset < mReservationSize);
+    if (mCommitOffset >= mReservationSize) {
+      return false;
+    }
+
+    PVOID local = ::VirtualAlloc(mBase + mCommitOffset, GetPageSize(),
+                                 MEM_COMMIT, PAGE_EXECUTE_READ);
+    if (!local) {
+      return false;
+    }
+
+    mCommitOffset += GetPageSize();  // Exactly one more page is committed.
+    return true;
+  }
+
+ private:
+  uint8_t* mBase;  // Start of the reserved region (nullptr if none)
+  uint32_t mReservationSize;  // Bytes reserved at mBase
+  uint32_t mCommitOffset;  // Bytes committed so far, counted from mBase
+};
+
+// This class manages in-process memory access without using functions
+// imported from kernel32.dll.  Instead, it uses functions in its own
+// function table that are provided from outside.
+class MMPolicyInProcessEarlyStage : public MMPolicyInProcessPrimitive {
+ public:
+  struct Kernel32Exports {
+    decltype(&::FlushInstructionCache) mFlushInstructionCache;
+    decltype(&::GetModuleHandleW) mGetModuleHandleW;
+    decltype(&::GetSystemInfo) mGetSystemInfo;
+    decltype(&::VirtualProtect) mVirtualProtect;
+  };
+
+ private:
+  static DWORD GetPageSize(const Kernel32Exports& aK32Exports) {
+    SYSTEM_INFO sysInfo;
+    aK32Exports.mGetSystemInfo(&sysInfo);
+    return sysInfo.dwPageSize;
+  }
+
+  const Kernel32Exports& mK32Exports;  // Not owned; caller keeps it alive
+  const DWORD mPageSize;  // Cached once in the constructor
+
+ public:
+  explicit MMPolicyInProcessEarlyStage(const Kernel32Exports& aK32Exports)
+      : mK32Exports(aK32Exports), mPageSize(GetPageSize(mK32Exports)) {}
+
+  // The pattern of constructing a local static variable with a lambda,
+  // which can be seen in MMPolicyBase, is compiled into code with the
+  // critical section APIs like EnterCriticalSection imported from kernel32.dll.
+  // Because this class needs to be able to run in a process's early stage
+  // when IAT is not yet resolved, we cannot use that pattern, thus simply
+  // caching a value as a local member in the class.
+  DWORD GetPageSize() const { return mPageSize; }
+
+  bool Protect(void* aVAddress, size_t aSize, uint32_t aProtFlags,
+               uint32_t* aPrevProtFlags) const {
+    return ProtectInternal(mK32Exports.mVirtualProtect, aVAddress, aSize,
+                           aProtFlags, aPrevProtFlags);
+  }
+
+  bool FlushInstructionCache() const {
+    const HANDLE kCurrentProcess = reinterpret_cast<HANDLE>(-1);
+    return !!mK32Exports.mFlushInstructionCache(kCurrentProcess, nullptr, 0);
+  }
+};
+
+class MMPolicyOutOfProcess : public MMPolicyBase {
+ public:
+  typedef MMPolicyOutOfProcess MMPolicyT;
+
+  explicit MMPolicyOutOfProcess(HANDLE aProcess)
+      : mProcess(nullptr),
+        mMapping(nullptr),
+        mLocalView(nullptr),
+        mRemoteView(nullptr),
+        mReservationSize(0),
+        mCommitOffset(0) {
+    MOZ_ASSERT(aProcess);
+    ::DuplicateHandle(::GetCurrentProcess(), aProcess, ::GetCurrentProcess(),
+                      &mProcess, kAccessFlags, FALSE, 0);
+    MOZ_ASSERT(mProcess);
+  }
+
+  explicit MMPolicyOutOfProcess(DWORD aPid)
+      : mProcess(::OpenProcess(kAccessFlags, FALSE, aPid)),
+        mMapping(nullptr),
+        mLocalView(nullptr),
+        mRemoteView(nullptr),
+        mReservationSize(0),
+        mCommitOffset(0) {
+    MOZ_ASSERT(mProcess);
+  }
+
+  ~MMPolicyOutOfProcess() { Destroy(); }
+
+  MMPolicyOutOfProcess(MMPolicyOutOfProcess&& aOther)
+      : mProcess(nullptr),
+        mMapping(nullptr),
+        mLocalView(nullptr),
+        mRemoteView(nullptr),
+        mReservationSize(0),
+        mCommitOffset(0) {
+    *this = std::move(aOther);
+  }
+
+  MMPolicyOutOfProcess(const MMPolicyOutOfProcess& aOther) = delete;
+  MMPolicyOutOfProcess& operator=(const MMPolicyOutOfProcess&) = delete;
+
+  MMPolicyOutOfProcess& operator=(MMPolicyOutOfProcess&& aOther) {
+    // Self-move must be a no-op: Destroy() would close our own handles.
+    if (this == &aOther) {
+      return *this;
+    }
+
+    Destroy();
+
+    mProcess = aOther.mProcess;
+    aOther.mProcess = nullptr;
+    mMapping = aOther.mMapping;
+    aOther.mMapping = nullptr;
+    mLocalView = aOther.mLocalView;
+    aOther.mLocalView = nullptr;
+    mRemoteView = aOther.mRemoteView;
+    aOther.mRemoteView = nullptr;
+    mReservationSize = aOther.mReservationSize;
+    aOther.mReservationSize = 0;
+    mCommitOffset = aOther.mCommitOffset;
+    aOther.mCommitOffset = 0;
+
+    return *this;
+  }
+
+  explicit operator bool() const {
+    return mProcess && mMapping && mLocalView && mRemoteView;
+  }
+
+  bool ShouldUnhookUponDestruction() const {
+    // We don't clean up hooks for remote processes; they are expected to
+    // outlive our process.
+    return false;
+  }
+
+  // This function reads as many bytes as |aLen| from the target process and
+  // succeeds only when the entire area to be read is accessible.
+  bool Read(void* aToPtr, const void* aFromPtr, size_t aLen) const {
+    MOZ_ASSERT(mProcess);
+    if (!mProcess) {
+      return false;
+    }
+
+    SIZE_T numBytes = 0;
+    BOOL ok = ::ReadProcessMemory(mProcess, aFromPtr, aToPtr, aLen, &numBytes);
+    return ok && numBytes == aLen;
+  }
+
+  // This function reads as many bytes as possible from the target process up
+  // to |aLen| bytes and returns the number of bytes which was actually read.
+  size_t TryRead(void* aToPtr, const void* aFromPtr, size_t aLen) const {
+    MOZ_ASSERT(mProcess);
+    if (!mProcess) {
+      return 0;
+    }
+
+    uint32_t pageSize = GetPageSize();
+    uintptr_t pageMask = pageSize - 1;
+
+    auto rangeStart = reinterpret_cast<uintptr_t>(aFromPtr);
+    auto rangeEnd = rangeStart + aLen;
+
+    while (rangeStart < rangeEnd) {
+      SIZE_T numBytes = 0;
+      BOOL ok = ::ReadProcessMemory(mProcess, aFromPtr, aToPtr,
+                                    rangeEnd - rangeStart, &numBytes);
+      if (ok) {
+        return numBytes;
+      }
+
+      // If ReadProcessMemory fails, try to read up to each page boundary from
+      // the end of the requested area one by one.
+      if (rangeEnd & pageMask) {
+        rangeEnd &= ~pageMask;
+      } else {
+        rangeEnd -= pageSize;
+      }
+    }
+
+    return 0;
+  }
+
+  bool Write(void* aToPtr, const void* aFromPtr, size_t aLen) const {
+    MOZ_ASSERT(mProcess);
+    if (!mProcess) {
+      return false;
+    }
+
+    SIZE_T numBytes = 0;
+    BOOL ok = ::WriteProcessMemory(mProcess, aToPtr, aFromPtr, aLen, &numBytes);
+    return ok && numBytes == aLen;
+  }
+
+  bool Protect(void* aVAddress, size_t aSize, uint32_t aProtFlags,
+               uint32_t* aPrevProtFlags) const {
+    MOZ_ASSERT(mProcess);
+    if (!mProcess) {
+      return false;
+    }
+
+    MOZ_ASSERT(aPrevProtFlags);
+    BOOL ok = ::VirtualProtectEx(mProcess, aVAddress, aSize, aProtFlags,
+                                 reinterpret_cast<PDWORD>(aPrevProtFlags));
+    if (!ok && aPrevProtFlags) {
+      // VirtualProtectEx can fail but still set valid protection flags.
+      // Let's clear those upon failure.
+      *aPrevProtFlags = 0;
+    }
+
+    return !!ok;
+  }
+
+  /**
+   * @return true if the page that hosts aVAddress is accessible.
+   */
+  bool IsPageAccessible(uintptr_t aVAddress) const {
+    MEMORY_BASIC_INFORMATION mbi;
+    SIZE_T result = nt::VirtualQueryEx(
+        mProcess, reinterpret_cast<LPCVOID>(aVAddress), &mbi, sizeof(mbi));
+
+    return result && mbi.AllocationProtect && mbi.State == MEM_COMMIT &&
+           mbi.Protect != PAGE_NOACCESS;
+  }
+
+  bool FlushInstructionCache() const {
+    return !!::FlushInstructionCache(mProcess, nullptr, 0);
+  }
+
+  static DWORD GetTrampWriteProtFlags() { return PAGE_READWRITE; }
+
+#if defined(_M_X64)
+  bool IsTrampolineSpaceInLowest2GB() const {
+    return (GetRemoteView() + mReservationSize) <= 0x0000000080000000ULL;
+  }
+#endif  // defined(_M_X64)
+
+ protected:
+  uint8_t* GetLocalView() const { return mLocalView; }
+
+  uintptr_t GetRemoteView() const {
+    return reinterpret_cast<uintptr_t>(mRemoteView);
+  }
+
+  /**
+   * @return the effective number of bytes reserved, or 0 on failure
+   */
+  uint32_t Reserve(const uint32_t aSize,
+                   const Maybe<Span<const uint8_t>>& aBounds) {
+    if (!aSize || !mProcess) {
+      SetLastDetourError(MMPOLICY_RESERVE_INVALIDARG);
+      return 0;
+    }
+
+    if (mRemoteView) {  // Already mapped; report the existing size.
+      MOZ_ASSERT(mReservationSize >= aSize);
+      SetLastDetourError(MMPOLICY_RESERVE_ZERO_RESERVATIONSIZE);
+      return mReservationSize;
+    }
+
+    mReservationSize = ComputeAllocationSize(aSize);
+
+    mMapping = ::CreateFileMappingW(INVALID_HANDLE_VALUE, nullptr,
+                                    PAGE_EXECUTE_READWRITE | SEC_RESERVE, 0,
+                                    mReservationSize, nullptr);
+    if (!mMapping) {
+      SetLastDetourError(MMPOLICY_RESERVE_CREATEFILEMAPPING, ::GetLastError());
+      return 0;
+    }
+
+    mLocalView = static_cast<uint8_t*>(
+        ::MapViewOfFile(mMapping, FILE_MAP_WRITE, 0, 0, 0));
+    if (!mLocalView) {
+      SetLastDetourError(MMPOLICY_RESERVE_MAPVIEWOFFILE, ::GetLastError());
+      return 0;
+    }
+
+    auto reserveFn = [mapping = mMapping](HANDLE aProcess, PVOID aBase,
+                                          uint32_t aSize) -> PVOID {
+      return mozilla::MapRemoteViewOfFile(mapping, aProcess, 0ULL, aBase, 0, 0,
+                                          PAGE_EXECUTE_READ);
+    };
+
+    auto reserveWithinRangeFn =
+        [mapping = mMapping](HANDLE aProcess, uint32_t aSize,
+                             const uint8_t* aRangeMin,
+                             const uint8_t* aRangeMaxExcl) -> Maybe<PVOID> {
+      static const StaticDynamicallyLinkedFunctionPtr<decltype(
+          &::MapViewOfFile3)>
+          pMapViewOfFile3(L"kernelbase.dll", "MapViewOfFile3");
+      if (!pMapViewOfFile3) {
+        return Nothing();  // API missing: caller falls back to FindRegion.
+      }
+
+      // NB: MEM_ADDRESS_REQUIREMENTS::HighestEndingAddress is *inclusive*
+      MEM_ADDRESS_REQUIREMENTS memReq = {
+          const_cast<uint8_t*>(aRangeMin),
+          const_cast<uint8_t*>(aRangeMaxExcl - 1)};
+
+      MEM_EXTENDED_PARAMETER memParam = {};
+      memParam.Type = MemExtendedParameterAddressRequirements;
+      memParam.Pointer = &memReq;
+
+      return Some(pMapViewOfFile3(mapping, aProcess, nullptr, 0, aSize, 0,
+                                  PAGE_EXECUTE_READ, &memParam, 1));
+    };
+
+    mRemoteView = MMPolicyBase::Reserve(mProcess, mReservationSize, reserveFn,
+                                        reserveWithinRangeFn, aBounds);
+    if (!mRemoteView) {
+      return 0;
+    }
+
+    return mReservationSize;
+  }
+
+  bool MaybeCommitNextPage(const uint32_t aRequestedOffset,
+                           const uint32_t aRequestedLength) {
+    if (!(*this)) {
+      return false;
+    }
+
+    uint32_t limit = aRequestedOffset + aRequestedLength - 1;  // last byte
+    if (limit < mCommitOffset) {
+      // No commit required
+      return true;
+    }
+
+    MOZ_DIAGNOSTIC_ASSERT(mCommitOffset < mReservationSize);
+    if (mCommitOffset >= mReservationSize) {
+      return false;
+    }
+
+    PVOID local = ::VirtualAlloc(mLocalView + mCommitOffset, GetPageSize(),
+                                 MEM_COMMIT, PAGE_READWRITE);
+    if (!local) {
+      return false;
+    }
+
+    PVOID remote = ::VirtualAllocEx(
+        mProcess, static_cast<uint8_t*>(mRemoteView) + mCommitOffset,
+        GetPageSize(), MEM_COMMIT, PAGE_EXECUTE_READ);
+    if (!remote) {
+      return false;
+    }
+
+    mCommitOffset += GetPageSize();  // Exactly one more page is committed.
+    return true;
+  }
+
+ private:
+  void Destroy() {
+    // We always leak the remote view
+    if (mLocalView) {
+      ::UnmapViewOfFile(mLocalView);
+      mLocalView = nullptr;
+    }
+
+    if (mMapping) {
+      ::CloseHandle(mMapping);
+      mMapping = nullptr;
+    }
+
+    if (mProcess) {
+      ::CloseHandle(mProcess);
+      mProcess = nullptr;
+    }
+  }
+
+ private:
+  HANDLE mProcess;  // Target process handle; closed in Destroy()
+  HANDLE mMapping;  // File mapping behind both views; closed in Destroy()
+  uint8_t* mLocalView;  // View of mMapping in this process
+  PVOID mRemoteView;  // View of mMapping in the target process (leaked)
+  uint32_t mReservationSize;  // Size passed to CreateFileMappingW
+  uint32_t mCommitOffset;  // Bytes committed so far in both views
+
+  static const DWORD kAccessFlags = PROCESS_QUERY_INFORMATION |
+                                    PROCESS_VM_OPERATION | PROCESS_VM_READ |
+                                    PROCESS_VM_WRITE;
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_MMPolicies_h
diff --git a/mozglue/misc/interceptor/PatcherBase.h b/mozglue/misc/interceptor/PatcherBase.h
new file mode 100644
index 0000000000..e39a38fafd
--- /dev/null
+++ b/mozglue/misc/interceptor/PatcherBase.h
@@ -0,0 +1,141 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_PatcherBase_h
+#define mozilla_interceptor_PatcherBase_h
+
+#include "mozilla/interceptor/TargetFunction.h"
+
+namespace mozilla {
+namespace interceptor {
+
+template <typename MMPolicy>
+struct GetProcAddressSelector;
+
+template <>
+struct GetProcAddressSelector<MMPolicyOutOfProcess> {
+  FARPROC operator()(HMODULE aModule, const char* aName,
+                     const MMPolicyOutOfProcess& aMMPolicy) const {
+    // Look the export up through PEExportSection with the given policy.
+    using ExportSection = mozilla::nt::PEExportSection<MMPolicyOutOfProcess>;
+    auto remoteExports = ExportSection::Get(aModule, aMMPolicy);
+    return remoteExports.GetProcAddress(aName);
+  }
+};
+
+template <>
+struct GetProcAddressSelector<MMPolicyInProcess> {
+  FARPROC operator()(HMODULE aModule, const char* aName,
+                     const MMPolicyInProcess&) const {
+    // PEExportSection would also work in-process, but it does not resolve
+    // forwarded entries, so the native GetProcAddress remains the better
+    // choice here. The policy argument is intentionally unused.
+    return ::GetProcAddress(aModule, aName);
+  }
+};
+
+template <typename VMPolicy>
+class WindowsDllPatcherBase {
+ protected:
+  typedef typename VMPolicy::MMPolicyT MMPolicyT;
+
+  template <typename... Args>
+  explicit WindowsDllPatcherBase(Args&&... aArgs)
+      : mVMPolicy(std::forward<Args>(aArgs)...) {}
+
+  ReadOnlyTargetFunction<MMPolicyT> ResolveRedirectedAddress(
+      FARPROC aOriginalFunction) {
+    uintptr_t currAddr = reinterpret_cast<uintptr_t>(aOriginalFunction);
+
+#if defined(_M_IX86) || defined(_M_X64)
+    uintptr_t prevAddr = 0;
+    while (prevAddr != currAddr) {  // Stop once a pass changes nothing.
+      ReadOnlyTargetFunction<MMPolicyT> currFunc(mVMPolicy, currAddr);
+      prevAddr = currAddr;
+
+      // If the function entry is a jmp rel8 stub to the internal
+      // implementation, we resolve the redirected address from the jump target.
+      uintptr_t nextAddr = 0;
+      if (currFunc.IsRelativeShortJump(&nextAddr)) {
+        int8_t offset = nextAddr - currFunc.GetAddress() - 2;
+
+#  if defined(_M_X64)
+        // We redirect to the target of a short jump backwards if the target
+        // is another jump (only 32-bit displacement is currently supported).
+        // This case is used by GetFileAttributesW in Win7 x64.
+        if ((offset < 0) && (currFunc.IsValidAtOffset(2 + offset))) {
+          ReadOnlyTargetFunction<MMPolicyT> redirectFn(mVMPolicy, nextAddr);
+          if (redirectFn.IsIndirectNearJump(&nextAddr)) {
+            return redirectFn;  // i.e. the backward-jump target itself.
+          }
+        }
+#  endif
+
+        // We check that the bytes being jumped over are all nops (0x90) only
+        // when the offset is positive.  Otherwise we stop chasing redirects
+        // and let the caller fail to hook.
+        if (offset > 0) {
+          bool isNopSpace = true;
+          for (int8_t i = 0; i < offset; i++) {
+            if (currFunc[2 + i] != 0x90) {
+              isNopSpace = false;
+              break;
+            }
+          }
+
+          if (isNopSpace) {
+            currAddr = nextAddr;
+          }
+        }
+#  if defined(_M_X64)
+      } else if (currFunc.IsIndirectNearJump(&nextAddr) ||
+                 currFunc.IsRelativeNearJump(&nextAddr)) {
+#  else
+      } else if (currFunc.IsIndirectNearJump(&nextAddr)) {
+#  endif
+        // If function entry is jmp [disp32] such as used by kernel32, we
+        // resolve redirected address from import table. For x64, we resolve
+        // a relative near jump for TestDllInterceptor with --disable-optimize.
+        currAddr = nextAddr;
+      }
+    }
+#endif  // defined(_M_IX86) || defined(_M_X64)
+
+    if (currAddr != reinterpret_cast<uintptr_t>(aOriginalFunction) &&
+        !mVMPolicy.IsPageAccessible(currAddr)) {
+      currAddr = reinterpret_cast<uintptr_t>(aOriginalFunction);  // Fall back.
+    }
+    return ReadOnlyTargetFunction<MMPolicyT>(mVMPolicy, currAddr);
+  }
+
+ public:
+  FARPROC GetProcAddress(HMODULE aModule, const char* aName) const {
+    GetProcAddressSelector<MMPolicyT> selector;
+    return selector(aModule, aName, mVMPolicy);
+  }
+
+  bool IsPageAccessible(uintptr_t aAddress) const {
+    return mVMPolicy.IsPageAccessible(aAddress);
+  }
+
+#if defined(NIGHTLY_BUILD)
+  const Maybe<DetourError>& GetLastDetourError() const {
+    return mVMPolicy.GetLastDetourError();
+  }
+#endif  // defined(NIGHTLY_BUILD)
+  template <typename... Args>
+  void SetLastDetourError(Args&&... aArgs) {
+    mVMPolicy.SetLastDetourError(std::forward<Args>(aArgs)...);
+  }
+
+ protected:
+  VMPolicy mVMPolicy;  // Policy instance built from the constructor args
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_PatcherBase_h
diff --git a/mozglue/misc/interceptor/PatcherDetour.h b/mozglue/misc/interceptor/PatcherDetour.h
new file mode 100644
index 0000000000..7b04a20c2b
--- /dev/null
+++ b/mozglue/misc/interceptor/PatcherDetour.h
@@ -0,0 +1,1715 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_PatcherDetour_h
+#define mozilla_interceptor_PatcherDetour_h
+
+#if defined(_M_ARM64)
+#  include "mozilla/interceptor/Arm64.h"
+#endif  // defined(_M_ARM64)
+#include <utility>
+
+#include "mozilla/Maybe.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/Types.h"
+#include "mozilla/Unused.h"
+#include "mozilla/interceptor/PatcherBase.h"
+#include "mozilla/interceptor/Trampoline.h"
+#include "mozilla/interceptor/VMSharingPolicies.h"
+
+// Copies the next NBYTES bytes of the original function into the trampoline
+// and then advances the read cursor past them.  The macro expands in place,
+// so it relies on variables named |tramp| and |origBytes| being in scope
+// wherever it is used.
+#define COPY_CODES(NBYTES)                          \
+  do {                                              \
+    tramp.CopyFrom(origBytes.GetAddress(), NBYTES); \
+    origBytes += NBYTES;                            \
+  } while (0)
+
+namespace mozilla {
+namespace interceptor {
+
+// Bit flags that tune how a detour patcher installs its hooks.  Combine them
+// with the bitwise operators generated below.
+enum class DetourFlags : uint32_t {
+  eDefault = 0,
+  // Allow 10-byte patches when conditions allow
+  eEnable10BytePatch = 1 << 0,
+  // Force short patches at all times (x86-64 and arm64 testing only)
+  eTestOnlyForceShortPatch = 1 << 1,
+  // Don't resolve the redirection of JMP (e.g. kernel32 -> kernelbase)
+  eDontResolveRedirection = 1 << 2,
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(DetourFlags)
+
+// This class performs the tasks that depend only on MMPolicy, decoupled from
+// VMPolicy.  WindowsDllPatcherBase already exists, but it has to depend on
+// VMPolicy because it holds an instance of VMPolicy as a member.
+template <typename MMPolicyT>
+class WindowsDllDetourPatcherPrimitive {
+ protected:
+#if defined(_M_ARM64)
+  // Encoding of "LDR x16, .+8"
+  static const uint32_t kLdrX16Plus8 = 0x58000050U;
+#endif  // defined(_M_ARM64)
+
+  // Writes the architecture's default "jump to |aDest|" sequence at the
+  // current position of |target|.  Nothing is committed here; that is left to
+  // the caller.
+  static void ApplyDefaultPatch(WritableTargetFunction<MMPolicyT>& target,
+                                intptr_t aDest) {
+#if defined(_M_IX86)
+    // jmp rel32 (1 opcode byte + 4-byte hook displacement)
+    target.WriteByte(0xe9);
+    target.WriteDisp32(aDest);
+#elif defined(_M_X64)
+    // mov r11, address (2 opcode bytes + 8-byte immediate)
+    target.WriteByte(0x49);
+    target.WriteByte(0xbb);
+    target.WritePointer(aDest);
+
+    // jmp r11 (3 bytes)
+    target.WriteByte(0x41);
+    target.WriteByte(0xff);
+    target.WriteByte(0xe3);
+#elif defined(_M_ARM64)
+    // The default patch requires 16 bytes:
+    //   LDR x16, .+8
+    //   BR x16
+    //   <64-bit destination>
+    target.WriteLong(kLdrX16Plus8);
+    target.WriteLong(arm64::BuildUnconditionalBranchToRegister(16));
+    target.WritePointer(aDest);
+#else
+#  error "Unsupported processor architecture"
+#endif
+  }
+
+ public:
+  // Size in bytes of the sequence emitted by ApplyDefaultPatch on the current
+  // architecture.
+  constexpr static uint32_t GetWorstCaseRequiredBytesToPatch() {
+#if defined(_M_IX86)
+    return 5;
+#elif defined(_M_X64)
+    return 13;
+#elif defined(_M_ARM64)
+    return 16;
+#else
+#  error "Unsupported processor architecture"
+#endif
+  }
+
+  WindowsDllDetourPatcherPrimitive() = default;
+
+  // Neither copyable nor movable.
+  WindowsDllDetourPatcherPrimitive(const WindowsDllDetourPatcherPrimitive&) =
+      delete;
+  WindowsDllDetourPatcherPrimitive(WindowsDllDetourPatcherPrimitive&&) = delete;
+  WindowsDllDetourPatcherPrimitive& operator=(
+      const WindowsDllDetourPatcherPrimitive&) = delete;
+  WindowsDllDetourPatcherPrimitive& operator=(
+      WindowsDllDetourPatcherPrimitive&&) = delete;
+
+  // Overwrites the start of |aTargetFn| with the default patch jumping to
+  // |aHookDest|.  No trampoline is created and nothing is recorded for later
+  // restoration.  Returns false if the target could not be made writable or
+  // if committing the patch failed.
+  bool AddIrreversibleHook(const MMPolicyT& aMMPolicy, FARPROC aTargetFn,
+                           intptr_t aHookDest) {
+    ReadOnlyTargetFunction<MMPolicyT> readOnlyTarget(aMMPolicy, aTargetFn);
+    WritableTargetFunction<MMPolicyT> writableTarget(
+        readOnlyTarget.Promote(GetWorstCaseRequiredBytesToPatch()));
+
+    if (writableTarget) {
+      ApplyDefaultPatch(writableTarget, aHookDest);
+      return writableTarget.Commit();
+    }
+
+    return false;
+  }
+};
+
+template <typename VMPolicy>
+class WindowsDllDetourPatcher final
+    : public WindowsDllDetourPatcherPrimitive<typename VMPolicy::MMPolicyT>,
+      public WindowsDllPatcherBase<VMPolicy> {
+  using MMPolicyT = typename VMPolicy::MMPolicyT;
+  using TrampPoolT = typename VMPolicy::PoolType;
+  using PrimitiveT = WindowsDllDetourPatcherPrimitive<MMPolicyT>;
+  Maybe<DetourFlags> mFlags;
+
+ public:
+  // Forwards every constructor argument to WindowsDllPatcherBase<VMPolicy>.
+  template <typename... Args>
+  explicit WindowsDllDetourPatcher(Args&&... aArgs)
+      : WindowsDllPatcherBase<VMPolicy>(std::forward<Args>(aArgs)...) {}
+
+  // Destruction runs Clear(), which decides whether hooks get reset.
+  ~WindowsDllDetourPatcher() { Clear(); }
+
+  // Neither copyable nor movable.  NOTE(review): presumably because
+  // trampolines record |this| (see CreateTrampoline) -- confirm.
+  WindowsDllDetourPatcher(const WindowsDllDetourPatcher&) = delete;
+  WindowsDllDetourPatcher(WindowsDllDetourPatcher&&) = delete;
+  WindowsDllDetourPatcher& operator=(const WindowsDllDetourPatcher&) = delete;
+  WindowsDllDetourPatcher& operator=(WindowsDllDetourPatcher&&) = delete;
+
+  // Resets the hooks installed by this instance.  Does nothing unless the VM
+  // policy reports that hooks should be unhooked upon destruction.  Otherwise
+  // walks every trampoline held by the VM policy, skips those not tagged with
+  // |this|, and rewrites the jump at the start of each patched function so
+  // that it targets the trampoline's current remote address (or, for a
+  // patched foreign trampoline on x64, the target saved there) instead of the
+  // hook.  Finally clears the VM policy.  Failures on an individual
+  // trampoline are skipped silently and the loop moves on to the next one.
+  void Clear() {
+    if (!this->mVMPolicy.ShouldUnhookUponDestruction()) {
+      return;
+    }
+
+    // Number of bytes at the start of the patched function that must be
+    // writable in order to reset it: the leading opcode byte(s) or
+    // instructions, followed by the embedded displacement/pointer.
+#if defined(_M_IX86)
+    size_t nBytes = 1 + sizeof(intptr_t);
+#elif defined(_M_X64)
+    size_t nBytes = 2 + sizeof(intptr_t);
+#elif defined(_M_ARM64)
+    size_t nBytes = 2 * sizeof(uint32_t) + sizeof(uintptr_t);
+#else
+#  error "Unknown processor type"
+#endif
+
+    const auto& tramps = this->mVMPolicy.Items();
+    for (auto&& tramp : tramps) {
+      // First we read the pointer to the interceptor instance.
+      Maybe<uintptr_t> instance = tramp.ReadEncodedPointer();
+      if (!instance) {
+        continue;
+      }
+
+      if (instance.value() != reinterpret_cast<uintptr_t>(this)) {
+        // tramp does not belong to this interceptor instance.
+        continue;
+      }
+
+      // Runs at the end of this iteration on every path, including the
+      // |continue| statements below.
+      auto clearInstance = MakeScopeExit([&tramp]() -> void {
+        // Clear the instance pointer so that no future instances with the same
+        // |this| pointer will attempt to reset its hook.
+        tramp.Rewind();
+        tramp.WriteEncodedPointer(nullptr);
+      });
+
+      // Now we read the pointer to the intercepted function.
+      Maybe<uintptr_t> interceptedFn = tramp.ReadEncodedPointer();
+      if (!interceptedFn) {
+        continue;
+      }
+
+      WritableTargetFunction<MMPolicyT> origBytes(
+          this->mVMPolicy, interceptedFn.value(), nBytes);
+      if (!origBytes) {
+        continue;
+      }
+
+#if defined(_M_IX86) || defined(_M_X64)
+
+      // The first opcode byte identifies which kind of patch was applied.
+      Maybe<uint8_t> maybeOpcode1 = origBytes.ReadByte();
+      if (!maybeOpcode1) {
+        continue;
+      }
+
+      uint8_t opcode1 = maybeOpcode1.value();
+
+#  if defined(_M_IX86)
+      // Ensure the JMP from CreateTrampoline is where we expect it to be.
+      MOZ_ASSERT(opcode1 == 0xE9);
+      if (opcode1 != 0xE9) {
+        continue;
+      }
+
+      // Both leading pointer slots have been read by now, so the trampoline's
+      // current position is used as the new jump target.
+      intptr_t startOfTrampInstructions =
+          static_cast<intptr_t>(tramp.GetCurrentRemoteAddress());
+
+      origBytes.WriteDisp32(startOfTrampInstructions);
+      if (!origBytes) {
+        continue;
+      }
+
+      origBytes.Commit();
+#  elif defined(_M_X64)
+      if (opcode1 == 0x49) {
+        // 13-byte patch (mov r11, imm64; jmp r11)
+        if (!Clear13BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+          continue;
+        }
+      } else if (opcode1 == 0xB8) {
+        // 10-byte patch (mov eax, imm32; ...)
+        if (!Clear10BytePatch(origBytes)) {
+          continue;
+        }
+      } else if (opcode1 == 0x48) {
+        // The original function was just a different trampoline
+        if (!ClearTrampolinePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+          continue;
+        }
+      } else {
+        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+        continue;
+      }
+#  endif
+
+#elif defined(_M_ARM64)
+
+      // Ensure that we see the instruction that we expect
+      Maybe<uint32_t> inst1 = origBytes.ReadLong();
+      if (!inst1) {
+        continue;
+      }
+
+      if (inst1.value() == this->kLdrX16Plus8) {
+        // 16-byte patch (LDR x16, .+8; BR x16; pointer)
+        if (!Clear16BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+          continue;
+        }
+      } else if (arm64::IsUnconditionalBranchImm(inst1.value())) {
+        // 4-byte patch (immediate branch to an intermediate trampoline)
+        if (!Clear4BytePatch(inst1.value(), origBytes)) {
+          continue;
+        }
+      } else {
+        MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+        continue;
+      }
+
+#else
+#  error "Unknown processor type"
+#endif
+    }
+
+    this->mVMPolicy.Clear();
+  }
+
+#if defined(_M_X64)
+  // Resets a 13-byte patch (mov r11, imm64; jmp r11).  The caller has already
+  // consumed the first opcode byte; this verifies the second one (0xBB) and
+  // then overwrites the imm64 with |aResetToAddress|.  Returns false if the
+  // bytes cannot be read, do not match, or the write/commit fails.
+  bool Clear13BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+                        const uintptr_t aResetToAddress) {
+    const Maybe<uint8_t> secondOpcode = aOrigBytes.ReadByte();
+    if (!secondOpcode || secondOpcode.value() != 0xBB) {
+      return false;
+    }
+
+    aOrigBytes.WritePointer(aResetToAddress);
+    return !!aOrigBytes && aOrigBytes.Commit();
+  }
+
+  // Resets a function that was itself a trampoline (mov rax, imm64; ...) whose
+  // imm64 we replaced.  The caller has already consumed the first opcode byte;
+  // this verifies the second one (0xB8), loads the previously saved target
+  // from |aPtrToResetToAddress| and writes it back over the imm64.
+  bool ClearTrampolinePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+                            const uintptr_t aPtrToResetToAddress) {
+    const Maybe<uint8_t> secondOpcode = aOrigBytes.ReadByte();
+    if (!secondOpcode || secondOpcode.value() != 0xB8) {
+      return false;
+    }
+
+    // The target of the trampoline we replaced is stored at
+    // aPtrToResetToAddress; put it back where we got it from.
+    const uintptr_t savedTarget =
+        *reinterpret_cast<const uintptr_t*>(aPtrToResetToAddress);
+
+    aOrigBytes.WritePointer(savedTarget);
+    return !!aOrigBytes && aOrigBytes.Commit();
+  }
+
+  // Resets a 10-byte patch.  The caller has already consumed the leading 0xB8
+  // opcode; the imm32 that follows locates an intermediate trampoline that
+  // holds a regular 13-byte patch, which is the thing actually reset.
+  bool Clear10BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes) {
+    const Maybe<uint32_t> imm32 = aOrigBytes.ReadLong();
+    // We expect the high bit of the immediate to be clear
+    if (!imm32 || (imm32.value() & 0x80000000)) {
+      return false;
+    }
+
+    const uintptr_t intermediateCode = imm32.value();
+
+    // intermediateCode points to an intermediate trampoline that contains a
+    // 13-byte patch. Back up by sizeof(uintptr_t) so that the pointer to the
+    // stub trampoline stored just before it can be read as well.
+    WritableTargetFunction<MMPolicyT> intermediate(
+        this->mVMPolicy, intermediateCode - sizeof(uintptr_t),
+        13 + sizeof(uintptr_t));
+    if (!intermediate) {
+      return false;
+    }
+
+    const Maybe<uintptr_t> stubTramp = intermediate.ReadEncodedPtr();
+    if (!stubTramp || !stubTramp.value()) {
+      return false;
+    }
+
+    // The next byte must be the beginning of our normal mov r11, ptr patch
+    // sequence.
+    const Maybe<uint8_t> firstOpcode = intermediate.ReadByte();
+    if (!firstOpcode || firstOpcode.value() != 0x49) {
+      return false;
+    }
+
+    // The remainder is identical to clearing a normal 13-byte patch.
+    return Clear13BytePatch(intermediate, stubTramp.value());
+  }
+#endif  // defined(_M_X64)
+
+#if defined(_M_ARM64)
+  // Resets a 4-byte patch: |aBranchImm| is the immediate branch that the
+  // caller has just read from |aOrigBytes|.  It leads to an intermediate
+  // trampoline holding a veneer in the 16-byte patch format, which is the
+  // thing actually reset.
+  bool Clear4BytePatch(const uint32_t aBranchImm,
+                       WritableTargetFunction<MMPolicyT>& aOrigBytes) {
+    MOZ_ASSERT(arm64::IsUnconditionalBranchImm(aBranchImm));
+
+    // The branch has already been consumed, so its own address is one
+    // instruction behind the current position.
+    const auto branchAddr = aOrigBytes.GetCurrentAddress() - sizeof(uint32_t);
+    const uintptr_t veneerAddr =
+        arm64::BUncondImmDecode(branchAddr, aBranchImm).mAbsAddress;
+
+    // veneerAddr points to an intermediate trampoline that contains a veneer.
+    // We start one pointer earlier so that the pointer to the stub trampoline
+    // can be read too; hence the length is the size of the veneer plus one
+    // pointer.
+    const size_t intermediateLen = 16 + sizeof(uintptr_t);
+    WritableTargetFunction<MMPolicyT> intermediate(
+        this->mVMPolicy, veneerAddr - sizeof(uintptr_t), intermediateLen);
+    if (!intermediate) {
+      return false;
+    }
+
+    const Maybe<uintptr_t> stubTramp = intermediate.ReadEncodedPtr();
+    if (!stubTramp || !stubTramp.value()) {
+      return false;
+    }
+
+    const Maybe<uint32_t> firstInst = intermediate.ReadLong();
+    if (!firstInst || firstInst.value() != this->kLdrX16Plus8) {
+      return false;
+    }
+
+    return Clear16BytePatch(intermediate, stubTramp.value());
+  }
+
+  // Resets a 16-byte patch (LDR x16, .+8; BR x16; <pointer>).  The caller has
+  // already consumed the first instruction; this verifies that the second one
+  // is BR x16 and then overwrites the pointer that follows with
+  // |aResetToAddress|.
+  //
+  // Returns false if the instruction cannot be read or does not match, or if
+  // writing/committing the new pointer fails, matching the behavior of
+  // Clear13BytePatch on x64.
+  bool Clear16BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+                        const uintptr_t aResetToAddress) {
+    Maybe<uint32_t> inst2 = aOrigBytes.ReadLong();
+    if (!inst2) {
+      return false;
+    }
+
+    if (inst2.value() != arm64::BuildUnconditionalBranchToRegister(16)) {
+      MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+      return false;
+    }
+
+    // Clobber the pointer to our hook function with a pointer to the
+    // start of the trampoline.
+    aOrigBytes.WritePointer(aResetToAddress);
+    if (!aOrigBytes) {
+      // The write did not succeed; do not report the hook as reset.
+      return false;
+    }
+
+    // Propagate commit failure instead of unconditionally reporting success.
+    return aOrigBytes.Commit();
+  }
+#endif  // defined(_M_ARM64)
+
+  // Records the flags this patcher operates with.  Only the first call has an
+  // effect; once initialized, later calls are ignored.
+  void Init(DetourFlags aFlags = DetourFlags::eDefault) {
+    if (!Initialized()) {
+#if defined(_M_X64)
+      // On x64, forcing short patches implies allowing 10-byte patches.
+      if (aFlags & DetourFlags::eTestOnlyForceShortPatch) {
+        aFlags |= DetourFlags::eEnable10BytePatch;
+      }
+#endif  // defined(_M_X64)
+
+      mFlags = Some(aFlags);
+    }
+  }
+
+  // True once Init() has stored a set of flags.
+  bool Initialized() const { return !mFlags.isNothing(); }
+
+  // Hooks |aTargetFn| so that it jumps to |aHookDest|.  On success,
+  // |*aOrigFunc| receives the pointer produced by CreateTrampoline and true is
+  // returned; on failure |*aOrigFunc| is null and false is returned.  Init()
+  // must have been called first, since the flags are read unconditionally.
+  bool AddHook(FARPROC aTargetFn, intptr_t aHookDest, void** aOrigFunc) {
+    // Unless asked not to, follow any redirection at the function entry first.
+    ReadOnlyTargetFunction<MMPolicyT> target(
+        (mFlags.value() & DetourFlags::eDontResolveRedirection)
+            ? ReadOnlyTargetFunction<MMPolicyT>(
+                  this->mVMPolicy, reinterpret_cast<uintptr_t>(aTargetFn))
+            : this->ResolveRedirectedAddress(aTargetFn));
+
+    TrampPoolT* trampPool = nullptr;
+
+#if defined(_M_ARM64)
+    // ARM64 uses two passes to build its trampoline. The first pass uses a
+    // null tramp to determine how many bytes are needed. Once that is known,
+    // CreateTrampoline calls itself recursively with a "real" tramp.
+    Trampoline<MMPolicyT> tramp(nullptr);
+#else
+    Maybe<TrampPoolT> reservedPool = DoReserve();
+    MOZ_ASSERT(reservedPool);
+    if (!reservedPool) {
+      return false;
+    }
+
+    trampPool = reservedPool.ptr();
+
+    Maybe<Trampoline<MMPolicyT>> nextTramp(trampPool->GetNextTrampoline());
+    if (!nextTramp) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_NEXT_TRAMPOLINE_ERROR);
+      return false;
+    }
+
+    Trampoline<MMPolicyT> tramp(std::move(nextTramp.ref()));
+#endif
+
+    CreateTrampoline(target, trampPool, tramp, aHookDest, aOrigFunc);
+
+    // CreateTrampoline leaves |*aOrigFunc| null unless it succeeded.
+    return *aOrigFunc != nullptr;
+  }
+
+ private:
+  /**
+   * Returns the maximum distance that can be reached by a single
+   * unconditional jump instruction, which depends on the processor ISA.
+   * Note that this distance is *exclusive* when added to the pivot, so the
+   * value returned here is actually (maximum_absolute_offset + 1).
+   */
+  static uint32_t GetDefaultPivotDistance() {
+#if defined(_M_ARM64)
+    // Immediate unconditional branch allows for +/- 128MB
+    constexpr uint32_t kReach = 0x08000000U;
+#elif defined(_M_IX86) || defined(_M_X64)
+    // For these ISAs, our distance will assume the use of an unconditional jmp
+    // with a 32-bit signed displacement.
+    constexpr uint32_t kReach = 0x80000000U;
+#else
+#  error "Not defined for this processor arch"
+#endif
+    return kReach;
+  }
+
+  /**
+   * Reserves trampoline space for a specific module.  The pivot is the median
+   * address of the module's .text section.  While this may not be precise, it
+   * should be accurate enough for our purposes: to ensure that the trampoline
+   * space is reachable by any executable code in the module.
+   *
+   * Returns Nothing() and records a detour error if the PE headers or the
+   * .text section cannot be obtained; a failed reservation is recorded too.
+   */
+  Maybe<TrampPoolT> ReserveForModule(HMODULE aModule) {
+    nt::PEHeaders headers(aModule);
+    if (!headers) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_PE_ERROR);
+      return Nothing();
+    }
+
+    Maybe<Span<const uint8_t>> textSection = headers.GetTextSectionInfo();
+    if (!textSection) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_TEXT_ERROR);
+      return Nothing();
+    }
+
+    const Span<const uint8_t> text = textSection.value();
+    const uint8_t* textMidpoint = text.data() + (text.LengthBytes() / 2);
+
+    Maybe<TrampPoolT> pool = this->mVMPolicy.Reserve(
+        reinterpret_cast<uintptr_t>(textMidpoint), GetDefaultPivotDistance());
+    if (!pool) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_RESERVE_ERROR);
+    }
+    return pool;
+  }
+
+  // Reserves a trampoline pool.  With a module, the reservation is made near
+  // that module (see ReserveForModule).  Without one, the VM policy is asked
+  // with a zero pivot and distance, except on x64 with 10-byte patches
+  // enabled, where the pivot and distance are both set to 0x40000000.
+  Maybe<TrampPoolT> DoReserve(HMODULE aModule = nullptr) {
+    if (aModule) {
+      return ReserveForModule(aModule);
+    }
+
+    uintptr_t pivotAddr = 0;
+    uint32_t maxDistance = 0;
+
+#if defined(_M_X64)
+    if (mFlags.value() & DetourFlags::eEnable10BytePatch) {
+      // We must stay below the 2GB mark because a 10-byte patch uses movsxd
+      // (ie, sign extension) to expand the pointer to 64-bits, so bit 31 of any
+      // pointers into the reserved region must be 0.
+      pivotAddr = 0x40000000U;
+      maxDistance = 0x40000000U;
+    }
+#endif  // defined(_M_X64)
+
+    Maybe<TrampPoolT> pool = this->mVMPolicy.Reserve(pivotAddr, maxDistance);
+#if defined(NIGHTLY_BUILD)
+    // Only record a generic error if nothing more specific was set already.
+    if (!pool && this->GetLastDetourError().isNothing()) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_DO_RESERVE_ERROR);
+    }
+#endif  // defined(NIGHTLY_BUILD)
+    return pool;
+  }
+
+ protected:
+#if !defined(_M_ARM64)
+
+  const static int kPageSize = 4096;
+
+  // rex bits: masks and values used when examining a REX prefix byte
+  static const BYTE kMaskHighNibble = 0xF0;
+  static const BYTE kRexOpcode = 0x40;
+  static const BYTE kMaskRexW = 0x08;
+  static const BYTE kMaskRexR = 0x04;
+  static const BYTE kMaskRexX = 0x02;
+  static const BYTE kMaskRexB = 0x01;
+
+  // mod r/m bits: the ModR/M byte is split into mod (kMaskMod), reg
+  // (kMaskReg, shifted down by kRegFieldShift) and r/m (kMaskRm) fields.
+  static const BYTE kRegFieldShift = 3;
+  static const BYTE kMaskMod = 0xC0;
+  static const BYTE kMaskReg = 0x38;
+  static const BYTE kMaskRm = 0x07;
+  // r/m value indicating that a SIB byte follows (see CountModRmSib)
+  static const BYTE kRmNeedSib = 0x04;
+  // Values of the mod field, compared against (modrm & kMaskMod)
+  static const BYTE kModReg = 0xC0;
+  static const BYTE kModDisp32 = 0x80;
+  static const BYTE kModDisp8 = 0x40;
+  static const BYTE kModNoRegDisp = 0x00;
+  // With mod == kModNoRegDisp, this r/m value means a bare disp32 operand;
+  // CountModRmSib reports it as kModOperand64 on x64.
+  static const BYTE kRmNoRegDispDisp32 = 0x05;
+
+  // sib bits
+  static const BYTE kMaskSibScale = 0xC0;
+  static const BYTE kMaskSibIndex = 0x38;
+  static const BYTE kMaskSibBase = 0x07;
+  static const BYTE kSibBaseEbp = 0x05;
+
+  // Register bit IDs.
+  static const BYTE kRegAx = 0x0;
+  static const BYTE kRegCx = 0x1;
+  static const BYTE kRegDx = 0x2;
+  static const BYTE kRegBx = 0x3;
+  static const BYTE kRegSp = 0x4;
+  static const BYTE kRegBp = 0x5;
+  static const BYTE kRegSi = 0x6;
+  static const BYTE kRegDi = 0x7;
+
+  // Special ModR/M codes.  These indicate operands that cannot be simply
+  // memcpy-ed.  CountModRmSib returns them in place of a byte count; both are
+  // negative so they cannot be confused with a real length.
+  // Operand is a 64-bit RIP-relative address.
+  static const int kModOperand64 = -2;
+  // Operand is not yet handled by our trampoline.
+  static const int kModUnknown = -1;
+
+  /**
+   * Computes how many bytes are occupied by the ModR/M byte, the SIB byte (if
+   * present) and the instruction's displacement operand.  In special cases
+   * one of the special MODRM codes above is returned instead.
+   * aModRm points to the ModR/M byte of the instruction.
+   * On return, aSubOpcode (if non-null) receives the subopcode/register code
+   * found in the ModR/M byte.  It is left untouched for register-direct
+   * operands (mod == kModReg) and when kModUnknown is returned.
+   */
+  int CountModRmSib(const ReadOnlyTargetFunction<MMPolicyT>& aModRm,
+                    BYTE* aSubOpcode = nullptr) {
+    const BYTE modRm = *aModRm;
+    const BYTE rmBits = modRm & kMaskRm;
+    const BYTE regField = (modRm & kMaskReg) >> kRegFieldShift;
+    const bool hasSib = rmBits == kRmNeedSib;
+
+    int numBytes = 1;  // The mod r/m byte itself
+    switch (modRm & kMaskMod) {
+      case kModReg:
+        return numBytes;
+      case kModDisp8:
+        numBytes += 1;
+        break;
+      case kModDisp32:
+        numBytes += 4;
+        break;
+      case kModNoRegDisp:
+        if (rmBits == kRmNoRegDispDisp32) {
+#  if defined(_M_X64)
+          if (aSubOpcode) {
+            *aSubOpcode = regField;
+          }
+          return kModOperand64;
+#  else
+          // On IA-32, all ModR/M instruction modes address memory relative to 0
+          numBytes += 4;
+#  endif
+        } else if (hasSib && (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp) {
+          // SIB with this base value and mod == 0 carries a disp32
+          numBytes += 4;
+        }
+        break;
+      default:
+        // This should not be reachable
+        MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits");
+        return kModUnknown;
+    }
+
+    if (hasSib) {
+      // SIB byte
+      numBytes += 1;
+    }
+    if (aSubOpcode) {
+      *aSubOpcode = regField;
+    }
+    return numBytes;
+  }
+
+#  if defined(_M_X64)
+  // Kinds of control transfer that GenerateJump knows how to emit.
+  enum class JumpType { Je, Jne, Jae, Jmp, Call };
+
+  // Emits into |aTramp| a control transfer of kind |aType| to the absolute
+  // address |aAbsTargetAddress|, storing the address inline after the
+  // instruction.  Returns whether |aTramp| is still valid after the writes.
+  static bool GenerateJump(Trampoline<MMPolicyT>& aTramp,
+                           uintptr_t aAbsTargetAddress, const JumpType aType) {
+    switch (aType) {
+      case JumpType::Call:
+        // Near call, absolute indirect, address given in r/m32
+        // CALL [RIP+2]
+        aTramp.WriteByte(0xff);
+        aTramp.WriteByte(0x15);
+        // The offset to jump destination -- 2 bytes after the current
+        // position.
+        aTramp.WriteInteger(2);
+        // JMP + 8 (jump over target address)
+        aTramp.WriteByte(0xeb);
+        aTramp.WriteByte(8);
+        aTramp.WritePointer(aAbsTargetAddress);
+        return !!aTramp;
+
+      // For conditional jumps, write the opposite condition because the
+      // destination branches are swapped: it skips the 14-byte absolute jump
+      // that follows.
+      case JumpType::Je:
+        // JNE RIP+14
+        aTramp.WriteByte(0x75);
+        aTramp.WriteByte(14);
+        break;
+      case JumpType::Jne:
+        // JE RIP+14
+        aTramp.WriteByte(0x74);
+        aTramp.WriteByte(14);
+        break;
+      case JumpType::Jae:
+        // JB RIP+14
+        aTramp.WriteByte(0x72);
+        aTramp.WriteByte(14);
+        break;
+
+      case JumpType::Jmp:
+        break;
+    }
+
+    // Near jmp, absolute indirect, address given in r/m32
+    // JMP [RIP+0]
+    aTramp.WriteByte(0xff);
+    aTramp.WriteByte(0x25);
+    // The offset to jump destination is 0
+    aTramp.WriteInteger(0);
+    aTramp.WritePointer(aAbsTargetAddress);
+
+    return !!aTramp;
+  }
+#  endif
+
+  // One bit per legacy prefix group; CountPrefixBytes reports the groups it
+  // encountered as a combination of these bits.
+  enum ePrefixGroupBits {
+    eNoPrefixes = 0,
+    ePrefixGroup1 = (1 << 0),
+    ePrefixGroup2 = (1 << 1),
+    ePrefixGroup3 = (1 << 2),
+    ePrefixGroup4 = (1 << 3)
+  };
+
+  // Counts the legacy prefix bytes at the start of |aBytes|.  On return,
+  // |*aOutGroupBits| holds the ePrefixGroupBits of every group seen.  Returns
+  // the number of prefix bytes, or -1 if two prefixes from the same group are
+  // encountered.
+  int CountPrefixBytes(const ReadOnlyTargetFunction<MMPolicyT>& aBytes,
+                       unsigned char* aOutGroupBits) {
+    unsigned char& seenGroups = *aOutGroupBits;
+    seenGroups = eNoPrefixes;
+
+    for (int index = 0;; ++index) {
+      unsigned char group = eNoPrefixes;
+      switch (aBytes[index]) {
+        case 0xF0:  // LOCK
+        case 0xF2:  // REPNZ
+        case 0xF3:  // REP / REPZ
+          group = ePrefixGroup1;
+          break;
+
+        case 0x2E:  // CS override / branch not taken
+        case 0x36:  // SS override
+        case 0x3E:  // DS override / branch taken
+        case 0x64:  // FS override
+        case 0x65:  // GS override
+          group = ePrefixGroup2;
+          break;
+
+        case 0x66:  // operand size override
+          group = ePrefixGroup3;
+          break;
+
+        case 0x67:  // Address size override
+          group = ePrefixGroup4;
+          break;
+
+        default:
+          // First non-prefix byte: |index| is the number of prefixes seen.
+          return index;
+      }
+
+      // A second prefix from a group we have already seen is rejected.
+      if (seenGroups & group) {
+        return -1;
+      }
+      seenGroups |= group;
+    }
+  }
+
+  // Assembles a ModR/M byte from the (already positioned) Mod bits, the
+  // register number for the reg field and the register number for the R/M
+  // field.  Each argument is asserted to fit within its field.
+  BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm) {
+    // Kept as int so that the range assertion sees bits shifted beyond a byte.
+    const int shiftedReg = aReg << kRegFieldShift;
+
+    MOZ_ASSERT((aModBits & kMaskMod) == aModBits);
+    MOZ_ASSERT((shiftedReg & kMaskReg) == shiftedReg);
+    MOZ_ASSERT((aRm & kMaskRm) == aRm);
+
+    return aModBits | shiftedReg | aRm;
+  }
+
+#endif  // !defined(_M_ARM64)
+
+  // If aOriginalFn is a recognized trampoline (x64 only), patch it in place so
+  // that it jumps to aDest, store that trampoline's previous target in aTramp,
+  // set *aOutTramp to the previous target and return true.  Returns false if
+  // the function is not a recognized trampoline or if any step fails; on
+  // other architectures this always returns false.
+  bool PatchIfTargetIsRecognizedTrampoline(
+      Trampoline<MMPolicyT>& aTramp,
+      ReadOnlyTargetFunction<MMPolicyT>& aOriginalFn, intptr_t aDest,
+      void** aOutTramp) {
+#if defined(_M_X64)
+    // Variation 1:
+    // 48 b8 imm64  mov rax, imm64
+    // ff e0        jmp rax
+    //
+    // Variation 2:
+    // 48 b8 imm64  mov rax, imm64
+    // 50           push rax
+    // c3           ret
+    const bool isRecognizedTrampoline =
+        (aOriginalFn[0] == 0x48) && (aOriginalFn[1] == 0xB8) &&
+        ((aOriginalFn[10] == 0xFF && aOriginalFn[11] == 0xE0) ||
+         (aOriginalFn[10] == 0x50 && aOriginalFn[11] == 0xC3));
+
+    if (isRecognizedTrampoline) {
+      const uintptr_t previousTarget =
+          (aOriginalFn + 2).template ChasePointer<uintptr_t>();
+
+      // Skip the first two bytes (48 b8) so that we can overwrite the imm64
+      WritableTargetFunction<MMPolicyT> imm64Slot(aOriginalFn.Promote(8, 2));
+      if (!imm64Slot) {
+        return false;
+      }
+
+      // Write the new JMP target address.
+      imm64Slot.WritePointer(aDest);
+      if (!imm64Slot.Commit()) {
+        return false;
+      }
+
+      // Store the old target address so we can restore it when we're cleared
+      aTramp.WritePointer(previousTarget);
+      if (!aTramp) {
+        return false;
+      }
+
+      *aOutTramp = reinterpret_cast<void*>(previousTarget);
+      return true;
+    }
+#endif  // defined(_M_X64)
+
+    return false;
+  }
+
+#if defined(_M_ARM64)
+  // Writes a single immediate branch at the current position of |target|.
+  // The branch goes to a veneer built in |aTrampPool| that in turn leads to
+  // |aDest|.  Returns false if there is no pool, the veneer cannot be made,
+  // or the branch cannot be encoded.  Nothing is committed here.
+  bool Apply4BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
+                       WritableTargetFunction<MMPolicyT>& target,
+                       intptr_t aDest) {
+    MOZ_ASSERT(aTrampPool);
+    if (!aTrampPool) {
+      return false;
+    }
+
+    const uintptr_t veneerAddr =
+        arm64::MakeVeneer(*aTrampPool, aTrampPtr, aDest);
+    if (!veneerAddr) {
+      return false;
+    }
+
+    const Maybe<uint32_t> branchInst = arm64::BuildUnconditionalBranchImm(
+        target.GetCurrentAddress(), veneerAddr);
+    if (branchInst) {
+      target.WriteLong(branchInst.value());
+    }
+
+    return branchInst.isSome();
+  }
+#endif  // defined(_M_ARM64)
+
+#if defined(_M_X64)
+  // Writes a 10-byte patch at the current position of |target|:
+  //   mov eax, imm32; movsxd rax, eax; jmp rax
+  // where imm32 is the address of an intermediate trampoline taken from
+  // |aTrampPool|.  That trampoline holds a regular 13-byte jump to |aDest|
+  // and also stores |aTrampPtr| so the hook can be reset later.  Returns
+  // false if the intermediate trampoline cannot be obtained or finalized.
+  // Nothing is committed here.
+  bool Apply10BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
+                        WritableTargetFunction<MMPolicyT>& target,
+                        intptr_t aDest) {
+    // Note: Even if the target function is also below 2GB, we still use an
+    // intermediary trampoline so that we consistently have a 64-bit pointer
+    // that we can use to reset the trampoline upon interceptor shutdown.
+    Maybe<Trampoline<MMPolicyT>> nextTramp(aTrampPool->GetNextTrampoline());
+    if (!nextTramp) {
+      return false;
+    }
+
+    Trampoline<MMPolicyT> intermediate(std::move(nextTramp.ref()));
+
+    // Slot 0: a null instance so that Clear() does not consider this tramp to
+    // be a normal tramp to be torn down.
+    intermediate.WriteEncodedPointer(nullptr);
+    // Slot 1: pointer to the primary tramp.
+    intermediate.WriteEncodedPointer(aTrampPtr);
+    intermediate.StartExecutableCode();
+
+    // mov r11, address
+    intermediate.WriteByte(0x49);
+    intermediate.WriteByte(0xbb);
+    intermediate.WritePointer(aDest);
+
+    // jmp r11
+    intermediate.WriteByte(0x41);
+    intermediate.WriteByte(0xff);
+    intermediate.WriteByte(0xe3);
+
+    void* intermediateCode = intermediate.EndExecutableCode();
+    if (!intermediateCode) {
+      return false;
+    }
+
+    const uintptr_t intermediateAddr =
+        reinterpret_cast<uintptr_t>(intermediateCode);
+
+    // MOV EAX, IMM32
+    target.WriteByte(0xB8);
+
+    // Assert that the topmost 33 bits are 0
+    MOZ_ASSERT(!(intermediateAddr & (~0x7FFFFFFFULL)));
+
+    target.WriteLong(static_cast<uint32_t>(intermediateAddr & 0x7FFFFFFFU));
+
+    // MOVSXD r64, r/m32 with REX.W; dest: rax, src: eax
+    target.WriteByte(0x48);
+    target.WriteByte(0x63);
+    target.WriteByte(BuildModRmByte(kModReg, kRegAx, kRegAx));
+
+    // JMP /4 through rax
+    target.WriteByte(0xFF);
+    target.WriteByte(BuildModRmByte(kModReg, 4, kRegAx));
+
+    return true;
+  }
+#endif  // defined(_M_X64)
+
+  void CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT>& origBytes,
+                        TrampPoolT* aTrampPool, Trampoline<MMPolicyT>& aTramp,
+                        intptr_t aDest, void** aOutTramp) {
+    *aOutTramp = nullptr;
+
+    Trampoline<MMPolicyT>& tramp = aTramp;
+    if (!tramp) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_INVALID_TRAMPOLINE);
+      return;
+    }
+
+    // The beginning of the trampoline contains two pointer-width slots:
+    // [0]: |this|, so that we know whether the trampoline belongs to us;
+    // [1]: Pointer to original function, so that we can reset the hooked
+    // function to its original behavior upon destruction.  In rare cases
+    // where the function was already a different trampoline, this is
+    // just a pointer to that trampoline's target address.
+    tramp.WriteEncodedPointer(this);
+    if (!tramp) {
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_WRITE_POINTER_ERROR);
+      return;
+    }
+
+    auto clearInstanceOnFailure = MakeScopeExit([this, aOutTramp, &tramp,
+                                                 &origBytes]() -> void {
+      // *aOutTramp is not set until CreateTrampoline has completed
+      // successfully, so we can use that to check for success.
+      if (*aOutTramp) {
+        return;
+      }
+
+      // Clear the instance pointer so that we don't try to reset a
+      // nonexistent hook.
+      tramp.Rewind();
+      tramp.WriteEncodedPointer(nullptr);
+
+#if defined(NIGHTLY_BUILD)
+      origBytes.Rewind();
+      this->SetLastDetourError(
+          DetourResultCode::DETOUR_PATCHER_CREATE_TRAMPOLINE_ERROR);
+      DetourError& lastError = *this->mVMPolicy.mLastError;
+      size_t bytesToCapture = std::min(
+          ArrayLength(lastError.mOrigBytes),
+          static_cast<size_t>(PrimitiveT::GetWorstCaseRequiredBytesToPatch()));
+#  if defined(_M_ARM64)
+      size_t numInstructionsToCapture = bytesToCapture / sizeof(uint32_t);
+      auto origBytesDst = reinterpret_cast<uint32_t*>(lastError.mOrigBytes);
+      for (size_t i = 0; i < numInstructionsToCapture; ++i) {
+        origBytesDst[i] = origBytes.ReadNextInstruction();
+      }
+#  else
+      for (size_t i = 0; i < bytesToCapture; ++i) {
+        lastError.mOrigBytes[i] = origBytes[i];
+      }
+#  endif  // defined(_M_ARM64)
+#else
+      // Silence -Wunused-lambda-capture in non-Nightly.
+      Unused << this;
+      Unused << origBytes;
+#endif  // defined(NIGHTLY_BUILD)
+    });
+
+    tramp.WritePointer(origBytes.AsEncodedPtr());
+    if (!tramp) {
+      return;
+    }
+
+    if (PatchIfTargetIsRecognizedTrampoline(tramp, origBytes, aDest,
+                                            aOutTramp)) {
+      return;
+    }
+
+    tramp.StartExecutableCode();
+
+    constexpr uint32_t kWorstCaseBytesRequired =
+        PrimitiveT::GetWorstCaseRequiredBytesToPatch();
+
+#if defined(_M_IX86)
+    int pJmp32 = -1;
+    while (origBytes.GetOffset() < kWorstCaseBytesRequired) {
+      // Understand some simple instructions that might be found in a
+      // prologue; we might need to extend this as necessary.
+      //
+      // Note!  If we ever need to understand jump instructions, we'll
+      // need to rewrite the displacement argument.
+      unsigned char prefixGroups;
+      int numPrefixBytes = CountPrefixBytes(origBytes, &prefixGroups);
+      if (numPrefixBytes < 0 ||
+          (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) {
+        // Either the prefix sequence was bad, or there are prefixes that
+        // we don't currently support (groups 3 and 4)
+        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+        return;
+      }
+
+      origBytes += numPrefixBytes;
+      if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+        // various MOVs
+        ++origBytes;
+        int len = CountModRmSib(origBytes);
+        if (len < 0) {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+          return;
+        }
+        origBytes += len;
+      } else if (*origBytes == 0x0f &&
+                 (origBytes[1] == 0x10 || origBytes[1] == 0x11)) {
+        // SSE: movups xmm, xmm/m128
+        //      movups xmm/m128, xmm
+        origBytes += 2;
+        int len = CountModRmSib(origBytes);
+        if (len < 0) {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+          return;
+        }
+        origBytes += len;
+      } else if (*origBytes == 0xA1) {
+        // MOV eax, [seg:offset]
+        origBytes += 5;
+      } else if (*origBytes == 0xB8) {
+        // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
+        origBytes += 5;
+      } else if (*origBytes == 0x33 && (origBytes[1] & kMaskMod) == kModReg) {
+        // XOR r32, r32
+        origBytes += 2;
+      } else if ((*origBytes & 0xf8) == 0x40) {
+        // INC r32
+        origBytes += 1;
+      } else if (*origBytes == 0x83) {
+        uint8_t mod = static_cast<uint8_t>(origBytes[1]) & kMaskMod;
+        uint8_t rm = static_cast<uint8_t>(origBytes[1]) & kMaskRm;
+        if (mod == kModReg) {
+          // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
+          origBytes += 3;
+        } else if (mod == kModDisp8 && rm != kRmNeedSib) {
+          // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP [r+disp8], imm8
+          origBytes += 4;
+        } else {
+          // bail
+          MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x68) {
+        // PUSH with 4-byte operand
+        origBytes += 5;
+      } else if ((*origBytes & 0xf0) == 0x50) {
+        // 1-byte PUSH/POP
+        ++origBytes;
+      } else if (*origBytes == 0x6A) {
+        // PUSH imm8
+        origBytes += 2;
+      } else if (*origBytes == 0xe9) {
+        pJmp32 = origBytes.GetOffset();
+        // jmp 32bit offset
+        origBytes += 5;
+      } else if (*origBytes == 0xff && origBytes[1] == 0x25) {
+        // jmp [disp32]
+        origBytes += 6;
+      } else if (*origBytes == 0xc2) {
+        // ret imm16.  We can't handle this but it happens.  We don't ASSERT but
+        // we do fail to hook.
+#  if defined(MOZILLA_INTERNAL_API)
+        NS_WARNING("Cannot hook method -- RET opcode found");
+#  endif
+        return;
+      } else {
+        // printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n",
+        // *origBytes);
+        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+        return;
+      }
+    }
+
+    // The trampoline is a copy of the instructions that we just traced,
+    // followed by a jump that we add below.
+    tramp.CopyFrom(origBytes.GetBaseAddress(), origBytes.GetOffset());
+    if (!tramp) {
+      return;
+    }
+#elif defined(_M_X64)
+    bool foundJmp = false;
+    // |use10BytePatch| should always default to |false| in production. It is
+    // not set to true unless we detect that a 10-byte patch is necessary.
+    // OTOH, for testing purposes, if we want to force a 10-byte patch, we
+    // always initialize |use10BytePatch| to |true|.
+    bool use10BytePatch =
+        (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch) ==
+        DetourFlags::eTestOnlyForceShortPatch;
+    const uint32_t bytesRequired =
+        use10BytePatch ? 10 : kWorstCaseBytesRequired;
+
+    while (origBytes.GetOffset() < bytesRequired) {
+      // If we found JMP 32bit offset, we require that the next bytes must
+      // be NOP or INT3.  There is no reason to copy them.
+      // TODO: This used to trigger for Je as well.  Now that I allow
+      // instructions after CALL and JE, I don't think I need that.
+      // The only real value of this condition is that if code follows a JMP
+      // then it's _probably_ the target of a JMP somewhere else and we
+      // will be overwriting it, which would be tragic.  This seems
+      // highly unlikely.
+      if (foundJmp) {
+        if (*origBytes == 0x90 || *origBytes == 0xcc) {
+          ++origBytes;
+          continue;
+        }
+
+        // If our trampoline space is located in the lowest 2GB, we can do a ten
+        // byte patch instead of a thirteen byte patch.
+        if (aTrampPool && aTrampPool->IsInLowest2GB() &&
+            origBytes.GetOffset() >= 10) {
+          use10BytePatch = true;
+          break;
+        }
+
+        MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP");
+        return;
+      }
+      if (*origBytes == 0x0f) {
+        COPY_CODES(1);
+        if (*origBytes == 0x1f) {
+          // nop (multibyte)
+          COPY_CODES(1);
+          if ((*origBytes & 0xc0) == 0x40 && (*origBytes & 0x7) == 0x04) {
+            COPY_CODES(3);
+          } else {
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+        } else if (*origBytes == 0x05) {
+          // syscall
+          COPY_CODES(1);
+        } else if (*origBytes == 0x10 || *origBytes == 0x11) {
+          // SSE: movups xmm, xmm/m128
+          //      movups xmm/m128, xmm
+          COPY_CODES(1);
+          int nModRmSibBytes = CountModRmSib(origBytes);
+          if (nModRmSibBytes < 0) {
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          } else {
+            COPY_CODES(nModRmSibBytes);
+          }
+        } else if (*origBytes >= 0x83 && *origBytes <= 0x85) {
+          // 0f 83 cd    JAE rel32
+          // 0f 84 cd    JE  rel32
+          // 0f 85 cd    JNE rel32
+          const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
+                                         JumpType::Jne};
+          auto jumpType = kJumpTypes[*origBytes - 0x83];
+          ++origBytes;
+          --tramp;  // overwrite the 0x0f we copied above
+
+          if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
+                            jumpType)) {
+            return;
+          }
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+        // various 32-bit MOVs
+        COPY_CODES(1);
+        int len = CountModRmSib(origBytes);
+        if (len < 0) {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+          return;
+        }
+        COPY_CODES(len);
+      } else if (*origBytes == 0x40 || *origBytes == 0x41) {
+        // Plain REX or REX.B
+        COPY_CODES(1);
+        if ((*origBytes & 0xf0) == 0x50) {
+          // push/pop with Rx register
+          COPY_CODES(1);
+        } else if (*origBytes >= 0xb8 && *origBytes <= 0xbf) {
+          // mov r32, imm32
+          COPY_CODES(5);
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x44) {
+        // REX.R
+        COPY_CODES(1);
+
+        // TODO: Combine with the "0x89" case below in the REX.W section
+        if (*origBytes == 0x89) {
+          // mov r/m32, r32
+          COPY_CODES(1);
+          int len = CountModRmSib(origBytes);
+          if (len < 0) {
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+          COPY_CODES(len);
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x45) {
+        // REX.R & REX.B
+        COPY_CODES(1);
+
+        if (*origBytes == 0x33) {
+          // xor r32, r32
+          COPY_CODES(2);
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if ((*origBytes & 0xfa) == 0x48) {
+        // REX.W | REX.WR | REX.WRB | REX.WB
+        COPY_CODES(1);
+
+        if (*origBytes == 0x81 && (origBytes[1] & 0xf8) == 0xe8) {
+          // sub r, dword
+          COPY_CODES(6);
+        } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0xe8) {
+          // sub r, byte
+          COPY_CODES(3);
+        } else if (*origBytes == 0x83 &&
+                   (origBytes[1] & (kMaskMod | kMaskReg)) == kModReg) {
+          // add r, byte
+          COPY_CODES(3);
+        } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
+          // and [r+d], imm8
+          COPY_CODES(5);
+        } else if (*origBytes == 0x2b && (origBytes[1] & kMaskMod) == kModReg) {
+          // sub r64, r64
+          COPY_CODES(2);
+        } else if (*origBytes == 0x85) {
+          // 85 /r => TEST r/m32, r32
+          if ((origBytes[1] & 0xc0) == 0xc0) {
+            COPY_CODES(2);
+          } else {
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+        } else if ((*origBytes & 0xfd) == 0x89) {
+          // MOV r/m64, r64 | MOV r64, r/m64
+          BYTE reg;
+          int len = CountModRmSib(origBytes + 1, &reg);
+          if (len < 0) {
+            MOZ_ASSERT(len == kModOperand64);
+            if (len != kModOperand64) {
+              return;
+            }
+            origBytes += 2;  // skip the MOV and MOD R/M bytes
+
+            // The instruction MOVs 64-bit data from a RIP-relative memory
+            // address (determined with a 32-bit offset from RIP) into a
+            // 64-bit register.
+            uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+
+            if (reg == kRegAx) {
+              // Destination is RAX.  Encode instruction as MOVABS with a
+              // 64-bit absolute address as its immediate operand.
+              tramp.WriteByte(0xa1);
+              tramp.WritePointer(absAddr);
+            } else {
+              // The MOV must be done in two steps.  First, we MOVABS the
+              // absolute 64-bit address into our target register.
+              // Then, we MOV from that address into the register
+              // using register-indirect addressing.
+              tramp.WriteByte(0xb8 + reg);
+              tramp.WritePointer(absAddr);
+              tramp.WriteByte(0x48);
+              tramp.WriteByte(0x8b);
+              tramp.WriteByte(BuildModRmByte(kModNoRegDisp, reg, reg));
+            }
+          } else {
+            COPY_CODES(len + 1);
+          }
+        } else if ((*origBytes & 0xf8) == 0xb8) {
+          // MOV r64, imm64
+          COPY_CODES(9);
+        } else if (*origBytes == 0xc7) {
+          // MOV r/m64, imm32
+          if (origBytes[1] == 0x44) {
+            // MOV [r64+disp8], imm32
+            // ModR/M + SIB + disp8 + imm32
+            COPY_CODES(8);
+          } else {
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+        } else if (*origBytes == 0xff) {
+          // JMP /4
+          if ((origBytes[1] & 0xc0) == 0x0 && (origBytes[1] & 0x07) == 0x5) {
+            origBytes += 2;
+            --tramp;  // overwrite the REX.W/REX.RW we copied above
+
+            if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
+                              JumpType::Jmp)) {
+              return;
+            }
+
+            foundJmp = true;
+          } else {
+            // not support yet!
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+        } else if (*origBytes == 0x8d) {
+          // LEA reg, addr
+          if ((origBytes[1] & kMaskMod) == 0x0 &&
+              (origBytes[1] & kMaskRm) == 0x5) {
+            // [rip+disp32]
+            // convert 32bit offset to 64bit direct and convert instruction
+            // to a simple 64-bit mov
+            BYTE reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
+            origBytes += 2;
+            uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+            tramp.WriteByte(0xb8 + reg);  // move
+            tramp.WritePointer(absAddr);
+          } else {
+            // Above we dealt with RIP-relative instructions.  Any other
+            // operand form can simply be copied.
+            int len = CountModRmSib(origBytes + 1);
+            // We handled the kModOperand64 -- ie RIP-relative -- case above
+            MOZ_ASSERT(len > 0);
+            COPY_CODES(len + 1);
+          }
+        } else if (*origBytes == 0x63 && (origBytes[1] & kMaskMod) == kModReg) {
+          // movsxd r64, r32 (move + sign extend)
+          COPY_CODES(2);
+        } else {
+          // not support yet!
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x66) {
+        // operand override prefix
+        COPY_CODES(1);
+        // This is the same as the x86 version
+        if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+          // various MOVs
+          unsigned char b = origBytes[1];
+          if (((b & 0xc0) == 0xc0) ||
+              (((b & 0xc0) == 0x00) && ((b & 0x07) != 0x04) &&
+               ((b & 0x07) != 0x05))) {
+            // REG=r, R/M=r or REG=r, R/M=[r]
+            COPY_CODES(2);
+          } else if ((b & 0xc0) == 0x40) {
+            if ((b & 0x07) == 0x04) {
+              // REG=r, R/M=[SIB + disp8]
+              COPY_CODES(4);
+            } else {
+              // REG=r, R/M=[r + disp8]
+              COPY_CODES(3);
+            }
+          } else {
+            // complex MOV, bail
+            MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+            return;
+          }
+        } else if (*origBytes == 0x44 && origBytes[1] == 0x89) {
+          // mov word ptr [reg+disp8], reg
+          COPY_CODES(2);
+          int len = CountModRmSib(origBytes);
+          if (len < 0) {
+            // no way to support this yet.
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+          COPY_CODES(len);
+        }
+      } else if ((*origBytes & 0xf0) == 0x50) {
+        // 1-byte push/pop
+        COPY_CODES(1);
+      } else if (*origBytes == 0x65) {
+        // GS prefix
+        //
+        // The entry of GetKeyState on Windows 10 has the following code.
+        // 65 48 8b 04 25 30 00 00 00    mov   rax,qword ptr gs:[30h]
+        // (GS prefix + REX + MOV (0x8b) ...)
+        if (origBytes[1] == 0x48 &&
+            (origBytes[2] >= 0x88 && origBytes[2] <= 0x8b)) {
+          COPY_CODES(3);
+          int len = CountModRmSib(origBytes);
+          if (len < 0) {
+            // no way to support this yet.
+            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+            return;
+          }
+          COPY_CODES(len);
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x80 && origBytes[1] == 0x3d) {
+        origBytes += 2;
+
+        // cmp byte ptr [rip-relative address], imm8
+        // We'll compute the absolute address and do the cmp in r11
+
+        // push r11 (to save the old value)
+        tramp.WriteByte(0x49);
+        tramp.WriteByte(0x53);
+
+        uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+
+        // mov r11, absolute address
+        tramp.WriteByte(0x49);
+        tramp.WriteByte(0xbb);
+        tramp.WritePointer(absAddr);
+
+        // cmp byte ptr [r11],...
+        tramp.WriteByte(0x41);
+        tramp.WriteByte(0x80);
+        tramp.WriteByte(0x3b);
+
+        // ...imm8
+        COPY_CODES(1);
+
+        // pop r11 (doesn't affect the flags from the cmp)
+        tramp.WriteByte(0x49);
+        tramp.WriteByte(0x5b);
+      } else if (*origBytes == 0x90) {
+        // nop
+        COPY_CODES(1);
+      } else if ((*origBytes & 0xf8) == 0xb8) {
+        // MOV r32, imm32
+        COPY_CODES(5);
+      } else if (*origBytes == 0x33) {
+        // xor r32, r/m32
+        COPY_CODES(2);
+      } else if (*origBytes == 0xf6) {
+        // test r/m8, imm8 (used by ntdll on Windows 10 x64)
+        // (no flags are affected by near jmp since there is no task switch,
+        // so it is ok for a jmp to be written immediately after a test)
+        BYTE subOpcode = 0;
+        int nModRmSibBytes = CountModRmSib(origBytes + 1, &subOpcode);
+        if (nModRmSibBytes < 0 || subOpcode != 0) {
+          // Unsupported
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+        COPY_CODES(2 + nModRmSibBytes);
+      } else if (*origBytes == 0x85) {
+        // test r/m32, r32
+        int nModRmSibBytes = CountModRmSib(origBytes + 1);
+        if (nModRmSibBytes < 0) {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+        COPY_CODES(1 + nModRmSibBytes);
+      } else if (*origBytes == 0xd1 && (origBytes[1] & kMaskMod) == kModReg) {
+        // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32
+        // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR)
+        COPY_CODES(2);
+      } else if (*origBytes == 0x83 && (origBytes[1] & kMaskMod) == kModReg) {
+        // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
+        COPY_CODES(3);
+      } else if (*origBytes == 0xc3) {
+        // ret
+        COPY_CODES(1);
+      } else if (*origBytes == 0xcc) {
+        // int 3
+        COPY_CODES(1);
+      } else if (*origBytes == 0xe8 || *origBytes == 0xe9) {
+        // CALL (0xe8) or JMP (0xe9) 32bit offset
+        foundJmp = *origBytes == 0xe9;
+        ++origBytes;
+
+        if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
+                          foundJmp ? JumpType::Jmp : JumpType::Call)) {
+          return;
+        }
+      } else if (*origBytes >= 0x73 && *origBytes <= 0x75) {
+        // 73 cb    JAE rel8
+        // 74 cb    JE  rel8
+        // 75 cb    JNE rel8
+        const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
+                                       JumpType::Jne};
+        auto jumpType = kJumpTypes[*origBytes - 0x73];
+        uint8_t offset = origBytes[1];
+
+        origBytes += 2;
+
+        if (!GenerateJump(tramp, origBytes.OffsetToAbsolute(offset),
+                          jumpType)) {
+          return;
+        }
+      } else if (*origBytes == 0xff) {
+        uint8_t mod = origBytes[1] & kMaskMod;
+        uint8_t reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
+        uint8_t rm = origBytes[1] & kMaskRm;
+        if (mod == kModReg && (reg == 0 || reg == 1 || reg == 2 || reg == 6)) {
+          // INC|DEC|CALL|PUSH r64
+          COPY_CODES(2);
+        } else if (mod == kModNoRegDisp && reg == 2 &&
+                   rm == kRmNoRegDispDisp32) {
+          // FF 15    CALL [disp32]
+          origBytes += 2;
+          if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
+                            JumpType::Call)) {
+            return;
+          }
+        } else if (reg == 4) {
+          // FF /4 (Opcode=ff, REG=4): JMP r/m
+          if (mod == kModNoRegDisp && rm == kRmNoRegDispDisp32) {
+            // FF 25    JMP [disp32]
+            foundJmp = true;
+
+            origBytes += 2;
+
+            uintptr_t jmpDest = origBytes.ChasePointerFromDisp();
+
+            if (!GenerateJump(tramp, jmpDest, JumpType::Jmp)) {
+              return;
+            }
+          } else {
+            // JMP r/m except JMP [disp32]
+            int len = CountModRmSib(origBytes + 1);
+            if (len < 0) {
+              // RIP-relative not yet supported
+              MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+              return;
+            }
+
+            COPY_CODES(len + 1);
+
+            foundJmp = true;
+          }
+        } else {
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+      } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
+        // and [r+d], imm8
+        COPY_CODES(5);
+      } else if (*origBytes == 0xc6) {
+        // mov [r+d], imm8
+        int len = CountModRmSib(origBytes + 1);
+        if (len < 0) {
+          // RIP-relative not yet supported
+          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+          return;
+        }
+        COPY_CODES(len + 2);
+      } else {
+        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+        return;
+      }
+    }
+#elif defined(_M_ARM64)
+
+    // The number of bytes required to facilitate a detour depends on the
+    // proximity of the hook function to the target function. In the best case,
+    // we can branch within +/- 128MB of the current location, requiring only
+    // 4 bytes. In the worst case, we need 16 bytes to load an absolute address
+    // into a register and then branch to it.
+    const uint32_t bytesRequiredFromDecode =
+        (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch)
+            ? 4
+            : kWorstCaseBytesRequired;
+
+    while (origBytes.GetOffset() < bytesRequiredFromDecode) {
+      uintptr_t curPC = origBytes.GetCurrentAbsolute();
+      uint32_t curInst = origBytes.ReadNextInstruction();
+
+      Result<arm64::LoadOrBranch, arm64::PCRelCheckError> pcRelInfo =
+          arm64::CheckForPCRel(curPC, curInst);
+      if (pcRelInfo.isErr()) {
+        if (pcRelInfo.unwrapErr() ==
+            arm64::PCRelCheckError::InstructionNotPCRel) {
+          // Instruction is not PC-relative, we can just copy it verbatim
+          tramp.WriteInstruction(curInst);
+          continue;
+        }
+
+        // At this point we have determined that there is no decoder available
+        // for the current, PC-relative, instruction.
+
+        // origBytes is now pointing one instruction past the one that we
+        // need the trampoline to jump back to.
+        if (!origBytes.BackUpOneInstruction()) {
+          return;
+        }
+
+        break;
+      }
+
+      // We need to load an absolute address into a particular register
+      tramp.WriteLoadLiteral(pcRelInfo.inspect().mAbsAddress,
+                             pcRelInfo.inspect().mDestReg);
+    }
+
+#else
+#  error "Unknown processor type"
+#endif
+
+    if (origBytes.GetOffset() > 100) {
+      // printf ("Too big!");
+      return;
+    }
+
+#if defined(_M_IX86)
+    if (pJmp32 >= 0) {
+      // Jump directly to the original target of the jump instead of jumping to
+      // the original function. Adjust jump target displacement to jump location
+      // in the trampoline.
+      tramp.AdjustDisp32AtOffset(pJmp32 + 1, origBytes.GetBaseAddress());
+    } else {
+      tramp.WriteByte(0xe9);  // jmp
+      tramp.WriteDisp32(origBytes.GetAddress());
+    }
+#elif defined(_M_X64)
+    // If we found a Jmp, we don't need to add another instruction. However,
+    // if we found a _conditional_ jump or a CALL (or no control operations
+    // at all) then we still need to run the rest of aOriginalFunction.
+    if (!foundJmp) {
+      if (!GenerateJump(tramp, origBytes.GetAddress(), JumpType::Jmp)) {
+        return;
+      }
+    }
+#elif defined(_M_ARM64)
+    // Let's find out how many bytes we have available to us for patching
+    uint32_t numBytesForPatching = tramp.GetCurrentExecutableCodeLen();
+
+    if (!numBytesForPatching) {
+      // There's nothing we can do
+      return;
+    }
+
+    if (tramp.IsNull()) {
+      // Recursive case
+      HMODULE targetModule = nullptr;
+
+      if (numBytesForPatching < kWorstCaseBytesRequired) {
+        if (!::GetModuleHandleExW(
+                GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+                    GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                reinterpret_cast<LPCWSTR>(origBytes.GetBaseAddress()),
+                &targetModule)) {
+          return;
+        }
+      }
+
+      Maybe<TrampPoolT> maybeTrampPool = DoReserve(targetModule);
+      MOZ_ASSERT(maybeTrampPool);
+      if (!maybeTrampPool) {
+        return;
+      }
+
+      Maybe<Trampoline<MMPolicyT>> maybeRealTramp(
+          maybeTrampPool.ref().GetNextTrampoline());
+      if (!maybeRealTramp) {
+        return;
+      }
+
+      origBytes.Rewind();
+      CreateTrampoline(origBytes, maybeTrampPool.ptr(), maybeRealTramp.ref(),
+                       aDest, aOutTramp);
+      return;
+    }
+
+    // Write the branch from the trampoline back to the original code
+
+    tramp.WriteLoadLiteral(origBytes.GetAddress(), 16);
+    tramp.WriteInstruction(arm64::BuildUnconditionalBranchToRegister(16));
+#else
+#  error "Unsupported processor architecture"
+#endif
+
+    // The trampoline is now complete.
+    void* trampPtr = tramp.EndExecutableCode();
+    if (!trampPtr) {
+      return;
+    }
+
+    WritableTargetFunction<MMPolicyT> target(origBytes.Promote());
+    if (!target) {
+      return;
+    }
+
+    do {
+      // Now patch the original function.
+      // When we're instructed to apply a non-default patch, apply it and exit.
+      // If non-default patching fails, bail out, no fallback.
+      // Otherwise, we go straight to the default patch.
+
+#if defined(_M_X64)
+      if (use10BytePatch) {
+        if (!Apply10BytePatch(aTrampPool, trampPtr, target, aDest)) {
+          return;
+        }
+        break;
+      }
+#elif defined(_M_ARM64)
+      if (numBytesForPatching < kWorstCaseBytesRequired) {
+        if (!Apply4BytePatch(aTrampPool, trampPtr, target, aDest)) {
+          return;
+        }
+        break;
+      }
+#endif
+
+      PrimitiveT::ApplyDefaultPatch(target, aDest);
+    } while (false);
+
+    if (!target.Commit()) {
+      return;
+    }
+
+    // Output the trampoline, thus signalling that this call was a success
+    *aOutTramp = trampPtr;
+  }
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_PatcherDetour_h
diff --git a/mozglue/misc/interceptor/PatcherNopSpace.h b/mozglue/misc/interceptor/PatcherNopSpace.h
new file mode 100644
index 0000000000..deee87e0f8
--- /dev/null
+++ b/mozglue/misc/interceptor/PatcherNopSpace.h
@@ -0,0 +1,205 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_PatcherNopSpace_h
+#define mozilla_interceptor_PatcherNopSpace_h
+
+#if defined(_M_IX86)
+
+#  include "mozilla/interceptor/PatcherBase.h"
+
+namespace mozilla {
+namespace interceptor {
+
+template <typename VMPolicy>
+class WindowsDllNopSpacePatcher final : public WindowsDllPatcherBase<VMPolicy> {
+  typedef typename VMPolicy::MMPolicyT MMPolicyT;
+
+  // For remembering the addresses of functions we've patched.
+  mozilla::Vector<void*> mPatchedFns;
+
+ public:
+  // Forwards every constructor argument, unchanged, to WindowsDllPatcherBase.
+  template <typename... Args>
+  explicit WindowsDllNopSpacePatcher(Args&&... aArgs)
+      : WindowsDllPatcherBase<VMPolicy>(std::forward<Args>(aArgs)...) {}
+
+  // Calls Clear() on destruction.
+  ~WindowsDllNopSpacePatcher() { Clear(); }
+
+  // Copy and move construction/assignment are all deleted.
+  WindowsDllNopSpacePatcher(const WindowsDllNopSpacePatcher&) = delete;
+  WindowsDllNopSpacePatcher(WindowsDllNopSpacePatcher&&) = delete;
+  WindowsDllNopSpacePatcher& operator=(const WindowsDllNopSpacePatcher&) =
+      delete;
+  WindowsDllNopSpacePatcher& operator=(WindowsDllNopSpacePatcher&&) = delete;
+
+  /**
+   * Walk every address recorded in mPatchedFns, write the two-byte
+   * `mov edi, edi` back over the start of that function, and then empty the
+   * list of recorded addresses.
+   */
+  void Clear() {
+    // 0x8B 0xFF, stored little-endian, is `mov edi, edi`.
+    constexpr uint16_t kMovEdiEdi = 0xff8b;
+
+    for (void* patchedFn : mPatchedFns) {
+      const auto fnAddress = reinterpret_cast<uintptr_t>(patchedFn);
+      WritableTargetFunction<MMPolicyT> writableFn(this->mVMPolicy, fnAddress,
+                                                   sizeof(uint16_t));
+      // If the writable view tests false, leave this function untouched and
+      // move on to the next one.
+      if (writableFn) {
+        writableFn.CommitAndWriteShort(kMovEdiEdi);
+      }
+    }
+
+    mPatchedFns.clear();
+  }
+
+  /**
+   * NVIDIA Optimus drivers utilize Microsoft Detours 2.x to patch functions
+   * in our address space. There is a bug in Detours 2.x that causes it to
+   * patch at the wrong address when attempting to detour code that is already
+   * NOP space patched. This function is an effort to detect the presence of
+   * this NVIDIA code in our address space and disable NOP space patching if it
+   * is. We also check AppInit_DLLs since this is the mechanism that the Optimus
+   * drivers use to inject into our process.
+   */
+  static bool IsCompatible() {
+    // Returns false as soon as a known-incompatible component is detected.
+    // Every other outcome, including a failure to read the registry, is
+    // treated as compatible.
+
+    // These DLLs are known to have bad interactions with this style of patching
+    const wchar_t* kIncompatibleDLLs[] = {L"detoured.dll", L"_etoured.dll",
+                                          L"nvd3d9wrap.dll", L"nvdxgiwrap.dll"};
+    // See if the infringing DLLs are already loaded
+    for (const wchar_t* dllName : kIncompatibleDLLs) {
+      if (GetModuleHandleW(dllName)) {
+        return false;
+      }
+    }
+    if (GetModuleHandleW(L"user32.dll")) {
+      // user32 is loaded but the infringing DLLs are not, assume we're safe to
+      // proceed.
+      return true;
+    }
+    // If user32 has not loaded yet, check AppInit_DLLs to ensure that Optimus
+    // won't be loaded once user32 is initialized.
+    HKEY hkey = nullptr;
+    if (RegOpenKeyExW(
+            HKEY_LOCAL_MACHINE,
+            L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Windows", 0,
+            KEY_QUERY_VALUE, &hkey)) {
+      // The key could not be opened, so there is nothing more to inspect.
+      return true;
+    }
+    nsAutoRegKey key(hkey);
+    const wchar_t kAppInitDLLs[] = L"AppInit_DLLs";
+
+    // Query for required buffer size
+    DWORD numBytes = 0;
+    if (RegQueryValueExW(hkey, kAppInitDLLs, nullptr, nullptr, nullptr,
+                         &numBytes)) {
+      return true;
+    }
+
+    // Allocate the buffer and query for the actual data. Registry strings are
+    // not guaranteed to be stored with a terminating null (and the value may
+    // even be zero bytes long), so reserve one extra character; the buffer is
+    // terminated explicitly below before it is tokenized.
+    const size_t bufferChars = numBytes / sizeof(wchar_t) + 1;
+    mozilla::UniquePtr<wchar_t[]> data =
+        mozilla::MakeUnique<wchar_t[]>(bufferChars);
+    if (RegQueryValueExW(hkey, kAppInitDLLs, nullptr, nullptr,
+                         reinterpret_cast<LPBYTE>(data.get()), &numBytes)) {
+      return true;
+    }
+    // numBytes now holds the number of bytes actually copied, which cannot
+    // exceed the size passed in, so this index is always inside the buffer.
+    data[numBytes / sizeof(wchar_t)] = L'\0';
+
+    // For each token, split up the filename components and then check the
+    // name of the file.
+    const wchar_t kDelimiters[] = L", ";
+    wchar_t* tokenContext = nullptr;
+    for (wchar_t* token = wcstok_s(data.get(), kDelimiters, &tokenContext);
+         token; token = wcstok_s(nullptr, kDelimiters, &tokenContext)) {
+      wchar_t fname[_MAX_FNAME] = {0};
+      if (_wsplitpath_s(token, nullptr, 0, nullptr, 0, fname,
+                        mozilla::ArrayLength(fname), nullptr, 0)) {
+        // The path could not be split; skip this entry.
+        continue;
+      }
+      // nvinit.dll is responsible for bootstrapping the DLL injection, so
+      // that is the library that we check for here. The compared length
+      // includes the terminator, so only a base name that is exactly
+      // "nvinit" (ignoring case) matches.
+      const wchar_t kNvInitName[] = L"nvinit";
+      if (!_wcsnicmp(fname, kNvInitName, mozilla::ArrayLength(kNvInitName))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Try to hook aTargetFn so that it redirects to aHookDest.
+   *
+   * Fails up front when IsCompatible() reports a conflict or when aTargetFn is
+   * null. Otherwise the resolved target is handed to WriteHook(), and on
+   * success its base address is appended to mPatchedFns. The return value is
+   * false if any of those steps fails, including the append.
+   */
+  bool AddHook(FARPROC aTargetFn, intptr_t aHookDest, void** aOrigFunc) {
+    const bool canPatch = IsCompatible();
+    if (!canPatch) {
+#  if defined(MOZILLA_INTERNAL_API)
+      NS_WARNING("NOP space patching is unavailable for compatibility reasons");
+#  endif
+      return false;
+    }
+
+    MOZ_ASSERT(aTargetFn);
+    if (aTargetFn == nullptr) {
+      return false;
+    }
+
+    ReadOnlyTargetFunction<MMPolicyT> target(
+        this->ResolveRedirectedAddress(aTargetFn));
+
+    // Only record the function once the hook has actually been written.
+    const bool hookWritten = WriteHook(target, aHookDest, aOrigFunc);
+    return hookWritten &&
+           mPatchedFns.append(reinterpret_cast<void*>(target.GetBaseAddress()));
+  }
+
+  /**
+   * Patch the function described by aFn so that entering it transfers control
+   * to aHookDest.
+   *
+   * The patch has two parts: a 5-byte `jmp rel32` placed in the bytes
+   * immediately before the function, and a 2-byte short jump that replaces
+   * the function's leading `mov edi, edi` and branches back into that long
+   * jump. Nothing is written unless the surrounding bytes match the expected
+   * NOP-space layout.
+   *
+   * *aOrigFunc is assigned before the final short jump is committed, so it is
+   * set even when that last write fails and this function returns false.
+   *
+   * Returns false if the bytes cannot be made writable, the expected byte
+   * pattern is not found, or a write fails; otherwise returns the result of
+   * committing the short jump.
+   */
+  bool WriteHook(const ReadOnlyTargetFunction<MMPolicyT>& aFn,
+                 intptr_t aHookDest, void** aOrigFunc) {
+    // Ensure we can read and write starting at fn - 5 (for the long jmp we're
+    // going to write) and ending at fn + 2 (for the short jmp up to the long
+    // jmp). These bytes may span two pages with different protection.
+    WritableTargetFunction<MMPolicyT> writableFn(aFn.Promote(7, -5));
+    if (!writableFn) {
+      return false;
+    }
+
+    // Check that the 5 bytes before the function are NOP's or INT 3's,
+    const uint8_t nopOrBp[] = {0x90, 0xCC};
+    if (!writableFn.template VerifyValuesAreOneOf<uint8_t, 5>(nopOrBp)) {
+      return false;
+    }
+
+    // ... and that the first 2 bytes of the function are mov(edi, edi).
+    // There are two ways to encode the same thing:
+    //
+    //   0x89 0xff == mov r/m, r
+    //   0x8b 0xff == mov r, r/m
+    //
+    // where "r" is register and "r/m" is register or memory.
+    // Windows seems to use 0x8B 0xFF. We include 0x89 0xFF out of paranoia.
+
+    // (These look backwards because little-endian)
+    const uint16_t possibleEncodings[] = {0xFF8B, 0xFF89};
+    // The trailing argument 5 is presumably the byte offset of the function
+    // start within the 7-byte window -- TODO confirm against
+    // VerifyValuesAreOneOf.
+    if (!writableFn.template VerifyValuesAreOneOf<uint16_t, 1>(
+            possibleEncodings, 5)) {
+      return false;
+    }
+
+    // Write a long jump into the space above the function.
+    writableFn.WriteByte(0xe9);  // jmp
+    if (!writableFn) {
+      return false;
+    }
+
+    writableFn.WriteDisp32(aHookDest);  // target
+    if (!writableFn) {
+      return false;
+    }
+
+    // Set aOrigFunc here, because after this point, aHookDest might be called,
+    // and aHookDest might use the aOrigFunc pointer.
+    // NOTE(review): after the 5 bytes written above, the current address is
+    // presumably the function's first byte, making this the instruction that
+    // follows the two-byte mov edi, edi -- confirm against GetCurrentAddress.
+    *aOrigFunc = reinterpret_cast<void*>(writableFn.GetCurrentAddress() +
+                                         sizeof(uint16_t));
+
+    // Short jump up into our long jump.
+    // (0xF9EB is stored little-endian as EB F9: a short jmp with rel8 = -7.)
+    return writableFn.CommitAndWriteShort(0xF9EB);  // jmp $-5
+  }
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // defined(_M_IX86)
+
+#endif  // mozilla_interceptor_PatcherNopSpace_h
diff --git a/mozglue/misc/interceptor/RangeMap.h b/mozglue/misc/interceptor/RangeMap.h
new file mode 100644
index 0000000000..d45d031613
--- /dev/null
+++ b/mozglue/misc/interceptor/RangeMap.h
@@ -0,0 +1,142 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_RangeMap_h
+#define mozilla_interceptor_RangeMap_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/mozalloc.h"
+#include "mozilla/Span.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+#include <algorithm>
+
+namespace mozilla {
+namespace interceptor {
+
+/**
+ * This class maintains a vector of VMSharingPolicyUnique objects, sorted on
+ * the memory range that is used for reserving each object.
+ *
+ * This is used by VMSharingPolicyShared for creating and looking up VM regions
+ * that are within proximity of the applicable range.
+ *
+ * VMSharingPolicyUnique objects managed by this class are reused whenever
+ * possible. If no range is required, we just return the first available
+ * policy.
+ *
+ * If no range is required and no policies have yet been allocated, we create
+ * a new one with a null range as a default.
+ */
+template <typename MMPolicyT>
+class RangeMap final {
+ private:
+  /**
+   * This class is used as the comparison key for sorting and insertion.
+   */
+  class Range {
+   public:
+    constexpr Range() : mBase(0), mLimit(0) {}
+
+    explicit Range(const Maybe<Span<const uint8_t>>& aBounds)
+        : mBase(aBounds ? reinterpret_cast<const uintptr_t>(
+                              MMPolicyT::GetLowerBound(aBounds.ref()))
+                        : 0),
+          mLimit(aBounds ? reinterpret_cast<const uintptr_t>(
+                               MMPolicyT::GetUpperBoundIncl(aBounds.ref()))
+                         : 0) {}
+
+    Range& operator=(const Range&) = default;
+    Range(const Range&) = default;
+    Range(Range&&) = default;
+    Range& operator=(Range&&) = default;
+
+    bool operator<(const Range& aOther) const {
+      return mBase < aOther.mBase ||
+             (mBase == aOther.mBase && mLimit < aOther.mLimit);
+    }
+
+    bool Contains(const Range& aOther) const {
+      return mBase <= aOther.mBase && mLimit >= aOther.mLimit;
+    }
+
+   private:
+    uintptr_t mBase;
+    uintptr_t mLimit;
+  };
+
+  class PolicyInfo final : public Range {
+   public:
+    explicit PolicyInfo(const Range& aRange)
+        : Range(aRange),
+          mPolicy(MakeUnique<VMSharingPolicyUnique<MMPolicyT>>()) {}
+
+    PolicyInfo(const PolicyInfo&) = delete;
+    PolicyInfo& operator=(const PolicyInfo&) = delete;
+
+    PolicyInfo(PolicyInfo&& aOther) = default;
+    PolicyInfo& operator=(PolicyInfo&& aOther) = default;
+
+    VMSharingPolicyUnique<MMPolicyT>* GetPolicy() { return mPolicy.get(); }
+
+   private:
+    UniquePtr<VMSharingPolicyUnique<MMPolicyT>> mPolicy;
+  };
+
+  using VectorType = Vector<PolicyInfo, 0, InfallibleAllocPolicy>;
+
+ public:
+  constexpr RangeMap() : mPolicies(nullptr) {}
+
+  VMSharingPolicyUnique<MMPolicyT>* GetPolicy(
+      const Maybe<Span<const uint8_t>>& aBounds) {
+    Range testRange(aBounds);
+    // The vector is created lazily; the constexpr constructor leaves it null.
+    if (!mPolicies) {
+      mPolicies = new VectorType();
+    }
+
+    // Without bounds any policy will do, so use (or create) the first one.
+    if (!aBounds) {
+      if (mPolicies->empty()) {
+        if (!mPolicies->append(PolicyInfo(testRange))) {
+          return nullptr;
+        }
+      }
+
+      return GetFirstPolicy();
+    }
+
+    // mPolicies is sorted: find the first entry not ordered before testRange.
+    auto itr =
+        std::lower_bound(mPolicies->begin(), mPolicies->end(), testRange);
+    if (itr != mPolicies->end() && itr->Contains(testRange)) {
+      return itr->GetPolicy();
+    }
+    // Nothing reusable: insert a new policy at the position found above.
+    itr = mPolicies->insert(itr, PolicyInfo(testRange));
+
+    MOZ_ASSERT(std::is_sorted(mPolicies->begin(), mPolicies->end()));
+
+    return itr->GetPolicy();
+  }
+
+ private:
+  VMSharingPolicyUnique<MMPolicyT>* GetFirstPolicy() {
+    MOZ_RELEASE_ASSERT(mPolicies && !mPolicies->empty());
+    return mPolicies->begin()->GetPolicy();
+  }
+
+ private:
+  VectorType* mPolicies;
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_RangeMap_h
diff --git a/mozglue/misc/interceptor/TargetFunction.h b/mozglue/misc/interceptor/TargetFunction.h
new file mode 100644
index 0000000000..40be1ad08b
--- /dev/null
+++ b/mozglue/misc/interceptor/TargetFunction.h
@@ -0,0 +1,1000 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_TargetFunction_h
+#define mozilla_interceptor_TargetFunction_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/BinarySearch.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/Types.h"
+#include "mozilla/Unused.h"
+#include "mozilla/Vector.h"
+
+#include <memory>
+#include <type_traits>
+
+namespace mozilla {
+namespace interceptor {
+
+#if defined(_M_IX86)
+
+template <typename T>
+bool CommitAndWriteShortInternal(const T& aMMPolicy, void* aDest,
+                                 uint16_t aValue);
+
+template <>
+inline bool CommitAndWriteShortInternal<MMPolicyInProcess>(
+    const MMPolicyInProcess& aMMPolicy, void* aDest, uint16_t aValue) {
+  return aMMPolicy.WriteAtomic(aDest, aValue);
+}
+
+template <>
+inline bool CommitAndWriteShortInternal<MMPolicyOutOfProcess>(
+    const MMPolicyOutOfProcess& aMMPolicy, void* aDest, uint16_t aValue) {
+  return aMMPolicy.Write(aDest, &aValue, sizeof(uint16_t));
+}
+
+#endif  // defined(_M_IX86)
+
+// Forward declaration
+template <typename MMPolicy>
+class ReadOnlyTargetFunction;
+
+template <typename MMPolicy>
+class MOZ_STACK_CLASS WritableTargetFunction final {
+  class AutoProtect final {
+    using ProtectParams = Tuple<uintptr_t, uint32_t>;
+
+   public:
+    explicit AutoProtect(const MMPolicy& aMMPolicy) : mMMPolicy(aMMPolicy) {}
+
+    AutoProtect(const MMPolicy& aMMPolicy, uintptr_t aAddr, size_t aNumBytes,
+                uint32_t aNewProt)
+        : mMMPolicy(aMMPolicy) {
+      const uint32_t pageSize = mMMPolicy.GetPageSize();
+      const uintptr_t limit = aAddr + aNumBytes - 1;
+      const uintptr_t limitPageNum = limit / pageSize;
+      const uintptr_t basePageNum = aAddr / pageSize;
+      const uintptr_t numPagesToChange = limitPageNum - basePageNum + 1;
+
+      // We'll use the base address of the page instead of aAddr
+      uintptr_t curAddr = basePageNum * pageSize;
+
+      // Now change the protection on each page
+      for (uintptr_t curPage = 0; curPage < numPagesToChange;
+           ++curPage, curAddr += pageSize) {
+        uint32_t prevProt;
+        if (!aMMPolicy.Protect(reinterpret_cast<void*>(curAddr), pageSize,
+                               aNewProt, &prevProt)) {
+          Clear();
+          return;
+        }
+
+        // Save the previous protection for curAddr so that we can revert this
+        // in the destructor.
+        if (!mProtects.append(MakeTuple(curAddr, prevProt))) {
+          Clear();
+          return;
+        }
+      }
+    }
+
+    AutoProtect(AutoProtect&& aOther)
+        : mMMPolicy(aOther.mMMPolicy), mProtects(std::move(aOther.mProtects)) {
+      aOther.mProtects.clear();
+    }
+
+    ~AutoProtect() { Clear(); }
+
+    explicit operator bool() const { return !mProtects.empty(); }
+
+    AutoProtect(const AutoProtect&) = delete;
+    AutoProtect& operator=(const AutoProtect&) = delete;
+    AutoProtect& operator=(AutoProtect&&) = delete;
+
+   private:
+    void Clear() {
+      const uint32_t pageSize = mMMPolicy.GetPageSize();
+      for (auto&& entry : mProtects) {
+        uint32_t prevProt;
+        DebugOnly<bool> ok =
+            mMMPolicy.Protect(reinterpret_cast<void*>(Get<0>(entry)), pageSize,
+                              Get<1>(entry), &prevProt);
+        MOZ_ASSERT(ok);
+      }
+
+      mProtects.clear();
+    }
+
+   private:
+    const MMPolicy& mMMPolicy;
+    // One entry is stored per page, so two entries of inline storage cover the
+    // common worst case of a range that straddles a single page boundary.
+    Vector<ProtectParams, 2> mProtects;
+  };
+
+ public:
+  /**
+   * Used to initialize an invalid WritableTargetFunction, thus signalling an
+   * error.
+   */
+  explicit WritableTargetFunction(const MMPolicy& aMMPolicy)
+      : mMMPolicy(aMMPolicy),
+        mFunc(0),
+        mNumBytes(0),
+        mOffset(0),
+        mStartWriteOffset(0),
+        mAccumulatedStatus(false),
+        mProtect(aMMPolicy) {}
+
+  WritableTargetFunction(const MMPolicy& aMMPolicy, uintptr_t aFunc,
+                         size_t aNumBytes)
+      : mMMPolicy(aMMPolicy),
+        mFunc(aFunc),
+        mNumBytes(aNumBytes),
+        mOffset(0),
+        mStartWriteOffset(0),
+        mAccumulatedStatus(true),
+        mProtect(aMMPolicy, aFunc, aNumBytes, PAGE_EXECUTE_READWRITE) {}
+
+  WritableTargetFunction(WritableTargetFunction&& aOther)
+      : mMMPolicy(aOther.mMMPolicy),
+        mFunc(aOther.mFunc),
+        mNumBytes(aOther.mNumBytes),
+        mOffset(aOther.mOffset),
+        mStartWriteOffset(aOther.mStartWriteOffset),
+        mLocalBytes(std::move(aOther.mLocalBytes)),
+        mAccumulatedStatus(aOther.mAccumulatedStatus),
+        mProtect(std::move(aOther.mProtect)) {
+    aOther.mAccumulatedStatus = false;
+  }
+
+  ~WritableTargetFunction() {
+    MOZ_ASSERT(mLocalBytes.empty(), "Did you forget to call Commit?");
+  }
+
+  WritableTargetFunction(const WritableTargetFunction&) = delete;
+  WritableTargetFunction& operator=(const WritableTargetFunction&) = delete;
+  WritableTargetFunction& operator=(WritableTargetFunction&&) = delete;
+
+  /**
+   * @return true if the buffered bytes were written, or if there were none.
+   */
+  bool Commit() {
+    if (!(*this)) {
+      return false;
+    }
+
+    if (mLocalBytes.empty()) {
+      // Nothing to commit, treat like success
+      return true;
+    }
+    // Copy the buffered bytes to the target, starting at mStartWriteOffset.
+    bool ok =
+        mMMPolicy.Write(reinterpret_cast<void*>(mFunc + mStartWriteOffset),
+                        mLocalBytes.begin(), mLocalBytes.length());
+    if (!ok) {
+      return false;
+    }
+
+    mMMPolicy.FlushInstructionCache();
+    // Later writes continue right after the bytes that were just committed.
+    mStartWriteOffset += mLocalBytes.length();
+
+    mLocalBytes.clear();
+    return true;
+  }
+
+  explicit operator bool() const { return mProtect && mAccumulatedStatus; }
+
+  void WriteByte(const uint8_t& aValue) {
+    if (!mLocalBytes.append(aValue)) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += sizeof(uint8_t);
+  }
+
+  Maybe<uint8_t> ReadByte() {
+    // Reading is only permitted prior to any writing
+    MOZ_ASSERT(mOffset == mStartWriteOffset);
+    if (mOffset > mStartWriteOffset) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    uint8_t value;
+    if (!mMMPolicy.Read(&value, reinterpret_cast<const void*>(mFunc + mOffset),
+                        sizeof(uint8_t))) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    mOffset += sizeof(uint8_t);
+    mStartWriteOffset += sizeof(uint8_t);
+    return Some(value);
+  }
+
+  Maybe<uintptr_t> ReadEncodedPtr() {
+    // Reading is only permitted prior to any writing
+    MOZ_ASSERT(mOffset == mStartWriteOffset);
+    if (mOffset > mStartWriteOffset) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    uintptr_t value;
+    if (!mMMPolicy.Read(&value, reinterpret_cast<const void*>(mFunc + mOffset),
+                        sizeof(uintptr_t))) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    mOffset += sizeof(uintptr_t);
+    mStartWriteOffset += sizeof(uintptr_t);
+    return Some(ReadOnlyTargetFunction<MMPolicy>::DecodePtr(value));
+  }
+
+  Maybe<uint32_t> ReadLong() {
+    // Reading is only permitted prior to any writing
+    MOZ_ASSERT(mOffset == mStartWriteOffset);
+    if (mOffset > mStartWriteOffset) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    uint32_t value;
+    if (!mMMPolicy.Read(&value, reinterpret_cast<const void*>(mFunc + mOffset),
+                        sizeof(uint32_t))) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    mOffset += sizeof(uint32_t);
+    mStartWriteOffset += sizeof(uint32_t);
+    return Some(value);
+  }
+
+  void WriteShort(const uint16_t& aValue) {
+    if (!mLocalBytes.append(reinterpret_cast<const uint8_t*>(&aValue),
+                            sizeof(uint16_t))) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += sizeof(uint16_t);
+  }
+
+#if defined(_M_IX86)
+ public:
+  /**
+   * Commits any dirty writes, and then writes a short, atomically if possible.
+   * This call may succeed in both inproc and outproc cases, but atomicity
+   * is only guaranteed in the inproc case.
+   */
+  bool CommitAndWriteShort(const uint16_t aValue) {
+    // First, commit everything that has been written until now
+    if (!Commit()) {
+      return false;
+    }
+
+    // Now immediately write the short, atomically if inproc
+    bool ok = CommitAndWriteShortInternal(
+        mMMPolicy, reinterpret_cast<void*>(mFunc + mStartWriteOffset), aValue);
+    if (!ok) {
+      return false;
+    }
+
+    mMMPolicy.FlushInstructionCache();
+    mStartWriteOffset += sizeof(uint16_t);
+    return true;
+  }
+#endif  // defined(_M_IX86)
+
+  void WriteDisp32(const uintptr_t aAbsTarget) {
+    intptr_t diff = static_cast<intptr_t>(aAbsTarget) -
+                    static_cast<intptr_t>(mFunc + mOffset + sizeof(int32_t));
+    // diff counts from the end of the disp32 field and must fit in 32 bits.
+    CheckedInt<int32_t> checkedDisp(diff);
+    MOZ_ASSERT(checkedDisp.isValid());
+    if (!checkedDisp.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+    // Buffer the bytes locally; nothing reaches the target until Commit().
+    int32_t disp = checkedDisp.value();
+    if (!mLocalBytes.append(reinterpret_cast<uint8_t*>(&disp),
+                            sizeof(int32_t))) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += sizeof(int32_t);
+  }
+
+#if defined(_M_X64) || defined(_M_ARM64)
+  void WriteLong(const uint32_t aValue) {
+    if (!mLocalBytes.append(reinterpret_cast<const uint8_t*>(&aValue),
+                            sizeof(uint32_t))) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += sizeof(uint32_t);
+  }
+#endif  // defined(_M_X64) || defined(_M_ARM64)
+
+  void WritePointer(const uintptr_t aAbsTarget) {
+    if (!mLocalBytes.append(reinterpret_cast<const uint8_t*>(&aAbsTarget),
+                            sizeof(uintptr_t))) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += sizeof(uintptr_t);
+  }
+
+  /**
+   * @param aValues N-sized array of type T listing the permissible values.
+   * @param aOffset Byte offset, added to the current offset, at which the M
+   *                values of type T are read from the target function.
+   * @return true if all M values read are members of the set in aValues;
+   *         false if any is not, or if the read itself fails.
+   */
+  template <typename T, size_t M, size_t N>
+  bool VerifyValuesAreOneOf(const T (&aValues)[N], const uint8_t aOffset = 0) {
+    // Pull the M candidate values out of the target before inspecting them.
+    T actual[M];
+    const auto src = reinterpret_cast<const void*>(mFunc + mOffset + aOffset);
+    if (!mMMPolicy.Read(actual, src, M * sizeof(T))) {
+      return false;
+    }
+
+    // Every value read must equal at least one of the permitted values.
+    for (size_t i = 0; i < M; ++i) {
+      size_t j = 0;
+      while (j < N && !(actual[i] == aValues[j])) {
+        ++j;
+      }
+      if (j == N) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  uintptr_t GetCurrentAddress() const { return mFunc + mOffset; }
+
+ private:
+  const MMPolicy& mMMPolicy;
+  const uintptr_t mFunc;
+  const size_t mNumBytes;
+  uint32_t mOffset;
+  uint32_t mStartWriteOffset;
+
+  // In an ideal world, we'd only read 5 bytes on 32-bit and 13 bytes on 64-bit,
+  // to match the minimum bytes that we need to write in order to patch the
+  // target function. Since the actual opcodes will often require us to pull in
+  // extra bytes above that minimum, we set the inline storage to be larger than
+  // those minima in an effort to give the Vector extra wiggle room before it
+  // needs to touch the heap.
+#if defined(_M_IX86)
+  static const size_t kInlineStorage = 16;
+#elif defined(_M_X64) || defined(_M_ARM64)
+  static const size_t kInlineStorage = 32;
+#endif
+  Vector<uint8_t, kInlineStorage> mLocalBytes;
+  bool mAccumulatedStatus;
+  AutoProtect mProtect;
+};
+
+template <typename MMPolicy>
+class ReadOnlyTargetBytes {
+ public:
+  ReadOnlyTargetBytes(const MMPolicy& aMMPolicy, const void* aBase)
+      : mMMPolicy(aMMPolicy), mBase(reinterpret_cast<const uint8_t*>(aBase)) {}
+
+  ReadOnlyTargetBytes(ReadOnlyTargetBytes&& aOther)
+      : mMMPolicy(aOther.mMMPolicy), mBase(aOther.mBase) {}
+
+  ReadOnlyTargetBytes(const ReadOnlyTargetBytes& aOther,
+                      const uint32_t aOffsetFromOther = 0)
+      : mMMPolicy(aOther.mMMPolicy), mBase(aOther.mBase + aOffsetFromOther) {}
+
+  void EnsureLimit(uint32_t aDesiredLimit) {
+    // In the out-proc case we use this function to read the target function's
+    // bytes in the other process into a local buffer. We don't need that for
+    // the in-process case because we already have direct access to our target
+    // function's bytes.
+  }
+
+  uint32_t TryEnsureLimit(uint32_t aDesiredLimit) {
+    // Same as EnsureLimit above.  We don't need to ensure for the in-process.
+    return aDesiredLimit;
+  }
+
+  bool IsValidAtOffset(const int8_t aOffset) const {
+    if (!aOffset) {
+      return true;
+    }
+
+    uintptr_t base = reinterpret_cast<uintptr_t>(mBase);
+    uintptr_t adjusted = base + aOffset;
+    uint32_t pageSize = mMMPolicy.GetPageSize();
+
+    // If |adjusted| is within the same page as |mBase|, we're still valid
+    if ((base / pageSize) == (adjusted / pageSize)) {
+      return true;
+    }
+
+    // Otherwise, let's query |adjusted|
+    return mMMPolicy.IsPageAccessible(adjusted);
+  }
+
+  /**
+   * This returns a pointer to a *potentially local copy* of the target
+   * function's bytes. The returned pointer should not be used for any
+   * pointer arithmetic relating to the target function.
+   */
+  const uint8_t* GetLocalBytes() const { return mBase; }
+
+  /**
+   * This returns a pointer to the target function's bytes. The returned pointer
+   * may possibly belong to another process, so while it should be used for
+   * pointer arithmetic, it *must not* be dereferenced.
+   */
+  uintptr_t GetBase() const { return reinterpret_cast<uintptr_t>(mBase); }
+
+  const MMPolicy& GetMMPolicy() const { return mMMPolicy; }
+
+  ReadOnlyTargetBytes& operator=(const ReadOnlyTargetBytes&) = delete;
+  ReadOnlyTargetBytes& operator=(ReadOnlyTargetBytes&&) = delete;
+
+ private:
+  const MMPolicy& mMMPolicy;
+  uint8_t const* const mBase;
+};
+
+template <>
+class ReadOnlyTargetBytes<MMPolicyOutOfProcess> {
+ public:
+  ReadOnlyTargetBytes(const MMPolicyOutOfProcess& aMMPolicy, const void* aBase)
+      : mMMPolicy(aMMPolicy), mBase(reinterpret_cast<const uint8_t*>(aBase)) {}
+
+  ReadOnlyTargetBytes(ReadOnlyTargetBytes&& aOther)
+      : mMMPolicy(aOther.mMMPolicy),
+        mLocalBytes(std::move(aOther.mLocalBytes)),
+        mBase(aOther.mBase) {}
+
+  ReadOnlyTargetBytes(const ReadOnlyTargetBytes& aOther)
+      : mMMPolicy(aOther.mMMPolicy), mBase(aOther.mBase) {
+    Unused << mLocalBytes.appendAll(aOther.mLocalBytes);
+  }
+
+  ReadOnlyTargetBytes(const ReadOnlyTargetBytes& aOther,
+                      const uint32_t aOffsetFromOther)
+      : mMMPolicy(aOther.mMMPolicy), mBase(aOther.mBase + aOffsetFromOther) {
+    if (aOffsetFromOther >= aOther.mLocalBytes.length()) {
+      return;
+    }
+
+    Unused << mLocalBytes.append(aOther.mLocalBytes.begin() + aOffsetFromOther,
+                                 aOther.mLocalBytes.end());
+  }
+
+  void EnsureLimit(uint32_t aDesiredLimit) {
+    size_t prevSize = mLocalBytes.length();
+    if (aDesiredLimit < prevSize) {
+      return;
+    }
+    // |aDesiredLimit| is an index that must be readable, hence the extra byte.
+    size_t newSize = aDesiredLimit + 1;
+    if (newSize < kInlineStorage) {
+      // Always try to read as much memory as we can at once
+      newSize = kInlineStorage;
+    }
+
+    bool resizeOk = mLocalBytes.resize(newSize);
+    MOZ_RELEASE_ASSERT(resizeOk);
+
+    bool ok = mMMPolicy.Read(&mLocalBytes[prevSize], mBase + prevSize,
+                             newSize - prevSize);
+    if (ok) {
+      return;
+    }
+
+    // We couldn't pull more bytes than needed (which may happen if those extra
+    // bytes are not accessible). In this case, we try just to get the bare
+    // minimum.
+    newSize = aDesiredLimit + 1;
+    resizeOk = mLocalBytes.resize(newSize);
+    MOZ_RELEASE_ASSERT(resizeOk);
+    // If even the minimal read fails, the release assert below aborts.
+    ok = mMMPolicy.Read(&mLocalBytes[prevSize], mBase + prevSize,
+                        newSize - prevSize);
+    MOZ_RELEASE_ASSERT(ok);
+  }
+
+  // This function tries to ensure as many bytes as possible up to
+  // |aDesiredLimit| bytes, returning how many bytes were actually ensured.
+  // As EnsureLimit does, we allocate one extra local byte so that
+  // mLocalBytes always holds at least one byte even if the target memory
+  // was not accessible at all.
+  uint32_t TryEnsureLimit(uint32_t aDesiredLimit) {
+    size_t prevSize = mLocalBytes.length();
+    if (aDesiredLimit < prevSize) {
+      return aDesiredLimit;
+    }
+
+    size_t newSize = aDesiredLimit;
+    if (newSize < kInlineStorage) {
+      // Always try to read as much memory as we can at once
+      newSize = kInlineStorage;
+    }
+
+    bool resizeOk = mLocalBytes.resize(newSize);
+    MOZ_RELEASE_ASSERT(resizeOk);
+
+    size_t bytesRead = mMMPolicy.TryRead(&mLocalBytes[prevSize],
+                                         mBase + prevSize, newSize - prevSize);
+
+    newSize = prevSize + bytesRead;
+
+    resizeOk = mLocalBytes.resize(newSize + 1);
+    MOZ_RELEASE_ASSERT(resizeOk);
+
+    mLocalBytes[newSize] = 0;
+    return newSize;
+  }
+
+  bool IsValidAtOffset(const int8_t aOffset) const {
+    if (!aOffset) {
+      return true;
+    }
+
+    uintptr_t base = reinterpret_cast<uintptr_t>(mBase);
+    uintptr_t adjusted = base + aOffset;
+    uint32_t pageSize = mMMPolicy.GetPageSize();
+
+    // If |adjusted| is within the same page as |mBase|, we're still valid
+    if ((base / pageSize) == (adjusted / pageSize)) {
+      return true;
+    }
+
+    // Otherwise, let's query |adjusted|
+    return mMMPolicy.IsPageAccessible(adjusted);
+  }
+
+  /**
+   * This returns a pointer to a *potentially local copy* of the target
+   * function's bytes. The returned pointer should not be used for any
+   * pointer arithmetic relating to the target function.
+   */
+  const uint8_t* GetLocalBytes() const {
+    if (mLocalBytes.empty()) {
+      return nullptr;
+    }
+
+    return mLocalBytes.begin();
+  }
+
+  /**
+   * This returns a pointer to the target function's bytes. The returned pointer
+   * may possibly belong to another process, so while it should be used for
+   * pointer arithmetic, it *must not* be dereferenced.
+   */
+  uintptr_t GetBase() const { return reinterpret_cast<uintptr_t>(mBase); }
+
+  const MMPolicyOutOfProcess& GetMMPolicy() const { return mMMPolicy; }
+
+  ReadOnlyTargetBytes& operator=(const ReadOnlyTargetBytes&) = delete;
+  ReadOnlyTargetBytes& operator=(ReadOnlyTargetBytes&&) = delete;
+
+ private:
+  // In an ideal world, we'd only read 5 bytes on 32-bit and 13 bytes on 64-bit,
+  // to match the minimum bytes that we need to write in order to patch the
+  // target function. Since the actual opcodes will often require us to pull in
+  // extra bytes above that minimum, we set the inline storage to be larger than
+  // those minima in an effort to give the Vector extra wiggle room before it
+  // needs to touch the heap.
+#if defined(_M_IX86)
+  static const size_t kInlineStorage = 16;
+#elif defined(_M_X64) || defined(_M_ARM64)
+  static const size_t kInlineStorage = 32;
+#endif
+
+  const MMPolicyOutOfProcess& mMMPolicy;
+  Vector<uint8_t, kInlineStorage> mLocalBytes;
+  uint8_t const* const mBase;
+};
+
+template <typename MMPolicy>
+class TargetBytesPtr {
+ public:
+  typedef TargetBytesPtr<MMPolicy> Type;
+
+  static Type Make(const MMPolicy& aMMPolicy, const void* aFunc) {
+    return TargetBytesPtr(aMMPolicy, aFunc);
+  }
+
+  static Type CopyFromOffset(const TargetBytesPtr& aOther,
+                             const uint32_t aOffsetFromOther) {
+    return TargetBytesPtr(aOther, aOffsetFromOther);
+  }
+
+  ReadOnlyTargetBytes<MMPolicy>* operator->() { return &mTargetBytes; }
+
+  TargetBytesPtr(TargetBytesPtr&& aOther)
+      : mTargetBytes(std::move(aOther.mTargetBytes)) {}
+
+  TargetBytesPtr(const TargetBytesPtr& aOther)
+      : mTargetBytes(aOther.mTargetBytes) {}
+
+  TargetBytesPtr& operator=(const TargetBytesPtr&) = delete;
+  TargetBytesPtr& operator=(TargetBytesPtr&&) = delete;
+
+ private:
+  TargetBytesPtr(const MMPolicy& aMMPolicy, const void* aFunc)
+      : mTargetBytes(aMMPolicy, aFunc) {}
+
+  TargetBytesPtr(const TargetBytesPtr& aOther, const uint32_t aOffsetFromOther)
+      : mTargetBytes(aOther.mTargetBytes, aOffsetFromOther) {}
+
+  ReadOnlyTargetBytes<MMPolicy> mTargetBytes;
+};
+
+template <>
+class TargetBytesPtr<MMPolicyOutOfProcess> {
+ public:
+  typedef std::shared_ptr<ReadOnlyTargetBytes<MMPolicyOutOfProcess>> Type;
+
+  static Type Make(const MMPolicyOutOfProcess& aMMPolicy, const void* aFunc) {
+    return std::make_shared<ReadOnlyTargetBytes<MMPolicyOutOfProcess>>(
+        aMMPolicy, aFunc);
+  }
+
+  static Type CopyFromOffset(const Type& aOther,
+                             const uint32_t aOffsetFromOther) {
+    return std::make_shared<ReadOnlyTargetBytes<MMPolicyOutOfProcess>>(
+        *aOther, aOffsetFromOther);
+  }
+};
+
+template <typename MMPolicy>
+class MOZ_STACK_CLASS ReadOnlyTargetFunction final {
+ public:
+  ReadOnlyTargetFunction(const MMPolicy& aMMPolicy, const void* aFunc)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(aMMPolicy, aFunc)),
+        mOffset(0) {}
+
+  ReadOnlyTargetFunction(const MMPolicy& aMMPolicy, FARPROC aFunc)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(
+            aMMPolicy, reinterpret_cast<const void*>(aFunc))),
+        mOffset(0) {}
+
+  ReadOnlyTargetFunction(const MMPolicy& aMMPolicy, uintptr_t aFunc)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(
+            aMMPolicy, reinterpret_cast<const void*>(aFunc))),
+        mOffset(0) {}
+
+  ReadOnlyTargetFunction(ReadOnlyTargetFunction&& aOther)
+      : mTargetBytes(std::move(aOther.mTargetBytes)), mOffset(aOther.mOffset) {}
+
+  ReadOnlyTargetFunction& operator=(const ReadOnlyTargetFunction&) = delete;
+  ReadOnlyTargetFunction& operator=(ReadOnlyTargetFunction&&) = delete;
+
+  ~ReadOnlyTargetFunction() = default;
+
+  ReadOnlyTargetFunction operator+(const uint32_t aOffset) const {
+    return ReadOnlyTargetFunction(*this, mOffset + aOffset);
+  }
+
+  uintptr_t GetBaseAddress() const { return mTargetBytes->GetBase(); }
+
+  uintptr_t GetAddress() const { return mTargetBytes->GetBase() + mOffset; }
+
+  uintptr_t AsEncodedPtr() const {
+    return EncodePtr(
+        reinterpret_cast<void*>(mTargetBytes->GetBase() + mOffset));
+  }
+
+  static uintptr_t EncodePtr(void* aPtr) {
+    return reinterpret_cast<uintptr_t>(::EncodePointer(aPtr));
+  }
+
+  static uintptr_t DecodePtr(uintptr_t aEncodedPtr) {
+    return reinterpret_cast<uintptr_t>(
+        ::DecodePointer(reinterpret_cast<PVOID>(aEncodedPtr)));
+  }
+
+  bool IsValidAtOffset(const int8_t aOffset) const {
+    return mTargetBytes->IsValidAtOffset(aOffset);
+  }
+
+#if defined(_M_ARM64)
+
+  uint32_t ReadNextInstruction() {
+    // EnsureLimit() takes an inclusive index: the instruction's last byte.
+    mTargetBytes->EnsureLimit(mOffset + sizeof(uint32_t) - 1);
+    const uint8_t* src = mTargetBytes->GetLocalBytes() + mOffset;
+    mOffset += sizeof(uint32_t);
+    return *reinterpret_cast<const uint32_t*>(src);
+  }
+
+  bool BackUpOneInstruction() {
+    // Step back one 4-byte instruction unless that would go before offset 0.
+    const bool canStepBack = mOffset >= sizeof(uint32_t);
+    if (canStepBack) {
+      mOffset -= sizeof(uint32_t);
+    }
+    return canStepBack;
+  }
+
+#else
+
+  uint8_t const& operator*() const {
+    mTargetBytes->EnsureLimit(mOffset);
+    return *(mTargetBytes->GetLocalBytes() + mOffset);
+  }
+
+  uint8_t const& operator[](uint32_t aIndex) const {
+    mTargetBytes->EnsureLimit(mOffset + aIndex);
+    return *(mTargetBytes->GetLocalBytes() + mOffset + aIndex);
+  }
+
+  ReadOnlyTargetFunction& operator++() {
+    ++mOffset;
+    return *this;
+  }
+
+  ReadOnlyTargetFunction& operator+=(uint32_t aDelta) {
+    mOffset += aDelta;
+    return *this;
+  }
+
+  uintptr_t ReadDisp32AsAbsolute() {
+    // EnsureLimit() takes an inclusive index, so ask for the last disp32 byte.
+    mTargetBytes->EnsureLimit(mOffset + sizeof(int32_t) - 1);
+    int32_t disp = *reinterpret_cast<const int32_t*>(
+        mTargetBytes->GetLocalBytes() + mOffset);
+    const uintptr_t next = mTargetBytes->GetBase() + mOffset + sizeof(int32_t);
+    mOffset += sizeof(int32_t);
+    return next + disp;  // disp32 is relative to the end of its own field
+  }
+
+  bool IsRelativeShortJump(uintptr_t* aOutTarget) {
+    if ((*this)[0] != 0xeb) {  // not the short-jump (jmp rel8) opcode
+      return false;
+    }
+    const int8_t rel8 = static_cast<int8_t>((*this)[1]);
+    *aOutTarget = GetAddress() + 2 + rel8;  // relative to the next instruction
+    return true;
+  }
+
+#  if defined(_M_X64)
+  // Currently this function is used only in x64.
+  bool IsRelativeNearJump(uintptr_t* aOutTarget) {
+    if ((*this)[0] == 0xe9) {
+      *aOutTarget = (*this + 1).ReadDisp32AsAbsolute();
+      return true;
+    }
+    return false;
+  }
+#  endif  // defined(_M_X64)
+
+  bool IsIndirectNearJump(uintptr_t* aOutTarget) {
+    if ((*this)[0] == 0xff && (*this)[1] == 0x25) {
+#  if defined(_M_X64)
+      *aOutTarget = (*this + 2).ChasePointerFromDisp();
+#  else
+      *aOutTarget = (*this + 2).template ChasePointer<uintptr_t*>();
+#  endif  // defined(_M_X64)
+      return true;
+    }
+#  if defined(_M_X64)
+    else if ((*this)[0] == 0x48 && (*this)[1] == 0xff && (*this)[2] == 0x25) {
+      // According to Intel SDM, JMP does not have REX.W except JMP m16:64,
+      // but CPU can execute JMP r/m32 with REX.W.  We handle it just in case.
+      *aOutTarget = (*this + 3).ChasePointerFromDisp();
+      return true;
+    }
+#  endif  // defined(_M_X64)
+    return false;
+  }
+
+#endif  // defined(_M_ARM64)
+
+  void Rewind() { mOffset = 0; }
+
+  uint32_t GetOffset() const { return mOffset; }
+
+  uintptr_t OffsetToAbsolute(const uint8_t aOffset) const {
+    return mTargetBytes->GetBase() + mOffset + aOffset;
+  }
+
+  uintptr_t GetCurrentAbsolute() const { return OffsetToAbsolute(0); }
+
+  /**
+   * This method promotes the code referenced by this object to be writable.
+   *
+   * @param aLen    The length of the function's code to make writable. If set
+   *                to zero, this object's current offset is used as the length.
+   * @param aOffset The result's base address will be offset from this
+   *                object's base address by |aOffset| bytes. This value may be
+   *                negative.
+   */
+  WritableTargetFunction<MMPolicy> Promote(const uint32_t aLen = 0,
+                                           const int8_t aOffset = 0) const {
+    const uint32_t effectiveLength = aLen ? aLen : mOffset;
+    MOZ_RELEASE_ASSERT(effectiveLength,
+                       "Cannot Promote a zero-length function");
+
+    if (!mTargetBytes->IsValidAtOffset(aOffset)) {
+      return WritableTargetFunction<MMPolicy>(mTargetBytes->GetMMPolicy());
+    }
+
+    WritableTargetFunction<MMPolicy> result(mTargetBytes->GetMMPolicy(),
+                                            mTargetBytes->GetBase() + aOffset,
+                                            effectiveLength);
+
+    return result;
+  }
+
+ private:
+  template <typename T>
+  struct ChasePointerHelper {
+    template <typename MMPolicy_>
+    static T Result(const MMPolicy_&, T aValue) {
+      return aValue;
+    }
+  };
+
+  template <typename T>
+  struct ChasePointerHelper<T*> {
+    template <typename MMPolicy_>
+    static auto Result(const MMPolicy_& aPolicy, T* aValue) {
+      ReadOnlyTargetFunction<MMPolicy_> ptr(aPolicy, aValue);
+      return ptr.template ChasePointer<T>();
+    }
+  };
+
+ public:
+  // Keep chasing pointers until T is not a pointer type anymore
+  template <typename T>
+  auto ChasePointer() {
+    mTargetBytes->EnsureLimit(mOffset + sizeof(T));
+    const std::remove_cv_t<T> result =
+        *reinterpret_cast<const std::remove_cv_t<T>*>(
+            mTargetBytes->GetLocalBytes() + mOffset);
+    return ChasePointerHelper<std::remove_cv_t<T>>::Result(
+        mTargetBytes->GetMMPolicy(), result);
+  }
+
+  uintptr_t ChasePointerFromDisp() {
+    uintptr_t ptrFromDisp = ReadDisp32AsAbsolute();
+    ReadOnlyTargetFunction<MMPolicy> ptr(
+        mTargetBytes->GetMMPolicy(),
+        reinterpret_cast<const void*>(ptrFromDisp));
+    return ptr.template ChasePointer<uintptr_t>();
+  }
+
+ private:
+  ReadOnlyTargetFunction(const ReadOnlyTargetFunction& aOther)
+      : mTargetBytes(aOther.mTargetBytes), mOffset(aOther.mOffset) {}
+
+  ReadOnlyTargetFunction(const ReadOnlyTargetFunction& aOther,
+                         const uint32_t aOffsetFromOther)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::CopyFromOffset(
+            aOther.mTargetBytes, aOffsetFromOther)),
+        mOffset(0) {}
+
+ private:
+  mutable typename TargetBytesPtr<MMPolicy>::Type mTargetBytes;
+  uint32_t mOffset;
+};
+
+template <typename MMPolicy, typename T>
+class MOZ_STACK_CLASS TargetObject {
+  mutable typename TargetBytesPtr<MMPolicy>::Type mTargetBytes;
+
+  TargetObject(const MMPolicy& aMMPolicy, const void* aBaseAddress)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(aMMPolicy, aBaseAddress)) {
+    mTargetBytes->EnsureLimit(sizeof(T));
+  }
+
+ public:
+  explicit TargetObject(const MMPolicy& aMMPolicy)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(aMMPolicy, nullptr)) {}
+
+  TargetObject(const MMPolicy& aMMPolicy, uintptr_t aBaseAddress)
+      : TargetObject(aMMPolicy, reinterpret_cast<const void*>(aBaseAddress)) {}
+
+  TargetObject(const TargetObject&) = delete;
+  TargetObject(TargetObject&&) = delete;
+  TargetObject& operator=(const TargetObject&) = delete;
+  TargetObject& operator=(TargetObject&&) = delete;
+
+  explicit operator bool() const {
+    return mTargetBytes->GetBase() && mTargetBytes->GetLocalBytes();
+  }
+
+  const T* operator->() const {
+    return reinterpret_cast<const T*>(mTargetBytes->GetLocalBytes());
+  }
+
+  const T* GetLocalBase() const {
+    return reinterpret_cast<const T*>(mTargetBytes->GetLocalBytes());
+  }
+};
+
+template <typename MMPolicy, typename T>
+class MOZ_STACK_CLASS TargetObjectArray {
+  mutable typename TargetBytesPtr<MMPolicy>::Type mTargetBytes;
+  size_t mNumOfItems;
+
+  TargetObjectArray(const MMPolicy& aMMPolicy, const void* aBaseAddress,
+                    size_t aNumOfItems)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(aMMPolicy, aBaseAddress)),
+        mNumOfItems(aNumOfItems) {
+    uint32_t itemsRead =
+        mTargetBytes->TryEnsureLimit(sizeof(T) * mNumOfItems) / sizeof(T);
+    // itemsRead may be bigger than the requested amount because of buffering,
+    // but mNumOfItems should not include extra bytes of buffering.
+    if (itemsRead < mNumOfItems) {
+      mNumOfItems = itemsRead;
+    }
+  }
+
+  const T* GetLocalBase() const {
+    return reinterpret_cast<const T*>(mTargetBytes->GetLocalBytes());
+  }
+
+ public:
+  explicit TargetObjectArray(const MMPolicy& aMMPolicy)
+      : mTargetBytes(TargetBytesPtr<MMPolicy>::Make(aMMPolicy, nullptr)),
+        mNumOfItems(0) {}
+
+  TargetObjectArray(const MMPolicy& aMMPolicy, uintptr_t aBaseAddress,
+                    size_t aNumOfItems)
+      : TargetObjectArray(aMMPolicy,
+                          reinterpret_cast<const void*>(aBaseAddress),
+                          aNumOfItems) {}
+
+  TargetObjectArray(const TargetObjectArray&) = delete;
+  TargetObjectArray(TargetObjectArray&&) = delete;
+  TargetObjectArray& operator=(const TargetObjectArray&) = delete;
+  TargetObjectArray& operator=(TargetObjectArray&&) = delete;
+
+  explicit operator bool() const {
+    return mTargetBytes->GetBase() && mNumOfItems;
+  }
+
+  const T* operator[](size_t aIndex) const {
+    if (aIndex >= mNumOfItems) {
+      return nullptr;
+    }
+
+    return &GetLocalBase()[aIndex];
+  }
+
+  template <typename Comparator>
+  bool BinarySearchIf(const Comparator& aCompare,
+                      size_t* aMatchOrInsertionPoint) const {
+    return mozilla::BinarySearchIf(GetLocalBase(), 0, mNumOfItems, aCompare,
+                                   aMatchOrInsertionPoint);
+  }
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_TargetFunction_h
diff --git a/mozglue/misc/interceptor/Trampoline.h b/mozglue/misc/interceptor/Trampoline.h
new file mode 100644
index 0000000000..c471408bd1
--- /dev/null
+++ b/mozglue/misc/interceptor/Trampoline.h
@@ -0,0 +1,517 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_Trampoline_h
+#define mozilla_interceptor_Trampoline_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Types.h"
+#include "mozilla/WindowsProcessMitigations.h"
+
+namespace mozilla {
+namespace interceptor {
+
+template <typename MMPolicy>
+class MOZ_STACK_CLASS Trampoline final {
+ public:
+  Trampoline(const MMPolicy* aMMPolicy, uint8_t* const aLocalBase,
+             const uintptr_t aRemoteBase, const uint32_t aChunkSize)
+      : mMMPolicy(aMMPolicy),
+        mPrevLocalProt(0),
+        mLocalBase(aLocalBase),
+        mRemoteBase(aRemoteBase),
+        mOffset(0),
+        mExeOffset(0),
+        mMaxOffset(aChunkSize),
+        mAccumulatedStatus(true) {
+    if (!::VirtualProtect(aLocalBase, aChunkSize,
+                          MMPolicy::GetTrampWriteProtFlags(),
+                          &mPrevLocalProt)) {
+      mPrevLocalProt = 0;
+    }
+  }
+
+  Trampoline(Trampoline&& aOther)
+      : mMMPolicy(aOther.mMMPolicy),
+        mPrevLocalProt(aOther.mPrevLocalProt),
+        mLocalBase(aOther.mLocalBase),
+        mRemoteBase(aOther.mRemoteBase),
+        mOffset(aOther.mOffset),
+        mExeOffset(aOther.mExeOffset),
+        mMaxOffset(aOther.mMaxOffset),
+        mAccumulatedStatus(aOther.mAccumulatedStatus) {
+    aOther.mPrevLocalProt = 0;
+    aOther.mAccumulatedStatus = false;
+  }
+
+  MOZ_IMPLICIT Trampoline(decltype(nullptr))
+      : mMMPolicy(nullptr),
+        mPrevLocalProt(0),
+        mLocalBase(nullptr),
+        mRemoteBase(0),
+        mOffset(0),
+        mExeOffset(0),
+        mMaxOffset(0),
+        mAccumulatedStatus(false) {}
+
+  Trampoline(const Trampoline&) = delete;
+  Trampoline& operator=(const Trampoline&) = delete;
+
+  Trampoline& operator=(Trampoline&& aOther) {
+    Clear();
+
+    mMMPolicy = aOther.mMMPolicy;
+    mPrevLocalProt = aOther.mPrevLocalProt;
+    mLocalBase = aOther.mLocalBase;
+    mRemoteBase = aOther.mRemoteBase;
+    mOffset = aOther.mOffset;
+    mExeOffset = aOther.mExeOffset;
+    mMaxOffset = aOther.mMaxOffset;
+    mAccumulatedStatus = aOther.mAccumulatedStatus;
+
+    aOther.mPrevLocalProt = 0;
+    aOther.mAccumulatedStatus = false;
+
+    return *this;
+  }
+
+  ~Trampoline() { Clear(); }
+
+  explicit operator bool() const {
+    return IsNull() ||
+           (mLocalBase && mRemoteBase && mPrevLocalProt && mAccumulatedStatus);
+  }
+
+  bool IsNull() const { return !mMMPolicy; }
+
+#if defined(_M_ARM64)
+
+  void WriteInstruction(uint32_t aInstruction) {
+    const uint32_t kDelta = sizeof(uint32_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    if (mOffset + kDelta > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    *reinterpret_cast<uint32_t*>(mLocalBase + mOffset) = aInstruction;
+    mOffset += kDelta;
+  }
+
+  void WriteLoadLiteral(const uintptr_t aAddress, const uint8_t aReg) {
+    const uint32_t kDelta = sizeof(uint32_t) + sizeof(uintptr_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    // We grow the literal pool from the *end* of the tramp,
+    // so we need to ensure that there is enough room for both an instruction
+    // and a pointer
+    if (mOffset + kDelta > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mMaxOffset -= sizeof(uintptr_t);
+    *reinterpret_cast<uintptr_t*>(mLocalBase + mMaxOffset) = aAddress;
+
+    CheckedInt<intptr_t> pc(GetCurrentRemoteAddress());
+    if (!pc.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    // The load is PC-relative in the target process: use the remote address.
+    CheckedInt<intptr_t> literal(mRemoteBase + mMaxOffset);
+    if (!literal.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    CheckedInt<intptr_t> ptrOffset = (literal - pc);
+    if (!ptrOffset.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    // ptrOffset must be properly aligned
+    MOZ_ASSERT((ptrOffset.value() % 4) == 0);
+    ptrOffset /= 4;
+
+    CheckedInt<int32_t> offset(ptrOffset.value());
+    if (!offset.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    // Ensure that offset falls within the range of a signed 19-bit value
+    if (offset.value() < -0x40000 || offset.value() > 0x3FFFF) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    const int32_t kimm19Mask = 0x7FFFF;
+    int32_t masked = offset.value() & kimm19Mask;
+
+    MOZ_ASSERT(aReg < 32);
+    uint32_t loadInstr = 0x58000000 | (masked << 5) | aReg;
+    WriteInstruction(loadInstr);
+  }
+
+#else
+
+  void WriteByte(uint8_t aValue) {
+    const uint32_t kDelta = sizeof(uint8_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    if (mOffset >= mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    *(mLocalBase + mOffset) = aValue;
+    ++mOffset;
+  }
+
+  void WriteInteger(int32_t aValue) {
+    const uint32_t kDelta = sizeof(int32_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    if (mOffset + kDelta > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    *reinterpret_cast<int32_t*>(mLocalBase + mOffset) = aValue;
+    mOffset += kDelta;
+  }
+
+  void WriteDisp32(uintptr_t aAbsTarget) {
+    const uint32_t kDelta = sizeof(int32_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    if (mOffset + kDelta > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    // This needs to be computed from the remote location
+    intptr_t remoteTrampPosition = static_cast<intptr_t>(mRemoteBase + mOffset);
+
+    intptr_t diff =
+        static_cast<intptr_t>(aAbsTarget) - (remoteTrampPosition + kDelta);
+
+    CheckedInt<int32_t> checkedDisp(diff);
+    MOZ_ASSERT(checkedDisp.isValid());
+    if (!checkedDisp.isValid()) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    int32_t disp = checkedDisp.value();
+    *reinterpret_cast<int32_t*>(mLocalBase + mOffset) = disp;
+    mOffset += kDelta;
+  }
+
+#endif
+
+  void WritePointer(uintptr_t aValue) {
+    const uint32_t kDelta = sizeof(uintptr_t);
+
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += kDelta;
+      return;
+    }
+
+    if (mOffset + kDelta > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    *reinterpret_cast<uintptr_t*>(mLocalBase + mOffset) = aValue;
+    mOffset += kDelta;
+  }
+
+  void WriteEncodedPointer(void* aValue) {
+    uintptr_t encoded = ReadOnlyTargetFunction<MMPolicy>::EncodePtr(aValue);
+    WritePointer(encoded);
+  }
+
+  Maybe<uintptr_t> ReadPointer() {
+    if (mOffset + sizeof(uintptr_t) > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return Nothing();
+    }
+
+    auto result = Some(*reinterpret_cast<uintptr_t*>(mLocalBase + mOffset));
+    mOffset += sizeof(uintptr_t);
+    return result;
+  }
+
+  Maybe<uintptr_t> ReadEncodedPointer() {
+    Maybe<uintptr_t> encoded(ReadPointer());
+    if (!encoded) {
+      return encoded;
+    }
+
+    return Some(ReadOnlyTargetFunction<MMPolicy>::DecodePtr(encoded.value()));
+  }
+
+#if defined(_M_IX86)
+  // 32-bit only
+  void AdjustDisp32AtOffset(uint32_t aOffset, uintptr_t aAbsTarget) {
+    uint32_t effectiveOffset = mExeOffset + aOffset;
+
+    if (effectiveOffset + sizeof(int32_t) > mMaxOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    intptr_t diff = static_cast<intptr_t>(aAbsTarget) -
+                    static_cast<intptr_t>(mRemoteBase + mExeOffset);
+    *reinterpret_cast<int32_t*>(mLocalBase + effectiveOffset) += diff;
+  }
+#endif  // defined(_M_IX86)
+
+  void CopyFrom(uintptr_t aOrigBytes, uint32_t aNumBytes) {
+    if (!mMMPolicy) {
+      // Null tramp, just track offset
+      mOffset += aNumBytes;
+      return;
+    }
+    // Bounds check phrased so that mOffset + aNumBytes cannot wrap around.
+    if (mOffset > mMaxOffset || aNumBytes > mMaxOffset - mOffset) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    if (!mMMPolicy->Read(mLocalBase + mOffset,
+                         reinterpret_cast<void*>(aOrigBytes), aNumBytes)) {
+      mAccumulatedStatus = false;
+      return;
+    }
+
+    mOffset += aNumBytes;
+  }
+
+  void Rewind() { mOffset = 0; }
+
+  uintptr_t GetCurrentRemoteAddress() const { return mRemoteBase + mOffset; }
+
+  void StartExecutableCode() {
+    MOZ_ASSERT(!mExeOffset);
+    mExeOffset = mOffset;
+  }
+
+  void* EndExecutableCode() const {
+    if (!mAccumulatedStatus || !mMMPolicy) {
+      return nullptr;
+    }
+
+    // This must always return the start address of the executable code
+    // *in the target process*
+    return reinterpret_cast<void*>(mRemoteBase + mExeOffset);
+  }
+
+  uint32_t GetCurrentExecutableCodeLen() const { return mOffset - mExeOffset; }
+
+  Trampoline<MMPolicy>& operator--() {
+    MOZ_ASSERT(mOffset);
+    --mOffset;
+    return *this;
+  }
+
+ private:
+  void Clear() {
+    if (!mLocalBase || !mPrevLocalProt) {
+      return;
+    }
+
+    DebugOnly<bool> ok = !!::VirtualProtect(mLocalBase, mMaxOffset,
+                                            mPrevLocalProt, &mPrevLocalProt);
+    MOZ_ASSERT(ok);
+
+    mLocalBase = nullptr;
+    mRemoteBase = 0;
+    mPrevLocalProt = 0;
+    mAccumulatedStatus = false;
+  }
+
+ private:
+  const MMPolicy* mMMPolicy;
+  DWORD mPrevLocalProt;
+  uint8_t* mLocalBase;
+  uintptr_t mRemoteBase;
+  uint32_t mOffset;
+  uint32_t mExeOffset;
+  uint32_t mMaxOffset;
+  bool mAccumulatedStatus;
+};
+
+template <typename MMPolicy>
+class MOZ_STACK_CLASS TrampolineCollection final {
+ public:
+  class MOZ_STACK_CLASS TrampolineIterator final {
+   public:
+    Trampoline<MMPolicy> operator*() {
+      uint32_t offset = mCurTramp * mCollection.mTrampSize;
+      return Trampoline<MMPolicy>(nullptr, mCollection.mLocalBase + offset,
+                                  mCollection.mRemoteBase + offset,
+                                  mCollection.mTrampSize);
+    }
+
+    TrampolineIterator& operator++() {
+      ++mCurTramp;
+      return *this;
+    }
+
+    bool operator!=(const TrampolineIterator& aOther) const {
+      return mCurTramp != aOther.mCurTramp;
+    }
+
+   private:
+    explicit TrampolineIterator(
+        const TrampolineCollection<MMPolicy>& aCollection,
+        const uint32_t aCurTramp = 0)
+        : mCollection(aCollection), mCurTramp(aCurTramp) {}
+
+    const TrampolineCollection<MMPolicy>& mCollection;
+    uint32_t mCurTramp;
+
+    friend class TrampolineCollection<MMPolicy>;
+  };
+
+  explicit TrampolineCollection(const MMPolicy& aMMPolicy)
+      : mMMPolicy(aMMPolicy),
+        mLocalBase(0),
+        mRemoteBase(0),
+        mTrampSize(0),
+        mNumTramps(0),
+        mPrevProt(0),
+        mCS(nullptr) {}
+
+  TrampolineCollection(const MMPolicy& aMMPolicy, uint8_t* const aLocalBase,
+                       const uintptr_t aRemoteBase, const uint32_t aTrampSize,
+                       const uint32_t aNumTramps)
+      : mMMPolicy(aMMPolicy),
+        mLocalBase(aLocalBase),
+        mRemoteBase(aRemoteBase),
+        mTrampSize(aTrampSize),
+        mNumTramps(aNumTramps),
+        mPrevProt(0),
+        mCS(nullptr) {
+    if (!aNumTramps) {
+      return;
+    }
+
+    BOOL ok = mMMPolicy.Protect(aLocalBase, aNumTramps * aTrampSize,
+                                PAGE_EXECUTE_READWRITE, &mPrevProt);
+    if (!ok) {
+      // When destroying a sandboxed process that uses
+      // MITIGATION_DYNAMIC_CODE_DISABLE, we won't be allowed to write to our
+      // executable memory so we just do nothing.  If we fail to get access
+      // to memory for any other reason, we still don't want to crash but we
+      // do assert.
+      MOZ_ASSERT(IsDynamicCodeDisabled());
+      mNumTramps = 0;
+      mPrevProt = 0;
+    }
+  }
+
+  ~TrampolineCollection() {
+    if (!mPrevProt) {
+      return;
+    }
+
+    mMMPolicy.Protect(mLocalBase, mNumTramps * mTrampSize, mPrevProt,
+                      &mPrevProt);
+
+    if (mCS) {
+      ::LeaveCriticalSection(mCS);
+    }
+  }
+
+  void Lock(CRITICAL_SECTION& aCS) {
+    if (!mPrevProt || mCS) {
+      return;
+    }
+
+    mCS = &aCS;
+    ::EnterCriticalSection(&aCS);
+  }
+
+  TrampolineIterator begin() const {
+    if (!mPrevProt) {
+      return end();
+    }
+
+    return TrampolineIterator(*this);
+  }
+
+  TrampolineIterator end() const {
+    return TrampolineIterator(*this, mNumTramps);
+  }
+
+  TrampolineCollection(const TrampolineCollection&) = delete;
+  TrampolineCollection& operator=(const TrampolineCollection&) = delete;
+  TrampolineCollection& operator=(TrampolineCollection&&) = delete;
+
+  TrampolineCollection(TrampolineCollection&& aOther)
+      : mMMPolicy(aOther.mMMPolicy),
+        mLocalBase(aOther.mLocalBase),
+        mRemoteBase(aOther.mRemoteBase),
+        mTrampSize(aOther.mTrampSize),
+        mNumTramps(aOther.mNumTramps),
+        mPrevProt(aOther.mPrevProt),
+        mCS(aOther.mCS) {
+    aOther.mPrevProt = 0;
+    aOther.mCS = nullptr;
+  }
+
+ private:
+  const MMPolicy& mMMPolicy;
+  uint8_t* const mLocalBase;
+  const uintptr_t mRemoteBase;
+  const uint32_t mTrampSize;
+  uint32_t mNumTramps;
+  uint32_t mPrevProt;
+  CRITICAL_SECTION* mCS;
+
+  friend class TrampolineIterator;
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_Trampoline_h
diff --git a/mozglue/misc/interceptor/VMSharingPolicies.h b/mozglue/misc/interceptor/VMSharingPolicies.h
new file mode 100644
index 0000000000..8f93f5c1ad
--- /dev/null
+++ b/mozglue/misc/interceptor/VMSharingPolicies.h
@@ -0,0 +1,285 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_VMSharingPolicies_h
+#define mozilla_interceptor_VMSharingPolicies_h
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/Types.h"
+
+namespace mozilla {
+namespace interceptor {
+
+/**
+ * This class is an abstraction of a reservation of virtual address space that
+ * has been obtained from a VMSharingPolicy via the policy's |Reserve| method.
+ *
+ * TrampolinePool allows us to obtain a trampoline without needing to concern
+ * ourselves with the underlying implementation of the VM sharing policy.
+ *
+ * For example, VMSharingPolicyShared delegates to VMSharingPolicyUnique, but
+ * also requires taking a lock before doing so. By invoking |GetNextTrampoline|
+ * on a TrampolinePool, the caller does not need to concern themselves with
+ * that detail.
+ *
+ * We accomplish this with a recursive implementation that provides an inner
+ * TrampolinePool that is provided by the delegated VMSharingPolicy.
+ */
+template <typename VMPolicyT, typename InnerT>
+class MOZ_STACK_CLASS TrampolinePool final {
+ public:
+  TrampolinePool(TrampolinePool&& aOther) = default;
+
+  TrampolinePool(VMPolicyT& aVMPolicy, InnerT&& aInner)
+      : mVMPolicy(aVMPolicy), mInner(std::move(aInner)) {}
+
+  TrampolinePool& operator=(TrampolinePool&& aOther) = delete;
+  TrampolinePool(const TrampolinePool&) = delete;
+  TrampolinePool& operator=(const TrampolinePool&) = delete;
+
+  using MMPolicyT = typename VMPolicyT::MMPolicyT;
+
+  Maybe<Trampoline<MMPolicyT>> GetNextTrampoline() {
+    return mVMPolicy.GetNextTrampoline(mInner);
+  }
+
+#if defined(_M_X64)
+  bool IsInLowest2GB() const {
+    return mVMPolicy.IsTrampolineSpaceInLowest2GB(mInner);
+  }
+#endif  // defined(_M_X64)
+
+ private:
+  VMPolicyT& mVMPolicy;
+  InnerT mInner;
+};
+
+/**
+ * This specialization is the base case for TrampolinePool, and is used by
+ * VMSharingPolicyUnique (since that policy does not delegate anything).
+ */
+template <typename VMPolicyT>
+class MOZ_STACK_CLASS TrampolinePool<VMPolicyT, decltype(nullptr)> final {
+ public:
+  explicit TrampolinePool(VMPolicyT& aVMPolicy) : mVMPolicy(aVMPolicy) {}
+
+  TrampolinePool(TrampolinePool&& aOther) = default;
+
+  TrampolinePool& operator=(TrampolinePool&& aOther) = delete;
+  TrampolinePool(const TrampolinePool&) = delete;
+  TrampolinePool& operator=(const TrampolinePool&) = delete;
+
+  using MMPolicyT = typename VMPolicyT::MMPolicyT;
+
+  Maybe<Trampoline<MMPolicyT>> GetNextTrampoline() {
+    return mVMPolicy.GetNextTrampoline();
+  }
+
+#if defined(_M_X64)
+  bool IsInLowest2GB() const {
+    return mVMPolicy.IsTrampolineSpaceInLowest2GB();
+  }
+#endif  // defined(_M_X64)
+
+ private:
+  VMPolicyT& mVMPolicy;
+};
+
+template <typename MMPolicy>
+class VMSharingPolicyUnique : public MMPolicy {
+  using ThisType = VMSharingPolicyUnique<MMPolicy>;
+
+ public:
+  using PoolType = TrampolinePool<ThisType, decltype(nullptr)>;
+
+  template <typename... Args>
+  explicit VMSharingPolicyUnique(Args&&... aArgs)
+      : MMPolicy(std::forward<Args>(aArgs)...), mNextChunkIndex(0) {}
+
+  Maybe<PoolType> Reserve(const uintptr_t aPivotAddr,
+                          const uint32_t aMaxDistanceFromPivot) {
+    // Win32 allocates VM addresses at a 64KiB granularity, so we might as well
+    // utilize that entire 64KiB reservation.
+    uint32_t len = MMPolicy::GetAllocGranularity();
+
+    Maybe<Span<const uint8_t>> maybeBounds = MMPolicy::SpanFromPivotAndDistance(
+        len, aPivotAddr, aMaxDistanceFromPivot);
+
+    return Reserve(len, maybeBounds);
+  }
+
+  Maybe<PoolType> Reserve(const uint32_t aSize,
+                          const Maybe<Span<const uint8_t>>& aBounds) {
+    uint32_t bytesReserved = MMPolicy::Reserve(aSize, aBounds);
+    if (!bytesReserved) {
+      return Nothing();
+    }
+
+    return Some(PoolType(*this));
+  }
+
+  TrampolineCollection<MMPolicy> Items() const {
+    return TrampolineCollection<MMPolicy>(*this, this->GetLocalView(),
+                                          this->GetRemoteView(), kChunkSize,
+                                          mNextChunkIndex);
+  }
+
+  void Clear() { mNextChunkIndex = 0; }
+
+  ~VMSharingPolicyUnique() = default;
+
+  VMSharingPolicyUnique(const VMSharingPolicyUnique&) = delete;
+  VMSharingPolicyUnique& operator=(const VMSharingPolicyUnique&) = delete;
+
+  VMSharingPolicyUnique(VMSharingPolicyUnique&& aOther)
+      : MMPolicy(std::move(aOther)), mNextChunkIndex(aOther.mNextChunkIndex) {
+    aOther.mNextChunkIndex = 0;
+  }
+
+  VMSharingPolicyUnique& operator=(VMSharingPolicyUnique&& aOther) {
+    static_cast<MMPolicy&>(*this) = std::move(aOther);
+    mNextChunkIndex = aOther.mNextChunkIndex;
+    aOther.mNextChunkIndex = 0;
+    return *this;
+  }
+
+ protected:
+  // In VMSharingPolicyUnique we do not implement the overload that accepts
+  // an inner trampoline pool, as this policy is expected to be the
+  // implementation of the base case.
+  Maybe<Trampoline<MMPolicy>> GetNextTrampoline() {
+    uint32_t offset = mNextChunkIndex * kChunkSize;
+    if (!this->MaybeCommitNextPage(offset, kChunkSize)) {
+      return Nothing();
+    }
+
+    Trampoline<MMPolicy> result(this, this->GetLocalView() + offset,
+                                this->GetRemoteView() + offset, kChunkSize);
+    if (!!result) {
+      ++mNextChunkIndex;
+    }
+
+    return Some(std::move(result));
+  }
+
+ private:
+  uint32_t mNextChunkIndex;
+  static const uint32_t kChunkSize = 128;
+
+  template <typename VMPolicyT, typename FriendT>
+  friend class TrampolinePool;
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+// We don't include RangeMap.h until this point because it depends on the
+// TrampolinePool definitions from above.
+#include "mozilla/interceptor/RangeMap.h"
+
+namespace mozilla {
+namespace interceptor {
+
+// We only support this policy for in-proc MMPolicy.
+class MOZ_TRIVIAL_CTOR_DTOR VMSharingPolicyShared : public MMPolicyInProcess {
+  typedef VMSharingPolicyUnique<MMPolicyInProcess> UniquePolicyT;
+  typedef VMSharingPolicyShared ThisType;
+
+ public:
+  using PoolType = TrampolinePool<ThisType, UniquePolicyT::PoolType>;
+  using MMPolicyT = MMPolicyInProcess;
+
+  constexpr VMSharingPolicyShared() {}
+
+  bool ShouldUnhookUponDestruction() const { return false; }
+
+  Maybe<PoolType> Reserve(const uintptr_t aPivotAddr,
+                          const uint32_t aMaxDistanceFromPivot) {
+    // Win32 allocates VM addresses at a 64KiB granularity, so we might as well
+    // utilize that entire 64KiB reservation.
+    uint32_t len = this->GetAllocGranularity();
+
+    Maybe<Span<const uint8_t>> maybeBounds =
+        MMPolicyInProcess::SpanFromPivotAndDistance(len, aPivotAddr,
+                                                    aMaxDistanceFromPivot);
+
+    AutoCriticalSection lock(GetCS());
+    VMSharingPolicyUnique<MMPolicyT>* uniquePol = sVMMap.GetPolicy(maybeBounds);
+    MOZ_ASSERT(uniquePol);
+    if (!uniquePol) {
+      return Nothing();
+    }
+
+    Maybe<UniquePolicyT::PoolType> maybeUnique =
+        uniquePol->Reserve(len, maybeBounds);
+    if (!maybeUnique) {
+      return Nothing();
+    }
+
+    return Some(PoolType(*this, std::move(maybeUnique.ref())));
+  }
+
+  TrampolineCollection<MMPolicyInProcess> Items() const {
+    // Since ShouldUnhookUponDestruction returns false, this can be empty
+    return TrampolineCollection<MMPolicyInProcess>(*this);
+  }
+
+  void Clear() {
+    // This must be a no-op for shared VM policy; we can't have one interceptor
+    // wiping out trampolines for all interceptors in the process.
+  }
+
+  VMSharingPolicyShared(const VMSharingPolicyShared&) = delete;
+  VMSharingPolicyShared(VMSharingPolicyShared&&) = delete;
+  VMSharingPolicyShared& operator=(const VMSharingPolicyShared&) = delete;
+  VMSharingPolicyShared& operator=(VMSharingPolicyShared&&) = delete;
+
+ private:
+  static CRITICAL_SECTION* GetCS() {
+    static const bool isAlloc = []() -> bool {
+      DWORD flags = 0;
+#if defined(RELEASE_OR_BETA)
+      flags |= CRITICAL_SECTION_NO_DEBUG_INFO;
+#endif  // defined(RELEASE_OR_BETA)
+      ::InitializeCriticalSectionEx(&sCS, 4000, flags);
+      return true;
+    }();
+    Unused << isAlloc;
+
+    return &sCS;
+  }
+
+  // In VMSharingPolicyShared, we only implement the overload that accepts
+  // a VMSharingPolicyUnique trampoline pool as |aInner|, since we require the
+  // former policy to wrap the latter.
+  Maybe<Trampoline<MMPolicyInProcess>> GetNextTrampoline(
+      UniquePolicyT::PoolType& aInner) {
+    AutoCriticalSection lock(GetCS());
+    return aInner.GetNextTrampoline();
+  }
+
+#if defined(_M_X64)
+  bool IsTrampolineSpaceInLowest2GB(
+      const UniquePolicyT::PoolType& aInner) const {
+    AutoCriticalSection lock(GetCS());
+    return aInner.IsInLowest2GB();
+  }
+#endif  // defined(_M_X64)
+
+ private:
+  template <typename VMPolicyT, typename InnerT>
+  friend class TrampolinePool;
+
+  inline static RangeMap<MMPolicyInProcess> sVMMap;
+  inline static CRITICAL_SECTION sCS;
+};
+
+}  // namespace interceptor
+}  // namespace mozilla
+
+#endif  // mozilla_interceptor_VMSharingPolicies_h
diff --git a/mozglue/misc/interceptor/moz.build b/mozglue/misc/interceptor/moz.build
new file mode 100644
index 0000000000..6966dc7543
--- /dev/null
+++ b/mozglue/misc/interceptor/moz.build
@@ -0,0 +1,24 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS.mozilla.interceptor += [
+    "Arm64.h",
+    "MMPolicies.h",
+    "PatcherBase.h",
+    "PatcherDetour.h",
+    "PatcherNopSpace.h",
+    "RangeMap.h",
+    "TargetFunction.h",
+    "Trampoline.h",
+    "VMSharingPolicies.h",
+]
+
+if CONFIG["CPU_ARCH"] == "aarch64":
+    FINAL_LIBRARY = "mozglue"
+
+    UNIFIED_SOURCES += [
+        "Arm64.cpp",
+    ]
diff --git a/mozglue/misc/moz.build b/mozglue/misc/moz.build
new file mode 100644
index 0000000000..ab2855d403
--- /dev/null
+++ b/mozglue/misc/moz.build
@@ -0,0 +1,107 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+FINAL_LIBRARY = "mozglue"
+
+EXPORTS.mozilla += [
+    "AutoProfilerLabel.h",
+    "decimal/Decimal.h",
+    "decimal/DoubleConversion.h",
+    "MmapFaultHandler.h",
+    "PlatformConditionVariable.h",
+    "PlatformMutex.h",
+    "Printf.h",
+    "StackWalk.h",
+    "TimeStamp.h",
+    "Uptime.h",
+]
+
+EXPORTS.mozilla.glue += [
+    "Debug.h",
+    "WinUtils.h",
+]
+
+if CONFIG["OS_ARCH"] == "WINNT":
+    EXPORTS.mozilla += [
+        "PreXULSkeletonUI.h",
+        "StackWalk_windows.h",
+        "TimeStamp_windows.h",
+        "WindowsDpiAwareness.h",
+    ]
+
+SOURCES += [
+    "AutoProfilerLabel.cpp",
+    "MmapFaultHandler.cpp",
+    "Printf.cpp",
+    "StackWalk.cpp",
+    "TimeStamp.cpp",
+    "Uptime.cpp",
+]
+
+OS_LIBS += CONFIG["REALTIME_LIBS"]
+
+if CONFIG["OS_ARCH"] == "WINNT":
+    DIRS += [
+        "interceptor",
+    ]
+    EXPORTS += [
+        "nsWindowsDllInterceptor.h",
+    ]
+    EXPORTS.mozilla += [
+        "DynamicallyLinkedFunctionPtr.h",
+        "ImportDir.h",
+        "NativeNt.h",
+        "WindowsMapRemoteView.h",
+        "WindowsProcessMitigations.h",
+    ]
+    EXPORTS.mozilla.glue += [
+        "WindowsUnicode.h",
+    ]
+    SOURCES += [
+        "PreXULSkeletonUI.cpp",
+        "TimeStamp_windows.cpp",
+        "WindowsMapRemoteView.cpp",
+        "WindowsProcessMitigations.cpp",
+        "WindowsUnicode.cpp",
+    ]
+    OS_LIBS += ["dbghelp"]
+elif CONFIG["HAVE_CLOCK_MONOTONIC"]:
+    SOURCES += [
+        "TimeStamp_posix.cpp",
+    ]
+elif CONFIG["OS_ARCH"] == "Darwin":
+    SOURCES += [
+        "TimeStamp_darwin.cpp",
+    ]
+elif CONFIG["COMPILE_ENVIRONMENT"]:
+    error("No TimeStamp implementation on this platform.  Build will not succeed")
+
+if CONFIG["OS_ARCH"] == "WINNT":
+    SOURCES += [
+        "ConditionVariable_windows.cpp",
+        "Mutex_windows.cpp",
+    ]
+else:
+    SOURCES += [
+        "ConditionVariable_posix.cpp",
+        "Mutex_posix.cpp",
+    ]
+
+if CONFIG["MOZ_LINKER"] and CONFIG["MOZ_WIDGET_TOOLKIT"] == "android":
+    LOCAL_INCLUDES += [
+        "/mozglue/linker",
+    ]
+
+SOURCES += [
+    "decimal/Decimal.cpp",
+]
+
+if CONFIG["CC_TYPE"] == "clang":
+    # Suppress warnings from third-party V8 Decimal code.
+    SOURCES["decimal/Decimal.cpp"].flags += ["-Wno-implicit-fallthrough"]
+
+for var in ("MOZ_APP_BASENAME", "MOZ_APP_VENDOR"):
+    DEFINES[var] = '"%s"' % CONFIG[var]
diff --git a/mozglue/misc/nsWindowsDllInterceptor.h b/mozglue/misc/nsWindowsDllInterceptor.h
new file mode 100644
index 0000000000..d6afc9bb7f
--- /dev/null
+++ b/mozglue/misc/nsWindowsDllInterceptor.h
@@ -0,0 +1,819 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_WINDOWS_DLL_INTERCEPTOR_H_
+#define NS_WINDOWS_DLL_INTERCEPTOR_H_
+
+#include <wchar.h>
+#include <windows.h>
+#include <winternl.h>
+
+#include <utility>
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/Tuple.h"
+#include "mozilla/Types.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include "mozilla/interceptor/MMPolicies.h"
+#include "mozilla/interceptor/PatcherDetour.h"
+#include "mozilla/interceptor/PatcherNopSpace.h"
+#include "mozilla/interceptor/VMSharingPolicies.h"
+#include "nsWindowsHelpers.h"
+
+/*
+ * Simple function interception.
+ *
+ * We have two separate mechanisms for intercepting a function: We can use the
+ * built-in nop space, if it exists, or we can create a detour.
+ *
+ * Using the built-in nop space works as follows: On x86-32, DLL functions
+ * begin with a two-byte nop (mov edi, edi) and are preceded by five bytes of
+ * NOP instructions.
+ *
+ * When we detect a function with this prelude, we do the following:
+ *
+ * 1. Write a long jump to our interceptor function into the five bytes of NOPs
+ *    before the function.
+ *
+ * 2. Write a short jump -5 into the two-byte nop at the beginning of the
+ *    function.
+ *
+ * This mechanism is nice because it's thread-safe.  It's even safe to do if
+ * another thread is currently running the function we're modifying!
+ *
+ * When the WindowsDllNopSpacePatcher is destroyed, we overwrite the short jump
+ * but not the long jump, so re-intercepting the same function won't work,
+ * because its prelude won't match.
+ *
+ *
+ * Unfortunately nop space patching doesn't work on functions which don't have
+ * this magic prelude (and in particular, x86-64 never has the prelude).  So
+ * when we can't use the built-in nop space, we fall back to using a detour,
+ * which works as follows:
+ *
+ * 1. Save first N bytes of OrigFunction to trampoline, where N is a
+ *    number of bytes >= 5 that are instruction aligned.
+ *
+ * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook
+ *    function.
+ *
+ * 3. After N bytes of the trampoline, add a jump to OrigFunction+N to
+ *    continue original program flow.
+ *
+ * 4. Hook function needs to call the trampoline during its execution,
+ *    to invoke the original function (so address of trampoline is
+ *    returned).
+ *
+ * When the WindowsDllDetourPatcher object is destructed, OrigFunction is
+ * patched again to jump directly to the trampoline instead of going through
+ * the hook function. As such, re-intercepting the same function won't work, as
+ * jump instructions are not supported.
+ *
+ * Note that this is not thread-safe.  Sad day.
+ *
+ */
+
+#if defined(_M_IX86) && defined(__clang__) && __has_declspec_attribute(guard)
+// On x86, nop-space patches return to the second instruction of their target.
+// That deliberately violates Control Flow Guard, so disable the check here.
+#  define INTERCEPTOR_DISABLE_CFGUARD __declspec(guard(nocf))
+#else  // Not x86 clang with the guard attribute: expand to nothing.
+#  define INTERCEPTOR_DISABLE_CFGUARD /* nothing */
+#endif  // defined(_M_IX86) && defined(__clang__) && ...
+
+namespace mozilla {
+namespace interceptor {
+
+template <typename FuncPtrT>
+struct OriginalFunctionPtrTraits;  // Only the specializations are defined.
+
+template <typename ResultT, typename... ParamTs>
+struct OriginalFunctionPtrTraits<ResultT (*)(ParamTs...)> {
+  typedef ResultT ReturnType;
+};
+
+#if defined(_M_IX86)  // x86 only: explicit __stdcall / __fastcall pointers.
+template <typename ResultT, typename... ParamTs>
+struct OriginalFunctionPtrTraits<ResultT(__stdcall*)(ParamTs...)> {
+  typedef ResultT ReturnType;
+};
+
+template <typename ResultT, typename... ParamTs>
+struct OriginalFunctionPtrTraits<ResultT(__fastcall*)(ParamTs...)> {
+  typedef ResultT ReturnType;
+};
+#endif  // defined(_M_IX86)
+
+// Holds the stub for calling the original function of one in-process hook.
+// The hook is attempted at most once per instance: the first call to Set() or
+// SetDetour() does the work and later calls only report that first outcome.
+template <typename InterceptorT, typename FuncPtrT>
+class FuncHook final {
+ public:
+  using ThisType = FuncHook<InterceptorT, FuncPtrT>;
+  using ReturnType = typename OriginalFunctionPtrTraits<FuncPtrT>::ReturnType;
+
+  constexpr FuncHook() : mOrigFunc(nullptr), mInitOnce(INIT_ONCE_STATIC_INIT) {}
+
+  ~FuncHook() = default;
+
+  // Hooks aName through aInterceptor.AddHook() so that it calls aHookDest.
+  // Returns true when the one-time hook attempt succeeded.
+  bool Set(InterceptorT& aInterceptor, const char* aName, FuncPtrT aHookDest) {
+    return SetOnce(aInterceptor, aName, aHookDest, /* aForceDetour */ false);
+  }
+
+  // Like Set(), but goes through aInterceptor.AddDetour() instead.
+  bool SetDetour(InterceptorT& aInterceptor, const char* aName,
+                 FuncPtrT aHookDest) {
+    return SetOnce(aInterceptor, aName, aHookDest, /* aForceDetour */ true);
+  }
+
+  explicit operator bool() const { return !!mOrigFunc; }
+
+  // Calls the original function through the stub; mOrigFunc is null (and this
+  // must not be used) until a hook has been set.
+  template <typename... ArgsType>
+  INTERCEPTOR_DISABLE_CFGUARD ReturnType operator()(ArgsType&&... aArgs) const {
+    return mOrigFunc(std::forward<ArgsType>(aArgs)...);
+  }
+
+  FuncPtrT GetStub() const { return mOrigFunc; }
+
+  // One-time init stuff cannot be moved or copied
+  FuncHook(const FuncHook&) = delete;
+  FuncHook(FuncHook&&) = delete;
+  FuncHook& operator=(const FuncHook&) = delete;
+  FuncHook& operator=(FuncHook&& aOther) = delete;
+
+ private:
+  struct MOZ_RAII InitOnceContext final {
+    InitOnceContext(ThisType* aHook, InterceptorT* aInterceptor,
+                    const char* aName, FuncPtrT aHookDest, bool aForceDetour)
+        : mHook(aHook),
+          mInterceptor(aInterceptor),
+          mName(aName),
+          mHookDest(reinterpret_cast<void*>(aHookDest)),
+          mForceDetour(aForceDetour) {}
+
+    ThisType* mHook;
+    InterceptorT* mInterceptor;
+    const char* mName;
+    void* mHookDest;
+    bool mForceDetour;
+  };
+
+  // Shared body of Set() and SetDetour(): runs InitOnceCallback at most once
+  // for this instance and reports whether that run installed the hook.
+  bool SetOnce(InterceptorT& aInterceptor, const char* aName,
+               FuncPtrT aHookDest, bool aForceDetour) {
+    LPVOID hookOk = nullptr;
+    InitOnceContext ctx(this, &aInterceptor, aName, aHookDest, aForceDetour);
+
+    return ::InitOnceExecuteOnce(&mInitOnce, &InitOnceCallback, &ctx,
+                                 &hookOk) &&
+           hookOk;
+  }
+
+  // Installs the hook described by aCtx, storing the stub in mOrigFunc.
+  bool Apply(const InitOnceContext& aCtx) {
+    const intptr_t hookDest = reinterpret_cast<intptr_t>(aCtx.mHookDest);
+    void** stubOut = reinterpret_cast<void**>(&mOrigFunc);
+    return aCtx.mForceDetour
+               ? aCtx.mInterceptor->AddDetour(aCtx.mName, hookDest, stubOut)
+               : aCtx.mInterceptor->AddHook(aCtx.mName, hookDest, stubOut);
+  }
+
+  static BOOL CALLBACK InitOnceCallback(PINIT_ONCE aInitOnce, PVOID aParam,
+                                        PVOID* aOutContext) {
+    MOZ_ASSERT(aOutContext);
+
+    auto ctx = reinterpret_cast<InitOnceContext*>(aParam);
+    const bool hooked = ctx->mHook->Apply(*ctx);
+    // Report the outcome through the init-once context: non-null on success.
+    *aOutContext =
+        hooked ? reinterpret_cast<PVOID>(1U << INIT_ONCE_CTX_RESERVED_BITS)
+               : nullptr;
+    return TRUE;
+  }
+
+  FuncPtrT mOrigFunc;
+  INIT_ONCE mInitOnce;
+};
+
+// Cross-process counterpart of FuncHook; see CopyStubToChildProcess().
+template <typename InterceptorT, typename FuncPtrT>
+class MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS FuncHookCrossProcess final {
+ public:
+  using ThisType = FuncHookCrossProcess<InterceptorT, FuncPtrT>;
+  using ReturnType = typename OriginalFunctionPtrTraits<FuncPtrT>::ReturnType;
+
+#if defined(DEBUG)
+  FuncHookCrossProcess() {}
+#endif  // defined(DEBUG)
+
+  bool Set(nt::CrossExecTransferManager& aTransferMgr,
+           InterceptorT& aInterceptor, const char* aName, FuncPtrT aHookDest) {
+    FuncPtrT stub;
+    const bool hooked =
+        aInterceptor.AddHook(aName, reinterpret_cast<intptr_t>(aHookDest),
+                             reinterpret_cast<void**>(&stub));
+
+    // The stub is only transferred when the hook was actually installed.
+    return hooked && CopyStubToChildProcess(aTransferMgr, aInterceptor, stub);
+  }
+
+  bool SetDetour(nt::CrossExecTransferManager& aTransferMgr,
+                 InterceptorT& aInterceptor, const char* aName,
+                 FuncPtrT aHookDest) {
+    FuncPtrT stub;
+    const bool hooked =
+        aInterceptor.AddDetour(aName, reinterpret_cast<intptr_t>(aHookDest),
+                               reinterpret_cast<void**>(&stub));
+
+    // The stub is only transferred when the detour was actually installed.
+    return hooked && CopyStubToChildProcess(aTransferMgr, aInterceptor, stub);
+  }
+
+  explicit operator bool() const { return !!mOrigFunc; }
+
+  /**
+   * NB: This operator is only meaningful when invoked in the target process!
+   */
+  template <typename... ArgsType>
+  ReturnType operator()(ArgsType&&... aArgs) const {
+    return mOrigFunc(std::forward<ArgsType>(aArgs)...);
+  }
+
+#if defined(DEBUG)
+  FuncHookCrossProcess(const FuncHookCrossProcess&) = delete;
+  FuncHookCrossProcess(FuncHookCrossProcess&&) = delete;
+  FuncHookCrossProcess& operator=(const FuncHookCrossProcess&) = delete;
+  FuncHookCrossProcess& operator=(FuncHookCrossProcess&& aOther) = delete;
+#endif  // defined(DEBUG)
+
+ private:
+  bool CopyStubToChildProcess(nt::CrossExecTransferManager& aTransferMgr,
+                              InterceptorT& aInterceptor, FuncPtrT aStub) {
+    LauncherVoidResult transferResult =
+        aTransferMgr.Transfer(&mOrigFunc, &aStub, sizeof(FuncPtrT));
+    if (transferResult.isOk()) {
+      return true;
+    }
+#ifdef MOZ_USE_LAUNCHER_ERROR
+    const mozilla::WindowsError& failure = transferResult.inspectErr().mError;
+#else
+    const mozilla::WindowsError& failure = transferResult.inspectErr();
+#endif
+    aInterceptor.SetLastDetourError(FUNCHOOKCROSSPROCESS_COPYSTUB_ERROR,
+                                    failure.AsHResult());
+    return false;
+  }
+
+  FuncPtrT mOrigFunc;
+};
+
+template <typename MMPolicyT, typename InterceptorT>
+struct TypeResolver;  // Specialized below, once per supported memory policy.
+
+template <typename InterceptorT>
+struct TypeResolver<MMPolicyInProcess, InterceptorT> {
+  template <typename FuncPtrT>
+  using FuncHookType = FuncHook<InterceptorT, FuncPtrT>;
+};
+
+template <typename InterceptorT>
+struct TypeResolver<MMPolicyOutOfProcess, InterceptorT> {
+  template <typename FuncPtrT>
+  using FuncHookType = FuncHookCrossProcess<InterceptorT, FuncPtrT>;
+};
+
+/**
+ * Hooks functions exported by a single DLL (chosen via Init). On x86 AddHook
+ * tries a nop space patch first; otherwise, or if that fails, a detour is used.
+ * Hooks are added through the FuncHookType resolved by the TypeResolver base.
+ */
+template <typename VMPolicy = mozilla::interceptor::VMSharingPolicyShared>
+class WindowsDllInterceptor final
+    : public TypeResolver<typename VMPolicy::MMPolicyT,
+                          WindowsDllInterceptor<VMPolicy>> {
+  typedef WindowsDllInterceptor<VMPolicy> ThisType;
+
+  interceptor::WindowsDllDetourPatcher<VMPolicy> mDetourPatcher;
+#if defined(_M_IX86)
+  interceptor::WindowsDllNopSpacePatcher<typename VMPolicy::MMPolicyT>
+      mNopSpacePatcher;
+#endif  // defined(_M_IX86)
+
+  HMODULE mModule;
+
+ public:
+  template <typename... Args>
+  explicit WindowsDllInterceptor(Args&&... aArgs)
+      : mDetourPatcher(std::forward<Args>(aArgs)...)
+#if defined(_M_IX86)
+        ,
+        mNopSpacePatcher(std::forward<Args>(aArgs)...)
+#endif  // defined(_M_IX86)
+        ,
+        mModule(nullptr) {
+  }
+
+  WindowsDllInterceptor(const WindowsDllInterceptor&) = delete;
+  WindowsDllInterceptor(WindowsDllInterceptor&&) = delete;
+  WindowsDllInterceptor& operator=(const WindowsDllInterceptor&) = delete;
+  WindowsDllInterceptor& operator=(WindowsDllInterceptor&&) = delete;
+
+  ~WindowsDllInterceptor() { Clear(); }
+
+  // Convenience overload for ASCII module names given as narrow char arrays.
+  template <size_t N>
+  void Init(const char (&aModuleName)[N]) {
+    wchar_t moduleName[N];
+
+    for (size_t i = 0; i < N; ++i) {
+      MOZ_ASSERT(!(aModuleName[i] & 0x80),
+                 "Use wide-character overload for non-ASCII module names");
+      moduleName[i] = aModuleName[i];
+    }
+
+    Init(moduleName);
+  }
+
+  // Loads aModuleName and remembers it as the module to hook. Does nothing if
+  // a module is already set; a failed load leaves mModule null.
+  void Init(const wchar_t* aModuleName) {
+    if (mModule) {
+      return;
+    }
+
+    mModule = ::LoadLibraryW(aModuleName);
+  }
+
+  /** Force a specific configuration for testing purposes. NOT to be used in
+      production code! **/
+  void TestOnlyDetourInit(const wchar_t* aModuleName, DetourFlags aFlags) {
+    Init(aModuleName);
+    mDetourPatcher.Init(aFlags);
+  }
+
+  void Clear() {
+    if (!mModule) {
+      return;
+    }
+
+#if defined(_M_IX86)
+    mNopSpacePatcher.Clear();
+#endif  // defined(_M_IX86)
+    mDetourPatcher.Clear();
+
+    // NB: We intentionally leak mModule
+  }
+
+#if defined(NIGHTLY_BUILD)
+  const Maybe<DetourError>& GetLastDetourError() const {
+    return mDetourPatcher.GetLastDetourError();
+  }
+#endif  // defined(NIGHTLY_BUILD)
+  template <typename... Args>
+  void SetLastDetourError(Args&&... aArgs) {
+    return mDetourPatcher.SetLastDetourError(std::forward<Args>(aArgs)...);
+  }
+
+  constexpr static uint32_t GetWorstCaseRequiredBytesToPatch() {
+    return WindowsDllDetourPatcherPrimitive<
+        typename VMPolicy::MMPolicyT>::GetWorstCaseRequiredBytesToPatch();
+  }
+
+ private:
+  /**
+   * Resolves aName in the DLL we set in Init. On failure, records the reason
+   * with mDetourPatcher.SetLastDetourError() and returns nullptr.
+   */
+  FARPROC ResolveProc(const char* aName) {
+    if (!mModule) {
+      mDetourPatcher.SetLastDetourError(DetourResultCode::INTERCEPTOR_MOD_NULL);
+      return nullptr;
+    }
+
+    if (!mDetourPatcher.IsPageAccessible(
+            nt::PEHeaders::HModuleToBaseAddr<uintptr_t>(mModule))) {
+      mDetourPatcher.SetLastDetourError(
+          DetourResultCode::INTERCEPTOR_MOD_INACCESSIBLE);
+      return nullptr;
+    }
+
+    FARPROC proc = mDetourPatcher.GetProcAddress(mModule, aName);
+    if (!proc) {
+      mDetourPatcher.SetLastDetourError(
+          DetourResultCode::INTERCEPTOR_PROC_NULL);
+      return nullptr;
+    }
+
+    if (!mDetourPatcher.IsPageAccessible(reinterpret_cast<uintptr_t>(proc))) {
+      mDetourPatcher.SetLastDetourError(
+          DetourResultCode::INTERCEPTOR_PROC_INACCESSIBLE);
+      return nullptr;
+    }
+
+    return proc;
+  }
+
+  /**
+   * Hook/detour the method aName from the DLL we set in Init so that it calls
+   * aHookDest instead.  Returns the original method pointer in aOrigFunc
+   * and returns true if successful.
+   *
+   * IMPORTANT: If you use this method, please add your case to the
+   * TestDllInterceptor in order to detect future failures.  Even if this
+   * succeeds now, updates to the hooked DLL could cause it to fail in
+   * the future.
+   */
+  bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc) {
+    // Use a nop space patch if possible, otherwise fall back to a detour.
+    // This should be the preferred method for adding hooks.
+    FARPROC proc = ResolveProc(aName);
+    if (!proc) {
+      return false;
+    }
+
+#if defined(_M_IX86)
+    if (mNopSpacePatcher.AddHook(proc, aHookDest, aOrigFunc)) {
+      return true;
+    }
+#endif  // defined(_M_IX86)
+
+    return AddDetour(proc, aHookDest, aOrigFunc);
+  }
+
+  /**
+   * Detour the method aName from the DLL we set in Init so that it calls
+   * aHookDest instead.  Returns the original method pointer in aOrigFunc
+   * and returns true if successful.
+   *
+   * IMPORTANT: If you use this method, please add your case to the
+   * TestDllInterceptor in order to detect future failures.  Even if this
+   * succeeds now, updates to the detoured DLL could cause it to fail in
+   * the future.
+   */
+  bool AddDetour(const char* aName, intptr_t aHookDest, void** aOrigFunc) {
+    // Generally, code should not call this method directly. Use AddHook unless
+    // there is a specific need to avoid nop space patches.
+    FARPROC proc = ResolveProc(aName);
+    return proc && AddDetour(proc, aHookDest, aOrigFunc);
+  }
+
+  // Detours aProc, which must already have been resolved from mModule. The
+  // detour patcher is initialized lazily on first use; on x64 its flags depend
+  // on which module/function is being hooked and on the Windows version.
+  bool AddDetour(FARPROC aProc, intptr_t aHookDest, void** aOrigFunc) {
+    MOZ_ASSERT(mModule && aProc);
+
+    if (!mDetourPatcher.Initialized()) {
+      DetourFlags flags = DetourFlags::eDefault;
+#if defined(_M_X64)
+      // NTDLL hooks should attempt to use a 10-byte patch because some
+      // injected DLLs do the same and interfere with our stuff.
+      bool needs10BytePatch = (mModule == ::GetModuleHandleW(L"ntdll.dll"));
+
+      bool isWin8Or81 = IsWin8OrLater() && (!IsWin10OrLater());
+      bool isWin8 = IsWin8OrLater() && (!IsWin8Point1OrLater());
+
+      bool isKernel32Dll = (mModule == ::GetModuleHandleW(L"kernel32.dll"));
+
+      bool isDuplicateHandle = (reinterpret_cast<void*>(aProc) ==
+                                reinterpret_cast<void*>(&::DuplicateHandle));
+
+      // CloseHandle on Windows 8/8.1 only accommodates 10-byte patches.
+      needs10BytePatch |= isWin8Or81 && isKernel32Dll &&
+                          (reinterpret_cast<void*>(aProc) ==
+                           reinterpret_cast<void*>(&::CloseHandle));
+
+      // CreateFileA and DuplicateHandle on Windows 8 require 10-byte patches.
+      needs10BytePatch |= isWin8 && isKernel32Dll &&
+                          ((reinterpret_cast<void*>(aProc) ==
+                            reinterpret_cast<void*>(&::CreateFileA)) ||
+                           isDuplicateHandle);
+
+      if (needs10BytePatch) {
+        flags |= DetourFlags::eEnable10BytePatch;
+      }
+
+      if (isWin8 && isDuplicateHandle) {
+        // Because we can't detour Win8's KERNELBASE!DuplicateHandle,
+        // we detour kernel32!DuplicateHandle (See bug 1659398).
+        flags |= DetourFlags::eDontResolveRedirection;
+      }
+#endif  // defined(_M_X64)
+
+      mDetourPatcher.Init(flags);
+    }
+
+    return mDetourPatcher.AddHook(aProc, aHookDest, aOrigFunc);
+  }
+
+ private:
+  template <typename InterceptorT, typename FuncPtrT>
+  friend class FuncHook;
+
+  template <typename InterceptorT, typename FuncPtrT>
+  friend class FuncHookCrossProcess;
+};
+
+/**
+ * IAT (import address table) patching is for intercepting calls to a function
+ * only when those calls originate from one specific importing module.
+ */
+class WindowsIATPatcher final {
+ public:
+  template <typename FuncPtrT>
+  using FuncHookType = FuncHook<WindowsIATPatcher, FuncPtrT>;
+
+ private:
+  static bool CheckASCII(const char* aInStr) {  // true iff no high bit set
+    while (*aInStr) {
+      if (*aInStr & 0x80) {
+        return false;
+      }
+      ++aInStr;
+    }
+    return true;
+  }
+
+  static bool AddHook(HMODULE aFromModule, const char* aToModuleName,
+                      const char* aTargetFnName, void* aHookDest,
+                      Atomic<void*>* aOutOrigFunc) {
+    if (!aFromModule || !aToModuleName || !aTargetFnName || !aOutOrigFunc) {
+      return false;  // aHookDest is deliberately not part of this check
+    }
+
+    // PE Spec requires ASCII names for imported module names
+    const bool isModuleNameAscii = CheckASCII(aToModuleName);
+    MOZ_ASSERT(isModuleNameAscii);
+    if (!isModuleNameAscii) {
+      return false;
+    }
+
+    // PE Spec requires ASCII names for imported function names
+    const bool isTargetFnNameAscii = CheckASCII(aTargetFnName);
+    MOZ_ASSERT(isTargetFnNameAscii);
+    if (!isTargetFnNameAscii) {
+      return false;
+    }
+
+    nt::PEHeaders headers(aFromModule);
+    if (!headers) {
+      return false;
+    }
+
+    PIMAGE_IMPORT_DESCRIPTOR impDesc =
+        headers.GetImportDescriptor(aToModuleName);
+    if (!nt::PEHeaders::IsValid(impDesc)) {
+      // Either aFromModule does not import aToModuleName at load-time, or
+      // aToModuleName is a (currently unsupported) delay-load import.
+      return false;
+    }
+
+    // Resolve the import name table (INT).
+    auto firstINTThunk = headers.template RVAToPtr<PIMAGE_THUNK_DATA>(
+        impDesc->OriginalFirstThunk);
+    if (!nt::PEHeaders::IsValid(firstINTThunk)) {
+      return false;
+    }
+
+    Maybe<ptrdiff_t> thunkIndex;  // stays Nothing until the name is found
+
+    // Scan the INT for the location of the thunk for the function named
+    // 'aTargetFnName'.
+    for (PIMAGE_THUNK_DATA curINTThunk = firstINTThunk;
+         nt::PEHeaders::IsValid(curINTThunk); ++curINTThunk) {
+      if (IMAGE_SNAP_BY_ORDINAL(curINTThunk->u1.Ordinal)) {
+        // Currently not supporting import by ordinal; this isn't hard to add,
+        // but we won't bother unless necessary.
+        continue;
+      }
+
+      PIMAGE_IMPORT_BY_NAME curThunkFnName =
+          headers.template RVAToPtr<PIMAGE_IMPORT_BY_NAME>(
+              curINTThunk->u1.AddressOfData);
+      MOZ_ASSERT(curThunkFnName);
+      if (!curThunkFnName) {
+        // Looks like we have a bad name descriptor. Try to continue.
+        continue;
+      }
+
+      // Function name checks MUST be case-sensitive!
+      if (!strcmp(aTargetFnName, curThunkFnName->Name)) {
+        // We found the thunk. Save the index of this thunk, as the IAT thunk
+        // is located at the same index in that table as in the INT.
+        thunkIndex = Some(curINTThunk - firstINTThunk);
+        break;
+      }
+    }
+
+    if (thunkIndex.isNothing()) {
+      // We never found a thunk for that function. Perhaps it's not imported?
+      return false;
+    }
+
+    if (thunkIndex.value() < 0) {
+      // A negative index cannot be valid here; refuse to use it.
+      return false;
+    }
+
+    auto firstIATThunk =
+        headers.template RVAToPtr<PIMAGE_THUNK_DATA>(impDesc->FirstThunk);
+    if (!nt::PEHeaders::IsValid(firstIATThunk)) {
+      return false;
+    }
+
+    // Resolve the IAT thunk for the function we want
+    PIMAGE_THUNK_DATA targetThunk = &firstIATThunk[thunkIndex.value()];
+    if (!nt::PEHeaders::IsValid(targetThunk)) {
+      return false;
+    }
+
+    auto fnPtr = reinterpret_cast<Atomic<void*>*>(&targetThunk->u1.Function);
+
+    // Now we can just change out its pointer with our hook function.
+    AutoVirtualProtect prot(fnPtr, sizeof(void*), PAGE_EXECUTE_READWRITE);
+    if (!prot) {
+      return false;
+    }
+
+    // We do the exchange this way to ensure that *aOutOrigFunc is always valid
+    // once the atomic exchange has taken place.
+    void* tmp;  // IAT entry as last read; retried if it changes under us
+
+    do {
+      tmp = *fnPtr;
+      *aOutOrigFunc = tmp;
+    } while (!fnPtr->compareExchange(tmp, aHookDest));
+
+    return true;
+  }
+
+  template <typename InterceptorT, typename FuncPtrT>
+  friend class FuncHook;  // needs access to the private AddHook()
+};
+
+// FuncHook specialization for IAT patching: hooks one imported function of
+// one importing module, at most once per instance.
+template <typename FuncPtrT>
+class MOZ_ONLY_USED_TO_AVOID_STATIC_CONSTRUCTORS
+    FuncHook<WindowsIATPatcher, FuncPtrT>
+        final {
+ public:
+  using ThisType = FuncHook<WindowsIATPatcher, FuncPtrT>;
+  using ReturnType = typename OriginalFunctionPtrTraits<FuncPtrT>::ReturnType;
+
+  constexpr FuncHook()
+      : mInitOnce(INIT_ONCE_STATIC_INIT),
+        mFromModule(nullptr),
+        mOrigFunc(nullptr) {}
+
+#if defined(DEBUG)
+  ~FuncHook() = default;
+#endif  // defined(DEBUG)
+
+  bool Set(const wchar_t* aFromModuleName, const char* aToModuleName,
+           const char* aFnName, FuncPtrT aHookDest) {
+    nsModuleHandle importer(::LoadLibraryW(aFromModuleName));
+    if (!importer) {
+      return false;
+    }
+
+    return Set(importer, aToModuleName, aFnName, aHookDest);
+  }
+
+  // We offer this overload in case the client wants finer-grained control over
+  // loading aFromModule.
+  bool Set(nsModuleHandle& aFromModule, const char* aToModuleName,
+           const char* aFnName, FuncPtrT aHookDest) {
+    LPVOID hookOk = nullptr;
+    InitOnceContext ctx(this, aFromModule, aToModuleName, aFnName, aHookDest);
+
+    const bool hooked = ::InitOnceExecuteOnce(&mInitOnce, &InitOnceCallback,
+                                              &ctx, &hookOk) &&
+                        hookOk;
+    if (hooked) {
+      // If we successfully set the hook then we must retain a strong
+      // reference to the module that we modified.
+      mFromModule = aFromModule.disown();
+    }
+
+    return hooked;
+  }
+
+  explicit operator bool() const { return !!mOrigFunc; }
+
+  template <typename... ArgsType>
+  ReturnType operator()(ArgsType&&... aArgs) const {
+    return mOrigFunc(std::forward<ArgsType>(aArgs)...);
+  }
+
+  FuncPtrT GetStub() const { return mOrigFunc; }
+
+#if defined(DEBUG)
+  // One-time init stuff cannot be moved or copied
+  FuncHook(const FuncHook&) = delete;
+  FuncHook(FuncHook&&) = delete;
+  FuncHook& operator=(const FuncHook&) = delete;
+  FuncHook& operator=(FuncHook&& aOther) = delete;
+#endif  // defined(DEBUG)
+
+ private:
+  struct MOZ_RAII InitOnceContext final {
+    InitOnceContext(ThisType* aHook, const nsModuleHandle& aFromModule,
+                    const char* aToModuleName, const char* aFnName,
+                    FuncPtrT aHookDest)
+        : mHook(aHook),
+          mFromModule(aFromModule),
+          mToModuleName(aToModuleName),
+          mFnName(aFnName),
+          mHookDest(reinterpret_cast<void*>(aHookDest)) {}
+
+    ThisType* mHook;
+    const nsModuleHandle& mFromModule;
+    const char* mToModuleName;
+    const char* mFnName;
+    void* mHookDest;
+  };
+
+  bool Apply(const nsModuleHandle& aFromModule, const char* aToModuleName,
+             const char* aFnName, void* aHookDest) {
+    auto origFuncSlot = reinterpret_cast<Atomic<void*>*>(&mOrigFunc);
+    return WindowsIATPatcher::AddHook(aFromModule, aToModuleName, aFnName,
+                                      aHookDest, origFuncSlot);
+  }
+
+  static BOOL CALLBACK InitOnceCallback(PINIT_ONCE aInitOnce, PVOID aParam,
+                                        PVOID* aOutContext) {
+    MOZ_ASSERT(aOutContext);
+
+    auto args = reinterpret_cast<InitOnceContext*>(aParam);
+    if (args->mHook->Apply(args->mFromModule, args->mToModuleName,
+                           args->mFnName, args->mHookDest)) {
+      *aOutContext =
+          reinterpret_cast<PVOID>(1U << INIT_ONCE_CTX_RESERVED_BITS);
+    } else {
+      *aOutContext = nullptr;
+    }
+    return TRUE;
+  }
+
+  INIT_ONCE mInitOnce;
+  HMODULE mFromModule;  // never freed
+  FuncPtrT mOrigFunc;
+};
+
+/**
+ * Redirects a module's entry point to another function with an irreversible
+ * hook: the original function is not backed up.
+ */
+class WindowsDllEntryPointInterceptor final {
+  using DllMainFn = BOOL(WINAPI*)(HINSTANCE, DWORD, LPVOID);
+  using MMPolicyT = MMPolicyInProcessEarlyStage;
+
+  MMPolicyT mMMPolicy;
+
+ public:
+  explicit WindowsDllEntryPointInterceptor(
+      const MMPolicyT::Kernel32Exports& aK32Exports)
+      : mMMPolicy(aK32Exports) {}
+
+  bool Set(const nt::PEHeaders& aHeaders, DllMainFn aDestination) {
+    if (!aHeaders) {
+      return false;
+    }
+
+    const auto hookTarget = reinterpret_cast<uintptr_t>(aDestination);
+    WindowsDllDetourPatcherPrimitive<MMPolicyT> primitive;
+    return primitive.AddIrreversibleHook(mMMPolicy, aHeaders.GetEntryPoint(),
+                                         hookTarget);
+  }
+};
+
+}  // namespace interceptor
+
+using WindowsDllInterceptor = interceptor::WindowsDllInterceptor<>;
+
+// Out-of-process flavour: a unique VM policy over MMPolicyOutOfProcess.
+using CrossProcessDllInterceptor = interceptor::WindowsDllInterceptor<
+    interceptor::VMSharingPolicyUnique<interceptor::MMPolicyOutOfProcess>>;
+
+using WindowsIATPatcher = interceptor::WindowsIATPatcher;
+
+}  // namespace mozilla
+
+#endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */
diff --git a/mozglue/moz.build b/mozglue/moz.build
new file mode 100644
index 0000000000..d3813122c9
--- /dev/null
+++ b/mozglue/moz.build
@@ -0,0 +1,26 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files("**"):  # pattern "**": all files below this directory
+    BUG_COMPONENT = ("Core", "mozglue")
+
+if CONFIG["MOZ_LINKER"] or CONFIG["MOZ_WIDGET_TOOLKIT"] == "android":
+    DIRS += ["linker"]  # always on Android, elsewhere only with MOZ_LINKER
+
+if CONFIG["MOZ_WIDGET_TOOLKIT"] == "android":
+    DIRS += ["android"]
+
+if CONFIG["OS_TARGET"] == "WINNT":
+    DIRS += ["dllservices"]
+
+DIRS += [  # added unconditionally
+    "baseprofiler",
+    "build",
+    "misc",
+]
+
+if CONFIG["MOZ_WIDGET_TOOLKIT"]:  # no tests unless a widget toolkit is set
+    TEST_DIRS += ["tests"]
diff --git a/mozglue/static/README b/mozglue/static/README
new file mode 100644
index 0000000000..e52c716166
--- /dev/null
+++ b/mozglue/static/README
@@ -0,0 +1,2 @@
+mozglue/static contains parts of the mozglue library that can/should be
+statically linked to e.g. js/Gecko.
diff --git a/mozglue/static/rust/Cargo.toml b/mozglue/static/rust/Cargo.toml
new file mode 100644
index 0000000000..e78b396cde
--- /dev/null
+++ b/mozglue/static/rust/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "mozglue-static"
+version = "0.1.0"
+edition = "2018"
+license = "MPL-2.0"  # SPDX identifier for the MPL 2.0 named in the headers
+
+[lib]
+path = "lib.rs"
+
+[dependencies]
+arrayvec = "0.5"
+
+[build-dependencies]
+cc = "1"
diff --git a/mozglue/static/rust/build.rs b/mozglue/static/rust/build.rs
new file mode 100644
index 0000000000..5bc6bae40c
--- /dev/null
+++ b/mozglue/static/rust/build.rs
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::env;
+use std::path::PathBuf;
+
+/// Reads the environment variable `name` and returns it as a path, panicking
+/// unless it is set and names an existing absolute directory.
+fn absolute_dir_from_env(name: &str) -> PathBuf {
+    let value = env::var_os(name).unwrap_or_else(|| panic!("{} must be set", name));
+    let path = PathBuf::from(value);
+    if !path.is_absolute() || !path.is_dir() {
+        panic!(
+            "{} must be an absolute directory, was: {}",
+            name,
+            path.display()
+        );
+    }
+    path
+}
+
+/// Compiles wrappers.cpp (as C++) into the static library named `wrappers`.
+fn main() {
+    // Validate both locations up front so that a bad environment fails with a
+    // message naming the variable, before any compilation is attempted.
+    let dist_path = absolute_dir_from_env("MOZ_DIST");
+    let topobjdir = absolute_dir_from_env("MOZ_TOPOBJDIR");
+    let mut build = cc::Build::new();
+    build.cpp(true);
+    // For js-confdefs.h, see wrappers.cpp.
+    build.include(topobjdir.join("js").join("src"));
+    build.include(dist_path.join("include"));
+    build.define("MOZ_HAS_MOZGLUE", None);
+    build.file("wrappers.cpp");
+    build.compile("wrappers");
+    println!("cargo:rerun-if-changed=wrappers.cpp");
+}
diff --git a/mozglue/static/rust/lib.rs b/mozglue/static/rust/lib.rs
new file mode 100644
index 0000000000..7faf9d3873
--- /dev/null
+++ b/mozglue/static/rust/lib.rs
@@ -0,0 +1,101 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use arrayvec::{Array, ArrayString};
+use std::cmp;
+use std::ops::Deref;
+use std::os::raw::c_char;
+use std::os::raw::c_int;
+use std::panic;
+
+#[link(name = "wrappers")]
+extern "C" {
+    // We can't use MOZ_Crash directly because it may be weakly linked
+    // and rust can't handle that.
+    fn RustMozCrash(filename: *const c_char, line: c_int, reason: *const c_char) -> !;
+}
+
+/// Truncate a string at the closest unicode character boundary <= `mid`.
+/// ```
+/// assert_eq!(str_truncate_valid("éà", 3), "é");
+/// assert_eq!(str_truncate_valid("éà", 4), "éà");
+/// ```
+fn str_truncate_valid(s: &str, mid: usize) -> &str {
+    // Offsets past the end clamp to the whole string; 0 is always a boundary.
+    let mut end = mid.min(s.len());
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
+/// Fixed-capacity string built on ArrayString whose contents always end with
+/// a nul character (ArrayString itself carries no terminator).
+#[derive(Debug, PartialEq)]
+struct ArrayCString<A: Array<Item = u8> + Copy> {
+    inner: ArrayString<A>,
+}
+
+impl<S: AsRef<str>, A: Array<Item = u8> + Copy> From<S> for ArrayCString<A> {
+    /// Unlike ArrayString::from, input that does not fit is cut at the
+    /// closest unicode character boundary, leaving room for the nul.
+    /// ```
+    /// assert_eq!(ArrayCString::<[_; 4]>::from("éà"),
+    ///            ArrayCString::<[_; 4]>::from("é"));
+    /// assert_eq!(&*ArrayCString::<[_; 4]>::from("éà"), "é\0");
+    /// ```
+    fn from(s: S) -> Self {
+        let text = s.as_ref();
+        // One byte of the capacity is reserved for the terminator.
+        let budget = cmp::min(text.len(), A::CAPACITY - 1);
+        let mut inner = ArrayString::<A>::from(str_truncate_valid(text, budget)).unwrap();
+        inner.push('\0');
+        Self { inner }
+    }
+}
+
+impl<A: Array<Item = u8> + Copy> Deref for ArrayCString<A> {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        &self.inner
+    }
+}
+
+/// Panic hook that reports the panic through RustMozCrash, passing along the
+/// panic message and, when available, the source location.
+fn panic_hook(info: &panic::PanicInfo) {
+    // &str and String payloads should cover 99% of cases.
+    let payload = info.payload();
+    let message: &str = match payload.downcast_ref::<&str>() {
+        Some(text) => *text,
+        None => match payload.downcast_ref::<String>() {
+            Some(text) => text.as_str(),
+            // Not the most helpful thing, but seems unlikely to happen
+            // in practice.
+            None => "Unhandled rust panic payload!",
+        },
+    };
+    let (filename, line) = match info.location() {
+        Some(loc) => (loc.file(), loc.line()),
+        None => ("unknown.rs", 0),
+    };
+    // Rust strings carry no terminating nul but RustMozCrash wants one, so
+    // both strings are copied into nul-terminated buffers on the stack.
+    let message = ArrayCString::<[_; 512]>::from(message);
+    let filename = ArrayCString::<[_; 512]>::from(filename);
+    unsafe {
+        RustMozCrash(
+            filename.as_ptr() as *const c_char,
+            line as c_int,
+            message.as_ptr() as *const c_char,
+        );
+    }
+}
+
+/// Route rust panics to MFBT's MOZ_Crash by installing `panic_hook`.
+#[no_mangle]
+pub extern "C" fn install_rust_panic_hook() {
+    panic::set_hook(Box::new(|info| panic_hook(info)));
+}
diff --git a/mozglue/static/rust/wrappers.cpp b/mozglue/static/rust/wrappers.cpp
new file mode 100644
index 0000000000..ac86bbdd5b
--- /dev/null
+++ b/mozglue/static/rust/wrappers.cpp
@@ -0,0 +1,18 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This ensures the right configuration for e.g. MOZ_GLUE_IN_PROGRAM,
+// used in the MFBT headers included further below. We use js-confdefs.h
+// instead of mozilla-config.h because the latter is not present in
+// spidermonkey standalone builds while the former is always present.
+#include "js-confdefs.h"
+#include "mozilla/Assertions.h"
+#include "mozilla/Types.h"
+
+// C-linkage wrapper that lets rust call MOZ_Crash, which is an inline function.
+extern "C" void RustMozCrash(const char* aFilename, int aLine,
+                             const char* aReason) {
+  MOZ_Crash(aFilename, aLine, aReason);  // Forwards all arguments unchanged.
+}
diff --git a/mozglue/tests/ShowSSEConfig.cpp b/mozglue/tests/ShowSSEConfig.cpp
new file mode 100644
index 0000000000..a19b30198c
--- /dev/null
+++ b/mozglue/tests/ShowSSEConfig.cpp
@@ -0,0 +1,125 @@
+/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/SSE.h"
+#include <stdio.h>
+
+#if defined(XP_WIN)
+int wmain()
+#else
+int main()
+#endif  // defined(XP_WIN)
+{  // Prints mozilla/SSE.h's compile-time and run-time feature info.
+  printf("CPUID detection present: %s\n",
+#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
+         "yes"
+#else
+         "no"
+#endif
+  );
+  // Turn each feature's COMPILE_WITH/PRESUME macro into a "Y" or "-" string.
+#ifdef MOZILLA_COMPILE_WITH_MMX
+#  define COMPILE_MMX_STRING "Y"
+#else
+#  define COMPILE_MMX_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_MMX
+#  define PRESUME_MMX_STRING "Y"
+#else
+#  define PRESUME_MMX_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE
+#  define COMPILE_SSE_STRING "Y"
+#else
+#  define COMPILE_SSE_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE
+#  define PRESUME_SSE_STRING "Y"
+#else
+#  define PRESUME_SSE_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE2
+#  define COMPILE_SSE2_STRING "Y"
+#else
+#  define COMPILE_SSE2_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE2
+#  define PRESUME_SSE2_STRING "Y"
+#else
+#  define PRESUME_SSE2_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE3
+#  define COMPILE_SSE3_STRING "Y"
+#else
+#  define COMPILE_SSE3_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE3
+#  define PRESUME_SSE3_STRING "Y"
+#else
+#  define PRESUME_SSE3_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSSE3
+#  define COMPILE_SSSE3_STRING "Y"
+#else
+#  define COMPILE_SSSE3_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSSE3
+#  define PRESUME_SSSE3_STRING "Y"
+#else
+#  define PRESUME_SSSE3_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE4A
+#  define COMPILE_SSE4A_STRING "Y"
+#else
+#  define COMPILE_SSE4A_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE4A
+#  define PRESUME_SSE4A_STRING "Y"
+#else
+#  define PRESUME_SSE4A_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE4_1
+#  define COMPILE_SSE4_1_STRING "Y"
+#else
+#  define COMPILE_SSE4_1_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE4_1
+#  define PRESUME_SSE4_1_STRING "Y"
+#else
+#  define PRESUME_SSE4_1_STRING "-"
+#endif
+
+#ifdef MOZILLA_COMPILE_WITH_SSE4_2
+#  define COMPILE_SSE4_2_STRING "Y"
+#else
+#  define COMPILE_SSE4_2_STRING "-"
+#endif
+#ifdef MOZILLA_PRESUME_SSE4_2
+#  define PRESUME_SSE4_2_STRING "Y"
+#else
+#  define PRESUME_SSE4_2_STRING "-"
+#endif
+  // NOTE(review): the header names five columns; SHOW_INFO prints four.
+  printf("Feature Presume Compile Support  Use\n");
+#define SHOW_INFO(featurelc_, featureuc_)                              \
+  printf("%7s    %1s       %1s       %1s\n", #featurelc_,              \
+         PRESUME_##featureuc_##_STRING, COMPILE_##featureuc_##_STRING, \
+         (mozilla::supports_##featurelc_() ? "Y" : "-"));
+  SHOW_INFO(mmx, MMX)
+  SHOW_INFO(sse, SSE)
+  SHOW_INFO(sse2, SSE2)
+  SHOW_INFO(sse3, SSE3)
+  SHOW_INFO(ssse3, SSSE3)
+  SHOW_INFO(sse4a, SSE4A)
+  SHOW_INFO(sse4_1, SSE4_1)
+  SHOW_INFO(sse4_2, SSE4_2)
+  return 0;
+}
diff --git a/mozglue/tests/TestBaseProfiler.cpp b/mozglue/tests/TestBaseProfiler.cpp
new file mode 100644
index 0000000000..a3b0cde22e
--- /dev/null
+++ b/mozglue/tests/TestBaseProfiler.cpp
@@ -0,0 +1,4452 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "BaseProfiler.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/BaseProfileJSONWriter.h"
+
+#ifdef MOZ_GECKO_PROFILER
+#  include "mozilla/BaseProfilerMarkerTypes.h"
+#  include "mozilla/BlocksRingBuffer.h"
+#  include "mozilla/leb128iterator.h"
+#  include "mozilla/ModuloBuffer.h"
+#  include "mozilla/PowerOfTwo.h"
+#  include "mozilla/ProfileBufferChunk.h"
+#  include "mozilla/ProfileBufferChunkManagerSingle.h"
+#  include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
+#  include "mozilla/ProfileBufferControlledChunkManager.h"
+#  include "mozilla/ProfileChunkedBuffer.h"
+#  include "mozilla/Vector.h"
+#endif  // MOZ_GECKO_PROFILER
+
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#  include <windows.h>
+#  include <mmsystem.h>
+#  include <process.h>
+#else
+#  include <errno.h>
+#  include <string.h>
+#  include <time.h>
+#  include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <iostream>
+#include <random>
+#include <thread>
+#include <type_traits>
+#include <utility>
+
+#ifdef MOZ_GECKO_PROFILER
+
+// Sleeps for aMilliseconds (nanosleep() is resumed after EINTR interruptions).
+MOZ_MAYBE_UNUSED static void SleepMilli(unsigned aMilliseconds) {
+#  if defined(_MSC_VER) || defined(__MINGW32__)
+  Sleep(aMilliseconds);
+#  else
+  struct timespec request = {static_cast<time_t>(aMilliseconds / 1000),
+                             long(aMilliseconds % 1000) * 1000000};
+  struct timespec remaining = {0, 0};
+  while (nanosleep(&request, &remaining) != 0) {
+    if (errno != EINTR) {
+      printf("nanosleep() -> %s\n", strerror(errno));
+      exit(1);
+    }
+    request = remaining;  // Interrupted: go back to sleep for the time left.
+  }
+#  endif
+}
+
+MOZ_MAYBE_UNUSED static void WaitUntilTimeStampChanges(
+    const mozilla::TimeStamp& aTimeStampToCompare =
+        mozilla::TimeStamp::NowUnfuzzed()) {
+  // Poll every millisecond until the clock moves off aTimeStampToCompare.
+  for (; aTimeStampToCompare == mozilla::TimeStamp::NowUnfuzzed();
+       SleepMilli(1)) {}
+}
+
+using namespace mozilla;
+
+void TestPowerOfTwoMask() {
+  printf("TestPowerOfTwoMask...\n");
+
+  // Compile-time construction, checked statically and then at run time.
+  static_assert(MakePowerOfTwoMask<uint32_t, 0>().MaskValue() == 0);
+  static_assert(MakePowerOfTwoMask<uint32_t, 0xFFu>().MaskValue() == 0xFFu);
+  static_assert(MakePowerOfTwoMask<uint32_t, 0xFFFFFFFFu>().MaskValue() ==
+                0xFFFFFFFFu);
+  constexpr PowerOfTwoMask<uint32_t> mask0 = MakePowerOfTwoMask<uint32_t, 0>();
+  MOZ_RELEASE_ASSERT(mask0.MaskValue() == 0);
+  constexpr PowerOfTwoMask<uint32_t> maskFF =
+      MakePowerOfTwoMask<uint32_t, 0xFFu>();
+  MOZ_RELEASE_ASSERT(maskFF.MaskValue() == 0xFFu);
+  constexpr PowerOfTwoMask<uint32_t> maskAllBits =
+      MakePowerOfTwoMask<uint32_t, 0xFFFFFFFFu>();
+  MOZ_RELEASE_ASSERT(maskAllBits.MaskValue() == 0xFFFFFFFFu);
+
+  struct MaskCase {
+    uint32_t mInput;
+    uint32_t mMask;
+  };
+  // clang-format off
+  MaskCase cases[] = {
+    { 0, 0 },
+    { 1, 1 },
+    { 2, 3 },
+    { 3, 3 },
+    { 4, 7 },
+    { 5, 7 },
+    { (1u << 31) - 1, (1u << 31) - 1 },
+    { (1u << 31), uint32_t(-1) },
+    { (1u << 31) + 1, uint32_t(-1) },
+    { uint32_t(-1), uint32_t(-1) }
+  };
+  // clang-format on
+  for (const MaskCase& outer : cases) {
+    PowerOfTwoMask<uint32_t> p2m(outer.mInput);
+    MOZ_RELEASE_ASSERT(p2m.MaskValue() == outer.mMask);
+    for (const MaskCase& inner : cases) {
+      const uint32_t& value = inner.mInput;
+      // `% p2m` must equal plain modulo; skipped when mask + 1 would wrap to 0.
+      if (p2m.MaskValue() != uint32_t(-1)) {
+        MOZ_RELEASE_ASSERT((value % p2m) == (value % (p2m.MaskValue() + 1)));
+      }
+      MOZ_RELEASE_ASSERT((value & p2m) == (value % p2m));
+      MOZ_RELEASE_ASSERT((p2m & value) == (value & p2m));
+    }
+  }
+
+  printf("TestPowerOfTwoMask done\n");
+}
+
+void TestPowerOfTwo() {
+  printf("TestPowerOfTwo...\n");
+
+  // Compile-time checks: Value() is kept and Mask() is Value() - 1.
+  static_assert(MakePowerOfTwo<uint32_t, 1>().Value() == 1);
+  static_assert(MakePowerOfTwo<uint32_t, 1>().Mask().MaskValue() == 0);
+  static_assert(MakePowerOfTwo<uint32_t, 128>().Value() == 128);
+  static_assert(MakePowerOfTwo<uint32_t, 128>().Mask().MaskValue() == 127);
+  static_assert(MakePowerOfTwo<uint32_t, 0x80000000u>().Value() == 0x80000000u);
+  static_assert(MakePowerOfTwo<uint32_t, 0x80000000u>().Mask().MaskValue() ==
+                0x7FFFFFFFu);
+  // The same values, read back at run time from constexpr objects.
+  constexpr PowerOfTwo<uint32_t> p1 = MakePowerOfTwo<uint32_t, 1>();
+  MOZ_RELEASE_ASSERT(p1.Value() == 1);
+  constexpr PowerOfTwo<uint32_t> p128 = MakePowerOfTwo<uint32_t, 128>();
+  MOZ_RELEASE_ASSERT(p128.Value() == 128);
+  constexpr PowerOfTwo<uint32_t> pMax = MakePowerOfTwo<uint32_t, 0x80000000u>();
+  MOZ_RELEASE_ASSERT(pMax.Value() == 0x80000000u);
+
+  struct P2Case {
+    uint32_t mInput;
+    uint32_t mValue;
+    uint32_t mMask;
+  };
+  // clang-format off
+  P2Case cases[] = {
+    { 0, 1, 0 },
+    { 1, 1, 0 },
+    { 2, 2, 1 },
+    { 3, 4, 3 },
+    { 4, 4, 3 },
+    { 5, 8, 7 },
+    { (1u << 31) - 1, (1u << 31), (1u << 31) - 1 },
+    { (1u << 31), (1u << 31), (1u << 31) - 1 },
+    { (1u << 31) + 1, (1u << 31), (1u << 31) - 1 },
+    { uint32_t(-1), (1u << 31), (1u << 31) - 1 }
+  };
+  // clang-format on
+  for (const P2Case& outer : cases) {
+    PowerOfTwo<uint32_t> p2(outer.mInput);
+    MOZ_RELEASE_ASSERT(p2.Value() == outer.mValue);
+    MOZ_RELEASE_ASSERT(p2.MaskValue() == outer.mMask);
+    PowerOfTwoMask<uint32_t> p2m = p2.Mask();
+    MOZ_RELEASE_ASSERT(p2m.MaskValue() == outer.mMask);
+    for (const P2Case& inner : cases) {
+      MOZ_RELEASE_ASSERT((inner.mInput % p2) == (inner.mInput % p2.Value()));
+    }
+  }
+
+  printf("TestPowerOfTwo done\n");
+}
+
+void TestLEB128() {
+  printf("TestLEB128...\n");
+  // Checks maximum encoded sizes, then round-trips each value listed below.
+  MOZ_RELEASE_ASSERT(ULEB128MaxSize<uint8_t>() == 2);
+  MOZ_RELEASE_ASSERT(ULEB128MaxSize<uint16_t>() == 3);
+  MOZ_RELEASE_ASSERT(ULEB128MaxSize<uint32_t>() == 5);
+  MOZ_RELEASE_ASSERT(ULEB128MaxSize<uint64_t>() == 10);
+
+  struct TestDataU64 {
+    uint64_t mValue;
+    unsigned mSize;
+    const char* mBytes;
+  };
+  // clang-format off
+  TestDataU64 tests[] = {
+    // Small numbers should keep their normal byte representation.
+    {                  0u,  1, "\0" },
+    {                  1u,  1, "\x01" },
+
+    // 0111 1111 (127, or 0x7F) is the highest number that fits into a single
+    // LEB128 byte. It gets encoded as 0111 1111, note the most significant bit
+    // is off.
+    {               0x7Fu,  1, "\x7F" },
+
+    // Next number: 128, or 0x80.
+    //   Original data representation:  1000 0000
+    //     Broken up into groups of 7:         1  0000000
+    // Padded with 0 (msB) or 1 (lsB):  00000001 10000000
+    //            Byte representation:  0x01     0x80
+    //            Little endian order:  -> 0x80 0x01
+    {               0x80u,  2, "\x80\x01" },
+
+    // Next: 129, or 0x81 (showing that we don't lose low bits.)
+    //   Original data representation:  1000 0001
+    //     Broken up into groups of 7:         1  0000001
+    // Padded with 0 (msB) or 1 (lsB):  00000001 10000001
+    //            Byte representation:  0x01     0x81
+    //            Little endian order:  -> 0x81 0x01
+    {               0x81u,  2, "\x81\x01" },
+
+    // Highest 8-bit number: 255, or 0xFF.
+    //   Original data representation:  1111 1111
+    //     Broken up into groups of 7:         1  1111111
+    // Padded with 0 (msB) or 1 (lsB):  00000001 11111111
+    //            Byte representation:  0x01     0xFF
+    //            Little endian order:  -> 0xFF 0x01
+    {               0xFFu,  2, "\xFF\x01" },
+
+    // Next: 256, or 0x100.
+    //   Original data representation:  1 0000 0000
+    //     Broken up into groups of 7:        10  0000000
+    // Padded with 0 (msB) or 1 (lsB):  00000010 10000000
+    //            Byte representation:  0x02     0x80
+    //            Little endian order:  -> 0x80 0x02
+    {              0x100u,  2, "\x80\x02" },
+
+    // Highest 32-bit number: 0xFFFFFFFF (4 bytes, all bits set).
+    // Original: 1111 1111 1111 1111 1111 1111 1111 1111
+    // Groups:     1111  1111111  1111111  1111111  1111111
+    // Padded: 00001111 11111111 11111111 11111111 11111111
+    // Bytes:  0x0F     0xFF     0xFF     0xFF     0xFF
+    // Little Endian: -> 0xFF 0xFF 0xFF 0xFF 0x0F
+    {         0xFFFFFFFFu,  5, "\xFF\xFF\xFF\xFF\x0F" },
+
+    // Highest 64-bit number: 0xFFFFFFFFFFFFFFFF (8 bytes, all bits set).
+    // 64 bits, that's 9 groups of 7 bits, plus 1 (most significant) bit.
+    { 0xFFFFFFFFFFFFFFFFu, 10, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01" }
+  };
+  // clang-format on
+
+  for (const TestDataU64& test : tests) {
+    MOZ_RELEASE_ASSERT(ULEB128Size(test.mValue) == test.mSize);
+    // Prepare a buffer that can accommodate the largest-possible LEB128.
+    uint8_t buffer[ULEB128MaxSize<uint64_t>()];
+    // Use a pointer into the buffer as iterator.
+    uint8_t* p = buffer;
+    // And write the LEB128.
+    WriteULEB128(test.mValue, p);
+    // Pointer (iterator) should have advanced just past the expected LEB128
+    // size.
+    MOZ_RELEASE_ASSERT(p == buffer + test.mSize);
+    // Check expected bytes.
+    for (unsigned i = 0; i < test.mSize; ++i) {
+      MOZ_RELEASE_ASSERT(buffer[i] == uint8_t(test.mBytes[i]));
+    }
+
+    // Move pointer (iterator) back to start of buffer.
+    p = buffer;
+    // And read the LEB128 we wrote above.
+    uint64_t read = ReadULEB128<uint64_t>(p);
+    // Pointer (iterator) should have also advanced just past the expected
+    // LEB128 size.
+    MOZ_RELEASE_ASSERT(p == buffer + test.mSize);
+    // And check the read value.
+    MOZ_RELEASE_ASSERT(read == test.mValue);
+
+    // Testing ULEB128 reader.
+    ULEB128Reader<uint64_t> reader;
+    MOZ_RELEASE_ASSERT(!reader.IsComplete());
+    // Move pointer back to start of buffer.
+    p = buffer;
+    for (;;) {
+      // Read a byte and feed it to the reader.
+      if (reader.FeedByteIsComplete(*p++)) {
+        break;
+      }
+      // Not complete yet, we shouldn't have reached the end pointer.
+      MOZ_RELEASE_ASSERT(!reader.IsComplete());
+      MOZ_RELEASE_ASSERT(p < buffer + test.mSize);
+    }
+    MOZ_RELEASE_ASSERT(reader.IsComplete());
+    // Pointer should have advanced just past the expected LEB128 size.
+    MOZ_RELEASE_ASSERT(p == buffer + test.mSize);
+    // And check the read value.
+    MOZ_RELEASE_ASSERT(reader.Value() == test.mValue);
+
+    // And again after a Reset.
+    reader.Reset();
+    MOZ_RELEASE_ASSERT(!reader.IsComplete());
+    p = buffer;
+    for (;;) {
+      if (reader.FeedByteIsComplete(*p++)) {
+        break;
+      }
+      MOZ_RELEASE_ASSERT(!reader.IsComplete());
+      MOZ_RELEASE_ASSERT(p < buffer + test.mSize);
+    }
+    MOZ_RELEASE_ASSERT(reader.IsComplete());
+    MOZ_RELEASE_ASSERT(p == buffer + test.mSize);
+    MOZ_RELEASE_ASSERT(reader.Value() == test.mValue);
+  }
+
+  printf("TestLEB128 done\n");
+}
+
+// Feeds `byte` then `tail...` to aReader. True iff the reader only becomes
+// complete on the last byte and IsComplete() agrees after every byte.
+template <uint8_t byte, uint8_t... tail>
+constexpr bool TestConstexprULEB128Reader(ULEB128Reader<uint64_t>& aReader) {
+  if (aReader.IsComplete()) {
+    return false;
+  }
+  const bool completed = aReader.FeedByteIsComplete(byte);
+  if (aReader.IsComplete() != completed) {
+    return false;
+  }
+  if constexpr (sizeof...(tail) != 0) {
+    // More bytes to feed: the value must not be complete yet.
+    return !completed && TestConstexprULEB128Reader<tail...>(aReader);
+  } else {
+    return completed;
+  }
+}
+
+// Constexpr check that feeding `bytes...` to a ULEB128Reader completes
+// exactly on the last byte and decodes to `expected`. The same reader is then
+// Reset() and must pass the identical checks again.
+// Returns true iff both passes succeed.
+template <uint64_t expected, uint8_t... bytes>
+constexpr bool TestConstexprULEB128Reader() {
+  ULEB128Reader<uint64_t> reader;
+
+  // First pass, on a freshly constructed reader.
+  if (!TestConstexprULEB128Reader<bytes...>(reader)) {
+    return false;
+  }
+  if (!reader.IsComplete() || reader.Value() != expected) {
+    return false;
+  }
+
+  // Second pass, on the same reader after Reset().
+  reader.Reset();
+  if (!TestConstexprULEB128Reader<bytes...>(reader)) {
+    return false;
+  }
+
+  // The final verdict is whether the second pass also completed with the
+  // expected value.
+  return reader.IsComplete() && reader.Value() == expected;
+}
+
+static_assert(TestConstexprULEB128Reader<0x0u, 0x0u>());  // Single byte.
+static_assert(!TestConstexprULEB128Reader<0x0u, 0x0u, 0x0u>());  // Extra byte.
+static_assert(TestConstexprULEB128Reader<0x1u, 0x1u>());
+static_assert(TestConstexprULEB128Reader<0x7Fu, 0x7Fu>());
+static_assert(TestConstexprULEB128Reader<0x80u, 0x80u, 0x01u>());
+static_assert(!TestConstexprULEB128Reader<0x80u, 0x80u>());  // Incomplete.
+static_assert(!TestConstexprULEB128Reader<0x80u, 0x01u>());  // Decodes to 1.
+static_assert(TestConstexprULEB128Reader<0x81u, 0x81u, 0x01u>());
+static_assert(TestConstexprULEB128Reader<0xFFu, 0xFFu, 0x01u>());
+static_assert(TestConstexprULEB128Reader<0x100u, 0x80u, 0x02u>());
+static_assert(TestConstexprULEB128Reader<0xFFFFFFFFu, 0xFFu, 0xFFu, 0xFFu,
+                                         0xFFu, 0x0Fu>());
+static_assert(
+    !TestConstexprULEB128Reader<0xFFFFFFFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu>());
+static_assert(!TestConstexprULEB128Reader<0xFFFFFFFFu, 0xFFu, 0xFFu, 0xFFu,
+                                          0xFFu, 0xFFu, 0x0Fu>());
+static_assert(
+    TestConstexprULEB128Reader<0xFFFFFFFFFFFFFFFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
+                               0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0x01u>());
+static_assert(
+    !TestConstexprULEB128Reader<0xFFFFFFFFFFFFFFFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
+                                0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu>());
+
+static void TestChunk() {
+  printf("TestChunk...\n");
+  // Checks chunk creation, block reservation, overfill, and recycling.
+  static_assert(!std::is_default_constructible_v<ProfileBufferChunk>,
+                "ProfileBufferChunk should not be default-constructible");
+  static_assert(
+      !std::is_constructible_v<ProfileBufferChunk, ProfileBufferChunk::Length>,
+      "ProfileBufferChunk should not be constructible from Length");
+
+  static_assert(
+      sizeof(ProfileBufferChunk::Header) ==
+          sizeof(ProfileBufferChunk::Header::mOffsetFirstBlock) +
+              sizeof(ProfileBufferChunk::Header::mOffsetPastLastBlock) +
+              sizeof(ProfileBufferChunk::Header::mDoneTimeStamp) +
+              sizeof(ProfileBufferChunk::Header::mBufferBytes) +
+              sizeof(ProfileBufferChunk::Header::mBlockCount) +
+              sizeof(ProfileBufferChunk::Header::mRangeStart) +
+              sizeof(ProfileBufferChunk::Header::mProcessId) +
+              sizeof(ProfileBufferChunk::Header::mPADDING),
+      "ProfileBufferChunk::Header may have unwanted padding, please review");
+  // Note: The above static_assert is an attempt at keeping
+  // ProfileBufferChunk::Header tightly packed, but some changes could make this
+  // impossible to achieve (most probably due to alignment) -- Just do your
+  // best!
+
+  constexpr ProfileBufferChunk::Length TestLen = 1000;
+
+  // Basic allocations of different sizes.
+  for (ProfileBufferChunk::Length len = 0; len <= TestLen; ++len) {
+    auto chunk = ProfileBufferChunk::Create(len);
+    static_assert(
+        std::is_same_v<decltype(chunk), UniquePtr<ProfileBufferChunk>>,
+        "ProfileBufferChunk::Create() should return a "
+        "UniquePtr<ProfileBufferChunk>");
+    MOZ_RELEASE_ASSERT(!!chunk, "OOM!?");
+    MOZ_RELEASE_ASSERT(chunk->BufferBytes() >= len);
+    MOZ_RELEASE_ASSERT(chunk->ChunkBytes() >=
+                       len + ProfileBufferChunk::SizeofChunkMetadata());
+    MOZ_RELEASE_ASSERT(chunk->RemainingBytes() == chunk->BufferBytes());
+    MOZ_RELEASE_ASSERT(chunk->OffsetFirstBlock() == 0);
+    MOZ_RELEASE_ASSERT(chunk->OffsetPastLastBlock() == 0);
+    MOZ_RELEASE_ASSERT(chunk->BlockCount() == 0);
+    MOZ_RELEASE_ASSERT(chunk->ProcessId() == 0);
+    MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0);
+    MOZ_RELEASE_ASSERT(chunk->BufferSpan().LengthBytes() ==
+                       chunk->BufferBytes());
+    MOZ_RELEASE_ASSERT(!chunk->GetNext());
+    MOZ_RELEASE_ASSERT(!chunk->ReleaseNext());
+    MOZ_RELEASE_ASSERT(chunk->Last() == chunk.get());
+  }
+
+  // Allocate the main test Chunk.
+  auto chunkA = ProfileBufferChunk::Create(TestLen);
+  MOZ_RELEASE_ASSERT(!!chunkA, "OOM!?");
+  MOZ_RELEASE_ASSERT(chunkA->BufferBytes() >= TestLen);
+  MOZ_RELEASE_ASSERT(chunkA->ChunkBytes() >=
+                     TestLen + ProfileBufferChunk::SizeofChunkMetadata());
+  MOZ_RELEASE_ASSERT(!chunkA->GetNext());
+  MOZ_RELEASE_ASSERT(!chunkA->ReleaseNext());
+
+  constexpr ProfileBufferIndex chunkARangeStart = 12345;
+  chunkA->SetRangeStart(chunkARangeStart);
+  MOZ_RELEASE_ASSERT(chunkA->RangeStart() == chunkARangeStart);
+
+  // Get a read-only span over its buffer.
+  auto bufferA = chunkA->BufferSpan();
+  static_assert(
+      std::is_same_v<decltype(bufferA), Span<const ProfileBufferChunk::Byte>>,
+      "BufferSpan() should return a Span<const Byte>");
+  MOZ_RELEASE_ASSERT(bufferA.LengthBytes() == chunkA->BufferBytes());
+
+  // Add the initial tail block.
+  constexpr ProfileBufferChunk::Length initTailLen = 10;
+  auto initTail = chunkA->ReserveInitialBlockAsTail(initTailLen);
+  static_assert(
+      std::is_same_v<decltype(initTail), Span<ProfileBufferChunk::Byte>>,
+      "ReserveInitialBlockAsTail() should return a Span<Byte>");
+  MOZ_RELEASE_ASSERT(initTail.LengthBytes() == initTailLen);
+  MOZ_RELEASE_ASSERT(initTail.Elements() == bufferA.Elements());
+  MOZ_RELEASE_ASSERT(chunkA->OffsetFirstBlock() == initTailLen);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetPastLastBlock() == initTailLen);
+
+  // Add the first complete block.
+  constexpr ProfileBufferChunk::Length block1Len = 20;
+  auto block1 = chunkA->ReserveBlock(block1Len);
+  static_assert(
+      std::is_same_v<decltype(block1), ProfileBufferChunk::ReserveReturn>,
+      "ReserveBlock() should return a ReserveReturn");
+  MOZ_RELEASE_ASSERT(block1.mBlockRangeIndex.ConvertToProfileBufferIndex() ==
+                     chunkARangeStart + initTailLen);
+  MOZ_RELEASE_ASSERT(block1.mSpan.LengthBytes() == block1Len);
+  MOZ_RELEASE_ASSERT(block1.mSpan.Elements() ==
+                     bufferA.Elements() + initTailLen);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetFirstBlock() == initTailLen);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetPastLastBlock() == initTailLen + block1Len);
+  MOZ_RELEASE_ASSERT(chunkA->RemainingBytes() != 0);
+
+  // Add another block to over-fill the ProfileBufferChunk.
+  const ProfileBufferChunk::Length remaining =
+      chunkA->BufferBytes() - (initTailLen + block1Len);
+  constexpr ProfileBufferChunk::Length overfill = 30;
+  const ProfileBufferChunk::Length block2Len = remaining + overfill;
+  ProfileBufferChunk::ReserveReturn block2 = chunkA->ReserveBlock(block2Len);
+  MOZ_RELEASE_ASSERT(block2.mBlockRangeIndex.ConvertToProfileBufferIndex() ==
+                     chunkARangeStart + initTailLen + block1Len);
+  MOZ_RELEASE_ASSERT(block2.mSpan.LengthBytes() == remaining);
+  MOZ_RELEASE_ASSERT(block2.mSpan.Elements() ==
+                     bufferA.Elements() + initTailLen + block1Len);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetFirstBlock() == initTailLen);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetPastLastBlock() == chunkA->BufferBytes());
+  MOZ_RELEASE_ASSERT(chunkA->RemainingBytes() == 0);
+
+  // Chunk must be marked "done" before it can be recycled.
+  chunkA->MarkDone();
+
+  // It must be marked "recycled" before data can be added to it again.
+  chunkA->MarkRecycled();
+
+  // Add an empty initial tail block.
+  Span<ProfileBufferChunk::Byte> initTail2 =
+      chunkA->ReserveInitialBlockAsTail(0);
+  MOZ_RELEASE_ASSERT(initTail2.LengthBytes() == 0);
+  MOZ_RELEASE_ASSERT(initTail2.Elements() == bufferA.Elements());
+  MOZ_RELEASE_ASSERT(chunkA->OffsetFirstBlock() == 0);
+  MOZ_RELEASE_ASSERT(chunkA->OffsetPastLastBlock() == 0);
+
+  // Chunk must be marked "done" before it can be destroyed.
+  chunkA->MarkDone();
+
+  chunkA->SetProcessId(123);
+  MOZ_RELEASE_ASSERT(chunkA->ProcessId() == 123);
+
+  printf("TestChunk done\n");
+}
+
+static void TestChunkManagerSingle() {
+  printf("TestChunkManagerSingle...\n");
+
+  // Construct a ProfileBufferChunkManagerSingle for one chunk of size >=1000.
+  constexpr ProfileBufferChunk::Length ChunkMinBufferBytes = 1000;
+  ProfileBufferChunkManagerSingle cms{ChunkMinBufferBytes};
+
+  // Reference to base class, to exercise virtual methods.
+  ProfileBufferChunkManager& cm = cms;
+
+#  ifdef DEBUG
+  const char* chunkManagerRegisterer = "TestChunkManagerSingle";
+  cm.RegisteredWith(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  const auto maxTotalSize = cm.MaxTotalSize();
+  MOZ_RELEASE_ASSERT(maxTotalSize >= ChunkMinBufferBytes);
+
+  cm.SetChunkDestroyedCallback([](const ProfileBufferChunk&) {
+    MOZ_RELEASE_ASSERT(
+        false,
+        "ProfileBufferChunkManagerSingle should never destroy its one chunk");
+  });
+
+  UniquePtr<ProfileBufferChunk> extantReleasedChunks =
+      cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // First request.
+  UniquePtr<ProfileBufferChunk> chunk = cm.GetChunk();
+  MOZ_RELEASE_ASSERT(!!chunk, "First chunk request should always work");
+  MOZ_RELEASE_ASSERT(chunk->BufferBytes() >= ChunkMinBufferBytes,
+                     "Unexpected chunk size");
+  MOZ_RELEASE_ASSERT(!chunk->GetNext(), "There should only be one chunk");
+
+  // Keep address, for later checks.
+  const uintptr_t chunkAddress = reinterpret_cast<uintptr_t>(chunk.get());
+
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // Second request.
+  MOZ_RELEASE_ASSERT(!cm.GetChunk(), "Second chunk request should always fail");
+
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // Add some data to the chunk (to verify recycling later on).
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 0);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 0);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0);
+  chunk->SetRangeStart(100);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 100);
+  Unused << chunk->ReserveInitialBlockAsTail(1);
+  Unused << chunk->ReserveBlock(2);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 1);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 1 + 2);
+
+  // Release the first chunk.
+  chunk->MarkDone();
+  cm.ReleaseChunk(std::move(chunk));
+  MOZ_RELEASE_ASSERT(!chunk, "chunk UniquePtr should have been moved-from");
+
+  // Request after release.
+  MOZ_RELEASE_ASSERT(!cm.GetChunk(),
+                     "Chunk request after release should also fail");
+
+  // Check released chunk.
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!!extantReleasedChunks,
+                     "Could not retrieve released chunk");
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks->GetNext(),
+                     "There should only be one released chunk");
+  MOZ_RELEASE_ASSERT(
+      reinterpret_cast<uintptr_t>(extantReleasedChunks.get()) == chunkAddress,
+      "Released chunk should be first requested one");
+
+  MOZ_RELEASE_ASSERT(!cm.GetExtantReleasedChunks(),
+                     "Unexpected extra released chunk(s)");
+
+  // Another request after release.
+  MOZ_RELEASE_ASSERT(!cm.GetChunk(),
+                     "Chunk request after release should also fail");
+
+  MOZ_RELEASE_ASSERT(
+      cm.MaxTotalSize() == maxTotalSize,
+      "MaxTotalSize() should not change after requests&releases");
+
+  // Reset the chunk manager. (Single-only non-virtual function.)
+  cms.Reset(std::move(extantReleasedChunks));
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks,
+                     "Released chunk UniquePtr should have been moved-from");
+
+  MOZ_RELEASE_ASSERT(
+      cm.MaxTotalSize() == maxTotalSize,
+      "MaxTotalSize() should not change when resetting with the same chunk");
+
+  // 2nd round, first request. Theoretically async, but this implementation just
+  // immediately runs the callback.
+  bool ran = false;
+  cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aChunk) {
+    ran = true;
+    MOZ_RELEASE_ASSERT(!!aChunk);
+    chunk = std::move(aChunk);
+  });
+  MOZ_RELEASE_ASSERT(ran, "RequestChunk callback not called immediately");
+  ran = false;
+  cm.FulfillChunkRequests();
+  MOZ_RELEASE_ASSERT(!ran, "FulfillChunkRequests should not have any effects");
+  MOZ_RELEASE_ASSERT(!!chunk, "First chunk request should always work");
+  MOZ_RELEASE_ASSERT(chunk->BufferBytes() >= ChunkMinBufferBytes,
+                     "Unexpected chunk size");
+  MOZ_RELEASE_ASSERT(!chunk->GetNext(), "There should only be one chunk");
+  MOZ_RELEASE_ASSERT(reinterpret_cast<uintptr_t>(chunk.get()) == chunkAddress,
+                     "Requested chunk should be first requested one");
+  // Verify that chunk is empty and usable.
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 0);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 0);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0);
+  chunk->SetRangeStart(200);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 200);
+  Unused << chunk->ReserveInitialBlockAsTail(3);
+  Unused << chunk->ReserveBlock(4);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 3);
+  MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 3 + 4);
+
+  // Second request.
+  ran = false;
+  cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aChunk) {
+    ran = true;
+    MOZ_RELEASE_ASSERT(!aChunk, "Second chunk request should always fail");
+  });
+  MOZ_RELEASE_ASSERT(ran, "RequestChunk callback not called");
+
+  // This one does nothing.
+  cm.ForgetUnreleasedChunks();
+
+  // Don't forget to mark chunk "Done" before letting it die.
+  chunk->MarkDone();
+  chunk = nullptr;
+
+  // Create a tiny chunk and reset the chunk manager with it.
+  chunk = ProfileBufferChunk::Create(1);
+  MOZ_RELEASE_ASSERT(!!chunk);
+  auto tinyChunkSize = chunk->BufferBytes();
+  MOZ_RELEASE_ASSERT(tinyChunkSize >= 1);
+  MOZ_RELEASE_ASSERT(tinyChunkSize < ChunkMinBufferBytes);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0);
+  chunk->SetRangeStart(300);
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 300);
+  cms.Reset(std::move(chunk));
+  MOZ_RELEASE_ASSERT(!chunk, "chunk UniquePtr should have been moved-from");
+  MOZ_RELEASE_ASSERT(cm.MaxTotalSize() == tinyChunkSize,
+                     "MaxTotalSize() should match the new chunk size");
+  chunk = cm.GetChunk();
+  MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0, "Got non-recycled chunk");
+
+  // Enough testing! Clean-up.
+  Unused << chunk->ReserveInitialBlockAsTail(0);
+  chunk->MarkDone();
+  cm.ForgetUnreleasedChunks();
+
+#  ifdef DEBUG
+  cm.DeregisteredFrom(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  printf("TestChunkManagerSingle done\n");
+}
+
+// Tests ProfileBufferChunkManagerWithLocalLimit through a reference to its
+// ProfileBufferChunkManager base class. The test:
+// - checks the reported maximum total size;
+// - cycles request/release enough times to roll over the maximum size several
+//   times, counting destroyed chunks and reuse of the first chunk's address;
+// - checks that the manager can be used from inside a request callback;
+// - releases "done" chunks in shuffled order and checks that the list of
+//   released chunks stays sorted by "done" timestamp.
+static void TestChunkManagerWithLocalLimit() {
+  printf("TestChunkManagerWithLocalLimit...\n");
+
+  // Construct a ProfileBufferChunkManagerWithLocalLimit with chunk of minimum
+  // size >=100, up to 1000 bytes.
+  constexpr ProfileBufferChunk::Length MaxTotalBytes = 1000;
+  constexpr ProfileBufferChunk::Length ChunkMinBufferBytes = 100;
+  ProfileBufferChunkManagerWithLocalLimit cmll{MaxTotalBytes,
+                                               ChunkMinBufferBytes};
+
+  // Reference to base class, to exercise virtual methods.
+  ProfileBufferChunkManager& cm = cmll;
+
+#  ifdef DEBUG
+  const char* chunkManagerRegisterer = "TestChunkManagerWithLocalLimit";
+  cm.RegisteredWith(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  MOZ_RELEASE_ASSERT(cm.MaxTotalSize() == MaxTotalBytes,
+                     "Max total size should be exactly as given");
+
+  // Count every chunk (and its buffer bytes) reported as destroyed.
+  unsigned destroyedChunks = 0;
+  unsigned destroyedBytes = 0;
+  cm.SetChunkDestroyedCallback([&](const ProfileBufferChunk& aChunks) {
+    for (const ProfileBufferChunk* chunk = &aChunks; chunk;
+         chunk = chunk->GetNext()) {
+      destroyedChunks += 1;
+      destroyedBytes += chunk->BufferBytes();
+    }
+  });
+
+  UniquePtr<ProfileBufferChunk> extantReleasedChunks =
+      cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // First request.
+  UniquePtr<ProfileBufferChunk> chunk = cm.GetChunk();
+  MOZ_RELEASE_ASSERT(!!chunk,
+                     "First chunk immediate request should always work");
+  const auto chunkActualBufferBytes = chunk->BufferBytes();
+  MOZ_RELEASE_ASSERT(chunkActualBufferBytes >= ChunkMinBufferBytes,
+                     "Unexpected chunk size");
+  MOZ_RELEASE_ASSERT(!chunk->GetNext(), "There should only be one chunk");
+
+  // Keep address, for later checks.
+  const uintptr_t chunk1Address = reinterpret_cast<uintptr_t>(chunk.get());
+
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // Verify that ReleaseChunk accepts zero chunks. Query the manager again
+  // afterwards, so that the check below looks at the current released list
+  // instead of the (already known to be null) result of the previous query.
+  cm.ReleaseChunk(nullptr);
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+
+  // For this test, we need to be able to get at least 2 chunks without hitting
+  // the limit. (If this failed, it wouldn't necessarily be a problem with
+  // ProfileBufferChunkManagerWithLocalLimit, fiddle with constants at the top
+  // of this test.)
+  MOZ_RELEASE_ASSERT(2 * chunkActualBufferBytes < MaxTotalBytes);
+
+  unsigned chunk1ReuseCount = 0;
+
+  // We will do enough loops to go through the maximum size a number of times.
+  const unsigned Rollovers = 3;
+  const unsigned Loops = Rollovers * MaxTotalBytes / chunkActualBufferBytes;
+  for (unsigned i = 0; i < Loops; ++i) {
+    // Add some data to the chunk.
+    MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 0);
+    MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 0);
+    MOZ_RELEASE_ASSERT(chunk->RangeStart() == 0);
+    const ProfileBufferIndex index = 1 + i * chunkActualBufferBytes;
+    chunk->SetRangeStart(index);
+    MOZ_RELEASE_ASSERT(chunk->RangeStart() == index);
+    Unused << chunk->ReserveInitialBlockAsTail(1);
+    Unused << chunk->ReserveBlock(2);
+    MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetFirstBlock == 1);
+    MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mOffsetPastLastBlock == 1 + 2);
+
+    // Request a new chunk.
+    bool ran = false;
+    UniquePtr<ProfileBufferChunk> newChunk;
+    cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aChunk) {
+      ran = true;
+      newChunk = std::move(aChunk);
+    });
+    MOZ_RELEASE_ASSERT(
+        !ran, "RequestChunk should not immediately fulfill the request");
+    cm.FulfillChunkRequests();
+    MOZ_RELEASE_ASSERT(ran, "FulfillChunkRequests should invoke the callback");
+    MOZ_RELEASE_ASSERT(!!newChunk, "Chunk request should always work");
+    MOZ_RELEASE_ASSERT(newChunk->BufferBytes() == chunkActualBufferBytes,
+                       "Unexpected chunk size");
+    MOZ_RELEASE_ASSERT(!newChunk->GetNext(), "There should only be one chunk");
+
+    // Mark previous chunk done and release it.
+    WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+    chunk->MarkDone();
+    cm.ReleaseChunk(std::move(chunk));
+
+    // And cycle to the new chunk.
+    chunk = std::move(newChunk);
+
+    if (reinterpret_cast<uintptr_t>(chunk.get()) == chunk1Address) {
+      ++chunk1ReuseCount;
+    }
+  }
+
+  // Expect all rollovers except 1 to destroy chunks.
+  MOZ_RELEASE_ASSERT(destroyedChunks >= (Rollovers - 1) * MaxTotalBytes /
+                                            chunkActualBufferBytes,
+                     "Not enough destroyed chunks");
+  MOZ_RELEASE_ASSERT(destroyedBytes == destroyedChunks * chunkActualBufferBytes,
+                     "Mismatched destroyed chunks and bytes");
+  MOZ_RELEASE_ASSERT(chunk1ReuseCount >= (Rollovers - 1),
+                     "Not enough reuse of the first chunks");
+
+  // Check that chunk manager is reentrant from request callback: the outer
+  // callback gets a chunk synchronously and posts another request, which
+  // should only be fulfilled by the next FulfillChunkRequests() call.
+  bool ran = false;
+  bool ranInner = false;
+  UniquePtr<ProfileBufferChunk> newChunk;
+  cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aChunk) {
+    ran = true;
+    MOZ_RELEASE_ASSERT(!!aChunk, "Chunk request should always work");
+    Unused << aChunk->ReserveInitialBlockAsTail(0);
+    WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+    aChunk->MarkDone();
+    UniquePtr<ProfileBufferChunk> anotherChunk = cm.GetChunk();
+    MOZ_RELEASE_ASSERT(!!anotherChunk);
+    Unused << anotherChunk->ReserveInitialBlockAsTail(0);
+    WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+    anotherChunk->MarkDone();
+    cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aInnerChunk) {
+      ranInner = true;
+      MOZ_RELEASE_ASSERT(!!aInnerChunk, "Chunk request should always work");
+      Unused << aInnerChunk->ReserveInitialBlockAsTail(0);
+      WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+      aInnerChunk->MarkDone();
+    });
+    MOZ_RELEASE_ASSERT(
+        !ranInner, "RequestChunk should not immediately fulfill the request");
+  });
+  MOZ_RELEASE_ASSERT(!ran,
+                     "RequestChunk should not immediately fulfill the request");
+  MOZ_RELEASE_ASSERT(
+      !ranInner,
+      "RequestChunk should not immediately fulfill the inner request");
+  cm.FulfillChunkRequests();
+  MOZ_RELEASE_ASSERT(ran, "FulfillChunkRequests should invoke the callback");
+  MOZ_RELEASE_ASSERT(!ranInner,
+                     "FulfillChunkRequests should not immediately fulfill "
+                     "the inner request");
+  cm.FulfillChunkRequests();
+  // This must check the inner flag: `ran` is already true at this point, so
+  // asserting it would not verify anything about the inner request.
+  MOZ_RELEASE_ASSERT(
+      ranInner,
+      "2nd FulfillChunkRequests should invoke the inner request callback");
+
+  // Enough testing! Clean-up.
+  Unused << chunk->ReserveInitialBlockAsTail(0);
+  WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+  chunk->MarkDone();
+  cm.ForgetUnreleasedChunks();
+
+  // Special testing of the release algorithm, to make sure released chunks get
+  // sorted.
+  constexpr unsigned RandomReleaseChunkLoop = 100;
+  // Build a vector of chunks, and mark them "done", ready to be released.
+  Vector<UniquePtr<ProfileBufferChunk>> chunksToRelease;
+  MOZ_RELEASE_ASSERT(chunksToRelease.reserve(RandomReleaseChunkLoop));
+  Vector<TimeStamp> chunksTimeStamps;
+  MOZ_RELEASE_ASSERT(chunksTimeStamps.reserve(RandomReleaseChunkLoop));
+  for (unsigned i = 0; i < RandomReleaseChunkLoop; ++i) {
+    UniquePtr<ProfileBufferChunk> doneChunk = cm.GetChunk();
+    MOZ_RELEASE_ASSERT(doneChunk);
+    Unused << doneChunk->ReserveInitialBlockAsTail(0);
+    doneChunk->MarkDone();
+    MOZ_RELEASE_ASSERT(!doneChunk->ChunkHeader().mDoneTimeStamp.IsNull());
+    chunksTimeStamps.infallibleEmplaceBack(
+        doneChunk->ChunkHeader().mDoneTimeStamp);
+    chunksToRelease.infallibleEmplaceBack(std::move(doneChunk));
+    if (i % 10 == 0) {
+      // "Done" timestamps should *usually* increase, let's make extra sure some
+      // timestamps are actually different.
+      WaitUntilTimeStampChanges();
+    }
+  }
+  // Shuffle the list.
+  std::random_device randomDevice;
+  std::mt19937 generator(randomDevice());
+  std::shuffle(chunksToRelease.begin(), chunksToRelease.end(), generator);
+  // And release chunks one by one, checking that the list of released chunks
+  // is always sorted.
+  printf("TestChunkManagerWithLocalLimit - Shuffle test timestamps:");
+  for (unsigned i = 0; i < RandomReleaseChunkLoop; ++i) {
+    printf(" %f", (chunksToRelease[i]->ChunkHeader().mDoneTimeStamp -
+                   TimeStamp::ProcessCreation())
+                      .ToMicroseconds());
+    cm.ReleaseChunk(std::move(chunksToRelease[i]));
+    cm.PeekExtantReleasedChunks([i](const ProfileBufferChunk* releasedChunks) {
+      MOZ_RELEASE_ASSERT(releasedChunks);
+      unsigned releasedChunkCount = 1;
+      for (;;) {
+        const ProfileBufferChunk* nextChunk = releasedChunks->GetNext();
+        if (!nextChunk) {
+          break;
+        }
+        ++releasedChunkCount;
+        MOZ_RELEASE_ASSERT(releasedChunks->ChunkHeader().mDoneTimeStamp <=
+                           nextChunk->ChunkHeader().mDoneTimeStamp);
+        releasedChunks = nextChunk;
+      }
+      MOZ_RELEASE_ASSERT(releasedChunkCount == i + 1);
+    });
+  }
+  printf("\n");
+  // Finally, the whole list of released chunks should have the exact same
+  // timestamps as the initial list of "done" chunks.
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  for (unsigned i = 0; i < RandomReleaseChunkLoop; ++i) {
+    MOZ_RELEASE_ASSERT(extantReleasedChunks, "Not enough released chunks");
+    MOZ_RELEASE_ASSERT(extantReleasedChunks->ChunkHeader().mDoneTimeStamp ==
+                       chunksTimeStamps[i]);
+    // Drop the head of the list and continue with the rest of it.
+    Unused << std::exchange(extantReleasedChunks,
+                            extantReleasedChunks->ReleaseNext());
+  }
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Too many released chunks");
+
+#  ifdef DEBUG
+  cm.DeregisteredFrom(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  printf("TestChunkManagerWithLocalLimit done\n");
+}
+
+// Returns true when both chunk metadata entries have the same "done" timestamp
+// and the same buffer size in bytes.
+static bool IsSameMetadata(
+    const ProfileBufferControlledChunkManager::ChunkMetadata& a1,
+    const ProfileBufferControlledChunkManager::ChunkMetadata& a2) {
+  return a1.mDoneTimeStamp == a2.mDoneTimeStamp &&
+         a1.mBufferBytes == a2.mBufferBytes;
+}
+
+// Compares two updates: two "final" updates are the same, two "not-an-update"
+// objects are the same, and two normal updates are the same when their byte
+// counts, oldest "done" timestamp, and lists of newly-released chunks match.
+static bool IsSameUpdate(
+    const ProfileBufferControlledChunkManager::Update& a1,
+    const ProfileBufferControlledChunkManager::Update& a2) {
+  // Final and not-an-update don't carry other data, so these two states are
+  // settled first, before any other member is looked at.
+  if (a1.IsFinal() != a2.IsFinal()) {
+    return false;
+  }
+  if (a1.IsFinal()) {
+    return true;
+  }
+  if (a1.IsNotUpdate() != a2.IsNotUpdate()) {
+    return false;
+  }
+  if (a1.IsNotUpdate()) {
+    return true;
+  }
+
+  // Here, both are "normal" updates, check member variables:
+
+  if (a1.UnreleasedBytes() != a2.UnreleasedBytes() ||
+      a1.ReleasedBytes() != a2.ReleasedBytes() ||
+      a1.OldestDoneTimeStamp() != a2.OldestDoneTimeStamp()) {
+    return false;
+  }
+
+  const auto& newlyReleased1 = a1.NewlyReleasedChunksRef();
+  const auto& newlyReleased2 = a2.NewlyReleasedChunksRef();
+  if (newlyReleased1.size() != newlyReleased2.size()) {
+    return false;
+  }
+  for (unsigned idx = 0; idx < newlyReleased1.size(); ++idx) {
+    if (!IsSameMetadata(newlyReleased1[idx], newlyReleased2[idx])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Tests ProfileBufferControlledChunkManager::Update on its own (no chunk
+// manager involved): construction, copy/move assignment, Clear(), and Fold().
+// The second half hand-builds chains of released/unreleased chunks, creates
+// the update each situation is expected to produce, and checks the result of
+// folding these updates one after the other into `update1`.
+static void TestControlledChunkManagerUpdate() {
+  printf("TestControlledChunkManagerUpdate...\n");
+
+  using Update = ProfileBufferControlledChunkManager::Update;
+
+  // Default construction.
+  Update update1;
+  MOZ_RELEASE_ASSERT(update1.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(!update1.IsFinal());
+
+  // Clear an already-cleared update.
+  update1.Clear();
+  MOZ_RELEASE_ASSERT(update1.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(!update1.IsFinal());
+
+  // Final construction with nullptr.
+  const Update final(nullptr);
+  MOZ_RELEASE_ASSERT(final.IsFinal());
+  MOZ_RELEASE_ASSERT(!final.IsNotUpdate());
+
+  // Copy final to cleared.
+  update1 = final;
+  MOZ_RELEASE_ASSERT(update1.IsFinal());
+  MOZ_RELEASE_ASSERT(!update1.IsNotUpdate());
+
+  // Copy final to final.
+  update1 = final;
+  MOZ_RELEASE_ASSERT(update1.IsFinal());
+  MOZ_RELEASE_ASSERT(!update1.IsNotUpdate());
+
+  // Clear a final update.
+  update1.Clear();
+  MOZ_RELEASE_ASSERT(update1.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(!update1.IsFinal());
+
+  // Move final to cleared.
+  update1 = Update(nullptr);
+  MOZ_RELEASE_ASSERT(update1.IsFinal());
+  MOZ_RELEASE_ASSERT(!update1.IsNotUpdate());
+
+  // Move final to final.
+  update1 = Update(nullptr);
+  MOZ_RELEASE_ASSERT(update1.IsFinal());
+  MOZ_RELEASE_ASSERT(!update1.IsNotUpdate());
+
+  // Move from not-an-update (effectively same as Clear).
+  update1 = Update();
+  MOZ_RELEASE_ASSERT(update1.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(!update1.IsFinal());
+
+  // Helper: waits until the current time is past `aChunkToBeat`'s "done"
+  // timestamp, then creates a chunk asking for twice its buffer size, marks it
+  // "done", and returns it. The new chunk is therefore both bigger and
+  // strictly more recent than `aChunkToBeat`.
+  auto CreateBiggerChunkAfter = [](const ProfileBufferChunk& aChunkToBeat) {
+    while (TimeStamp::NowUnfuzzed() <=
+           aChunkToBeat.ChunkHeader().mDoneTimeStamp) {
+      ::SleepMilli(1);
+    }
+    auto chunk = ProfileBufferChunk::Create(aChunkToBeat.BufferBytes() * 2);
+    MOZ_RELEASE_ASSERT(!!chunk);
+    MOZ_RELEASE_ASSERT(chunk->BufferBytes() >= aChunkToBeat.BufferBytes() * 2);
+    Unused << chunk->ReserveInitialBlockAsTail(0);
+    chunk->MarkDone();
+    MOZ_RELEASE_ASSERT(chunk->ChunkHeader().mDoneTimeStamp >
+                       aChunkToBeat.ChunkHeader().mDoneTimeStamp);
+    return chunk;
+  };
+
+  // Give `update1` placeholder byte counts and no chunk information; the folds
+  // below are expected to replace these values.
+  update1 = Update(1, 2, nullptr, nullptr);
+
+  // Create initial update with 2 released chunks and 1 unreleased chunk.
+  auto released = ProfileBufferChunk::Create(10);
+  ProfileBufferChunk* c1 = released.get();
+  Unused << c1->ReserveInitialBlockAsTail(0);
+  c1->MarkDone();
+
+  released->SetLast(CreateBiggerChunkAfter(*c1));
+  ProfileBufferChunk* c2 = c1->GetNext();
+
+  auto unreleased = CreateBiggerChunkAfter(*c2);
+  ProfileBufferChunk* c3 = unreleased.get();
+
+  // As checked just below, the pointer-taking constructor is expected to take
+  // the oldest "done" timestamp from its 3rd argument, and to record the chain
+  // of chunks starting at its 4th argument as newly-released chunks.
+  Update update2(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(), c1,
+                 c1);
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update2,
+      Update(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()},
+              {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()}})));
+  // Check every field, this time only, after that we'll trust that the
+  // `IsSameUpdate` test will be enough.
+  MOZ_RELEASE_ASSERT(!update2.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(!update2.IsFinal());
+  MOZ_RELEASE_ASSERT(update2.UnreleasedBytes() == c3->BufferBytes());
+  MOZ_RELEASE_ASSERT(update2.ReleasedBytes() ==
+                     c1->BufferBytes() + c2->BufferBytes());
+  MOZ_RELEASE_ASSERT(update2.OldestDoneTimeStamp() ==
+                     c1->ChunkHeader().mDoneTimeStamp);
+  MOZ_RELEASE_ASSERT(update2.NewlyReleasedChunksRef().size() == 2);
+  MOZ_RELEASE_ASSERT(
+      IsSameMetadata(update2.NewlyReleasedChunksRef()[0],
+                     {c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()}));
+  MOZ_RELEASE_ASSERT(
+      IsSameMetadata(update2.NewlyReleasedChunksRef()[1],
+                     {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()}));
+
+  // Fold into the placeholder update assigned above; the result is expected to
+  // be the same as the folded-in update.
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()},
+              {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()}})));
+
+  // Pretend nothing happened: same byte counts and oldest chunk, no
+  // newly-released chunks. Folding it should leave `update1` unchanged.
+  update2 = Update(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(), c1,
+                   nullptr);
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update2, Update(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(),
+                      c1->ChunkHeader().mDoneTimeStamp, {})));
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c3->BufferBytes(), c1->BufferBytes() + c2->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()},
+              {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()}})));
+
+  // Pretend there's a new unreleased chunk.
+  c3->SetLast(CreateBiggerChunkAfter(*c3));
+  ProfileBufferChunk* c4 = c3->GetNext();
+  update2 = Update(c3->BufferBytes() + c4->BufferBytes(),
+                   c1->BufferBytes() + c2->BufferBytes(), c1, nullptr);
+  MOZ_RELEASE_ASSERT(
+      IsSameUpdate(update2, Update(c3->BufferBytes() + c4->BufferBytes(),
+                                   c1->BufferBytes() + c2->BufferBytes(),
+                                   c1->ChunkHeader().mDoneTimeStamp, {})));
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c3->BufferBytes() + c4->BufferBytes(),
+             c1->BufferBytes() + c2->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()},
+              {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()}})));
+
+  // Pretend the first unreleased chunk c3 has been released.
+  // (c3 is detached from the unreleased list, which then starts at c4, and is
+  // appended to the released list.)
+  released->SetLast(std::exchange(unreleased, unreleased->ReleaseNext()));
+  update2 =
+      Update(c4->BufferBytes(),
+             c1->BufferBytes() + c2->BufferBytes() + c3->BufferBytes(), c1, c3);
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update2,
+      Update(c4->BufferBytes(),
+             c1->BufferBytes() + c2->BufferBytes() + c3->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c3->ChunkHeader().mDoneTimeStamp, c3->BufferBytes()}})));
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c4->BufferBytes(),
+             c1->BufferBytes() + c2->BufferBytes() + c3->BufferBytes(),
+             c1->ChunkHeader().mDoneTimeStamp,
+             {{c1->ChunkHeader().mDoneTimeStamp, c1->BufferBytes()},
+              {c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()},
+              {c3->ChunkHeader().mDoneTimeStamp, c3->BufferBytes()}})));
+
+  // Pretend c1 has been destroyed, so the oldest timestamp is now at c2.
+  // After folding, c1 is also expected to be gone from the newly-released
+  // chunks accumulated in `update1`.
+  released = released->ReleaseNext();
+  c1 = nullptr;
+  update2 = Update(c4->BufferBytes(), c2->BufferBytes() + c3->BufferBytes(), c2,
+                   nullptr);
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update2, Update(c4->BufferBytes(), c2->BufferBytes() + c3->BufferBytes(),
+                      c2->ChunkHeader().mDoneTimeStamp, {})));
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c4->BufferBytes(), c2->BufferBytes() + c3->BufferBytes(),
+             c2->ChunkHeader().mDoneTimeStamp,
+             {{c2->ChunkHeader().mDoneTimeStamp, c2->BufferBytes()},
+              {c3->ChunkHeader().mDoneTimeStamp, c3->BufferBytes()}})));
+
+  // Pretend c2 has been recycled to make unreleased c5, and c4 has been
+  // released.
+  auto recycled = std::exchange(released, released->ReleaseNext());
+  recycled->MarkRecycled();
+  Unused << recycled->ReserveInitialBlockAsTail(0);
+  recycled->MarkDone();
+  released->SetLast(std::move(unreleased));
+  unreleased = std::move(recycled);
+  // c5 is the same chunk object as c2, now in its recycled role.
+  ProfileBufferChunk* c5 = c2;
+  c2 = nullptr;
+  update2 =
+      Update(c5->BufferBytes(), c3->BufferBytes() + c4->BufferBytes(), c3, c4);
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update2,
+      Update(c5->BufferBytes(), c3->BufferBytes() + c4->BufferBytes(),
+             c3->ChunkHeader().mDoneTimeStamp,
+             {{c4->ChunkHeader().mDoneTimeStamp, c4->BufferBytes()}})));
+  update1.Fold(std::move(update2));
+  MOZ_RELEASE_ASSERT(IsSameUpdate(
+      update1,
+      Update(c5->BufferBytes(), c3->BufferBytes() + c4->BufferBytes(),
+             c3->ChunkHeader().mDoneTimeStamp,
+             {{c3->ChunkHeader().mDoneTimeStamp, c3->BufferBytes()},
+              {c4->ChunkHeader().mDoneTimeStamp, c4->BufferBytes()}})));
+
+  // And send a final update.
+  update1.Fold(Update(nullptr));
+  MOZ_RELEASE_ASSERT(update1.IsFinal());
+  MOZ_RELEASE_ASSERT(!update1.IsNotUpdate());
+
+  printf("TestControlledChunkManagerUpdate done\n");
+}
+
+// Tests the ProfileBufferControlledChunkManager side of
+// ProfileBufferChunkManagerWithLocalLimit: an update callback is installed,
+// and after each chunk manager operation the test checks whether an update
+// was sent and what it contained (unreleased/released byte counts, oldest
+// "done" timestamp, newly-released chunks).
+static void TestControlledChunkManagerWithLocalLimit() {
+  printf("TestControlledChunkManagerWithLocalLimit...\n");
+
+  // Construct a ProfileBufferChunkManagerWithLocalLimit with chunk of minimum
+  // size >=100, up to 1000 bytes.
+  constexpr ProfileBufferChunk::Length MaxTotalBytes = 1000;
+  constexpr ProfileBufferChunk::Length ChunkMinBufferBytes = 100;
+  ProfileBufferChunkManagerWithLocalLimit cmll{MaxTotalBytes,
+                                               ChunkMinBufferBytes};
+
+  // Reference to chunk manager base class.
+  ProfileBufferChunkManager& cm = cmll;
+
+  // Reference to controlled chunk manager base class.
+  ProfileBufferControlledChunkManager& ccm = cmll;
+
+#  ifdef DEBUG
+  const char* chunkManagerRegisterer =
+      "TestControlledChunkManagerWithLocalLimit";
+  cm.RegisteredWith(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  MOZ_RELEASE_ASSERT(cm.MaxTotalSize() == MaxTotalBytes,
+                     "Max total size should be exactly as given");
+
+  unsigned destroyedChunks = 0;
+  unsigned destroyedBytes = 0;
+  cm.SetChunkDestroyedCallback([&](const ProfileBufferChunk& aChunks) {
+    for (const ProfileBufferChunk* chunk = &aChunks; chunk;
+         chunk = chunk->GetNext()) {
+      destroyedChunks += 1;
+      destroyedBytes += chunk->BufferBytes();
+    }
+  });
+
+  // Every update received is counted and folded into `update`; both are reset
+  // after each check below, so each check only sees the latest operation.
+  using Update = ProfileBufferControlledChunkManager::Update;
+  unsigned updateCount = 0;
+  Update update;
+  MOZ_RELEASE_ASSERT(update.IsNotUpdate());
+  auto updateCallback = [&](Update&& aUpdate) {
+    ++updateCount;
+    update.Fold(std::move(aUpdate));
+  };
+  ccm.SetUpdateCallback(updateCallback);
+  MOZ_RELEASE_ASSERT(updateCount == 1,
+                     "SetUpdateCallback should have triggered an update");
+  MOZ_RELEASE_ASSERT(IsSameUpdate(update, Update(0, 0, TimeStamp{}, {})));
+  updateCount = 0;
+  update.Clear();
+
+  UniquePtr<ProfileBufferChunk> extantReleasedChunks =
+      cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+  MOZ_RELEASE_ASSERT(updateCount == 1,
+                     "GetExtantReleasedChunks should have triggered an update");
+  MOZ_RELEASE_ASSERT(IsSameUpdate(update, Update(0, 0, TimeStamp{}, {})));
+  updateCount = 0;
+  update.Clear();
+
+  // First request.
+  UniquePtr<ProfileBufferChunk> chunk = cm.GetChunk();
+  MOZ_RELEASE_ASSERT(!!chunk,
+                     "First chunk immediate request should always work");
+  const auto chunkActualBufferBytes = chunk->BufferBytes();
+  // Keep address, for later checks.
+  const uintptr_t chunk1Address = reinterpret_cast<uintptr_t>(chunk.get());
+  MOZ_RELEASE_ASSERT(updateCount == 1,
+                     "GetChunk should have triggered an update");
+  MOZ_RELEASE_ASSERT(
+      IsSameUpdate(update, Update(chunk->BufferBytes(), 0, TimeStamp{}, {})));
+  updateCount = 0;
+  update.Clear();
+
+  extantReleasedChunks = cm.GetExtantReleasedChunks();
+  MOZ_RELEASE_ASSERT(!extantReleasedChunks, "Unexpected released chunk(s)");
+  MOZ_RELEASE_ASSERT(updateCount == 1,
+                     "GetExtantReleasedChunks should have triggered an update");
+  MOZ_RELEASE_ASSERT(
+      IsSameUpdate(update, Update(chunk->BufferBytes(), 0, TimeStamp{}, {})));
+  updateCount = 0;
+  update.Clear();
+
+  // For this test, we need to be able to get at least 2 chunks without hitting
+  // the limit. (If this failed, it wouldn't necessarily be a problem with
+  // ProfileBufferChunkManagerWithLocalLimit, fiddle with constants at the top
+  // of this test.)
+  MOZ_RELEASE_ASSERT(2 * chunkActualBufferBytes < MaxTotalBytes);
+
+  // Values from the previous update, to compare against the next one.
+  ProfileBufferChunk::Length previousUnreleasedBytes = chunk->BufferBytes();
+  ProfileBufferChunk::Length previousReleasedBytes = 0;
+  TimeStamp previousOldestDoneTimeStamp;
+
+  unsigned chunk1ReuseCount = 0;
+
+  // We will do enough loops to go through the maximum size a number of times.
+  const unsigned Rollovers = 3;
+  const unsigned Loops = Rollovers * MaxTotalBytes / chunkActualBufferBytes;
+  for (unsigned i = 0; i < Loops; ++i) {
+    // Add some data to the chunk.
+    const ProfileBufferIndex index =
+        ProfileBufferIndex(chunkActualBufferBytes) * i + 1;
+    chunk->SetRangeStart(index);
+    Unused << chunk->ReserveInitialBlockAsTail(1);
+    Unused << chunk->ReserveBlock(2);
+
+    // Request a new chunk.
+    UniquePtr<ProfileBufferChunk> newChunk;
+    cm.RequestChunk([&](UniquePtr<ProfileBufferChunk> aChunk) {
+      newChunk = std::move(aChunk);
+    });
+    MOZ_RELEASE_ASSERT(updateCount == 0,
+                       "RequestChunk() shouldn't have triggered an update");
+    cm.FulfillChunkRequests();
+    MOZ_RELEASE_ASSERT(!!newChunk, "Chunk request should always work");
+    MOZ_RELEASE_ASSERT(newChunk->BufferBytes() == chunkActualBufferBytes,
+                       "Unexpected chunk size");
+    MOZ_RELEASE_ASSERT(!newChunk->GetNext(), "There should only be one chunk");
+
+    MOZ_RELEASE_ASSERT(updateCount == 1,
+                       "FulfillChunkRequests() after a request should have "
+                       "triggered an update");
+    MOZ_RELEASE_ASSERT(!update.IsFinal());
+    MOZ_RELEASE_ASSERT(!update.IsNotUpdate());
+    MOZ_RELEASE_ASSERT(update.UnreleasedBytes() ==
+                       previousUnreleasedBytes + newChunk->BufferBytes());
+    previousUnreleasedBytes = update.UnreleasedBytes();
+    // Getting a new chunk may only keep or reduce the released bytes.
+    MOZ_RELEASE_ASSERT(update.ReleasedBytes() <= previousReleasedBytes);
+    previousReleasedBytes = update.ReleasedBytes();
+    MOZ_RELEASE_ASSERT(previousOldestDoneTimeStamp.IsNull() ||
+                       update.OldestDoneTimeStamp() >=
+                           previousOldestDoneTimeStamp);
+    previousOldestDoneTimeStamp = update.OldestDoneTimeStamp();
+    MOZ_RELEASE_ASSERT(update.NewlyReleasedChunksRef().empty());
+    updateCount = 0;
+    update.Clear();
+
+    // Make sure the "Done" timestamp below cannot be the same as from the
+    // previous loop.
+    const TimeStamp now = TimeStamp::NowUnfuzzed();
+    while (TimeStamp::NowUnfuzzed() == now) {
+      ::SleepMilli(1);
+    }
+
+    // Mark previous chunk done and release it.
+    WaitUntilTimeStampChanges();  // Force "done" timestamp to change.
+    chunk->MarkDone();
+    const auto doneTimeStamp = chunk->ChunkHeader().mDoneTimeStamp;
+    const auto bufferBytes = chunk->BufferBytes();
+    cm.ReleaseChunk(std::move(chunk));
+
+    MOZ_RELEASE_ASSERT(updateCount == 1,
+                       "ReleaseChunk() should have triggered an update");
+    MOZ_RELEASE_ASSERT(!update.IsFinal());
+    MOZ_RELEASE_ASSERT(!update.IsNotUpdate());
+    // The released chunk's bytes should have moved from the unreleased count
+    // to the released count.
+    MOZ_RELEASE_ASSERT(update.UnreleasedBytes() ==
+                       previousUnreleasedBytes - bufferBytes);
+    previousUnreleasedBytes = update.UnreleasedBytes();
+    MOZ_RELEASE_ASSERT(update.ReleasedBytes() ==
+                       previousReleasedBytes + bufferBytes);
+    previousReleasedBytes = update.ReleasedBytes();
+    MOZ_RELEASE_ASSERT(previousOldestDoneTimeStamp.IsNull() ||
+                       update.OldestDoneTimeStamp() >=
+                           previousOldestDoneTimeStamp);
+    previousOldestDoneTimeStamp = update.OldestDoneTimeStamp();
+    MOZ_RELEASE_ASSERT(update.OldestDoneTimeStamp() <= doneTimeStamp);
+    MOZ_RELEASE_ASSERT(update.NewlyReleasedChunksRef().size() == 1);
+    MOZ_RELEASE_ASSERT(update.NewlyReleasedChunksRef()[0].mDoneTimeStamp ==
+                       doneTimeStamp);
+    MOZ_RELEASE_ASSERT(update.NewlyReleasedChunksRef()[0].mBufferBytes ==
+                       bufferBytes);
+    updateCount = 0;
+    update.Clear();
+
+    // And cycle to the new chunk.
+    chunk = std::move(newChunk);
+
+    if (reinterpret_cast<uintptr_t>(chunk.get()) == chunk1Address) {
+      ++chunk1ReuseCount;
+    }
+  }
+
+  // Enough testing! Clean-up.
+  Unused << chunk->ReserveInitialBlockAsTail(0);
+  chunk->MarkDone();
+  cm.ForgetUnreleasedChunks();
+  MOZ_RELEASE_ASSERT(
+      updateCount == 1,
+      "ForgetUnreleasedChunks() should have triggered an update");
+  MOZ_RELEASE_ASSERT(!update.IsFinal());
+  MOZ_RELEASE_ASSERT(!update.IsNotUpdate());
+  MOZ_RELEASE_ASSERT(update.UnreleasedBytes() == 0);
+  MOZ_RELEASE_ASSERT(update.ReleasedBytes() == previousReleasedBytes);
+  MOZ_RELEASE_ASSERT(update.NewlyReleasedChunksRef().empty());
+  updateCount = 0;
+  update.Clear();
+
+  // Removing the callback is expected to send one last, "final" update to it.
+  ccm.SetUpdateCallback({});
+  MOZ_RELEASE_ASSERT(updateCount == 1,
+                     "SetUpdateCallback({}) should have triggered an update");
+  MOZ_RELEASE_ASSERT(update.IsFinal());
+
+#  ifdef DEBUG
+  cm.DeregisteredFrom(chunkManagerRegisterer);
+#  endif  // DEBUG
+
+  printf("TestControlledChunkManagerWithLocalLimit done\n");
+}
+
+// Takes one snapshot of `(aProfileChunkedBuffer).GetState()` and asserts that
+// its range start, range end, pushed block count, cleared block count, and
+// failed put bytes are equal to the given expected values, in that order.
+// NOTE(review): this expands to a bare `{ ... }` block rather than a
+// `do { ... } while (0)`, so a call followed by `;` is two statements; take
+// care if using it as the unbraced body of an `if` that has an `else`.
+#  define VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(                         \
+      aProfileChunkedBuffer, aStart, aEnd, aPushed, aCleared, aFailed)        \
+    {                                                                         \
+      ProfileChunkedBuffer::State state = (aProfileChunkedBuffer).GetState(); \
+      MOZ_RELEASE_ASSERT(state.mRangeStart == (aStart));                      \
+      MOZ_RELEASE_ASSERT(state.mRangeEnd == (aEnd));                          \
+      MOZ_RELEASE_ASSERT(state.mPushedBlockCount == (aPushed));               \
+      MOZ_RELEASE_ASSERT(state.mClearedBlockCount == (aCleared));             \
+      MOZ_RELEASE_ASSERT(state.mFailedPutBytes == (aFailed));                 \
+    }
+
+static void TestChunkedBuffer() {
+  printf("TestChunkedBuffer...\n");
+  // Runs through: no chunk manager, one writer, 32 writers, manager removed.
+  ProfileBufferBlockIndex blockIndex;
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  MOZ_RELEASE_ASSERT(blockIndex == nullptr);
+
+  // Create an out-of-session ProfileChunkedBuffer.
+  ProfileChunkedBuffer cb(ProfileChunkedBuffer::ThreadSafety::WithMutex);
+
+  MOZ_RELEASE_ASSERT(cb.BufferLength().isNothing());
+
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+  // Out-of-session: writes must fail (no writer given) and not change state.
+  int result = 0;
+  result = cb.ReserveAndPut(
+      []() {
+        MOZ_RELEASE_ASSERT(false);
+        return 1;
+      },
+      [](Maybe<ProfileBufferEntryWriter>& aEW) { return aEW ? 2 : 3; });
+  MOZ_RELEASE_ASSERT(result == 3);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  result = 0;
+  result = cb.Put(
+      1, [](Maybe<ProfileBufferEntryWriter>& aEW) { return aEW ? 1 : 2; });
+  MOZ_RELEASE_ASSERT(result == 2);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  blockIndex = cb.PutFrom(&result, 1);
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  blockIndex = cb.PutObjects(123, result, "hello");
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  blockIndex = cb.PutObject(123);
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  auto chunks = cb.GetAllChunks();
+  static_assert(std::is_same_v<decltype(chunks), UniquePtr<ProfileBufferChunk>>,
+                "ProfileChunkedBuffer::GetAllChunks() should return a "
+                "UniquePtr<ProfileBufferChunk>");
+  MOZ_RELEASE_ASSERT(!chunks, "Expected no chunks when out-of-session");
+  // Read()/ReadAt() still run their callback, with a null reader/Nothing.
+  bool ran = false;
+  result = 0;
+  result = cb.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    ran = true;
+    MOZ_RELEASE_ASSERT(!aReader);
+    return 3;
+  });
+  MOZ_RELEASE_ASSERT(ran);
+  MOZ_RELEASE_ASSERT(result == 3);
+
+  cb.ReadEach([](ProfileBufferEntryReader&) { MOZ_RELEASE_ASSERT(false); });
+
+  result = 0;
+  result = cb.ReadAt(nullptr, [](Maybe<ProfileBufferEntryReader>&& er) {
+    MOZ_RELEASE_ASSERT(er.isNothing());
+    return 4;
+  });
+  MOZ_RELEASE_ASSERT(result == 4);
+
+  // Use ProfileBufferChunkManagerWithLocalLimit, which will give away
+  // ProfileBufferChunks that can contain 128 bytes, using up to 1KB of memory
+  // (including usable 128 bytes and headers).
+  constexpr size_t bufferMaxSize = 1024;
+  constexpr ProfileChunkedBuffer::Length chunkMinSize = 128;
+  ProfileBufferChunkManagerWithLocalLimit cm(bufferMaxSize, chunkMinSize);
+  cb.SetChunkManager(cm);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  // Let the chunk manager fulfill the initial request for an extra chunk.
+  cm.FulfillChunkRequests();
+
+  MOZ_RELEASE_ASSERT(cm.MaxTotalSize() == bufferMaxSize);
+  MOZ_RELEASE_ASSERT(cb.BufferLength().isSome());
+  MOZ_RELEASE_ASSERT(*cb.BufferLength() == bufferMaxSize);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1, 1, 0, 0, 0);
+
+  // Write an int with the main `ReserveAndPut` function.
+  const int test = 123;
+  ran = false;
+  blockIndex = nullptr;
+  bool success = cb.ReserveAndPut(
+      []() { return sizeof(test); },
+      [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+        ran = true;
+        if (!aEW) {
+          return false;
+        }
+        blockIndex = aEW->CurrentBlockIndex();
+        MOZ_RELEASE_ASSERT(aEW->RemainingBytes() == sizeof(test));
+        aEW->WriteObject(test);
+        MOZ_RELEASE_ASSERT(aEW->RemainingBytes() == 0);
+        return true;
+      });
+  MOZ_RELEASE_ASSERT(ran);
+  MOZ_RELEASE_ASSERT(success);
+  MOZ_RELEASE_ASSERT(blockIndex.ConvertToProfileBufferIndex() == 1);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cb, 1, 1 + ULEB128Size(sizeof(test)) + sizeof(test), 1, 0, 0);
+
+  ran = false;
+  result = 0;
+  result = cb.Read([&](ProfileChunkedBuffer::Reader* aReader) {
+    ran = true;
+    MOZ_RELEASE_ASSERT(!!aReader);
+    // begin() and end() should be at the range edges (verified above).
+    MOZ_RELEASE_ASSERT(
+        aReader->begin().CurrentBlockIndex().ConvertToProfileBufferIndex() ==
+        1);
+    MOZ_RELEASE_ASSERT(
+        aReader->end().CurrentBlockIndex().ConvertToProfileBufferIndex() == 0);
+    // Null ProfileBufferBlockIndex clamped to the beginning.
+    MOZ_RELEASE_ASSERT(aReader->At(nullptr) == aReader->begin());
+    MOZ_RELEASE_ASSERT(aReader->At(blockIndex) == aReader->begin());
+    // At(begin) same as begin().
+    MOZ_RELEASE_ASSERT(aReader->At(aReader->begin().CurrentBlockIndex()) ==
+                       aReader->begin());
+    // At(past block) same as end().
+    MOZ_RELEASE_ASSERT(
+        aReader->At(ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
+            1 + 1 + sizeof(test))) == aReader->end());
+
+    size_t read = 0;
+    aReader->ForEach([&](ProfileBufferEntryReader& er) {
+      ++read;
+      MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(test));
+      const auto value = er.ReadObject<decltype(test)>();
+      MOZ_RELEASE_ASSERT(value == test);
+      MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+    });
+    MOZ_RELEASE_ASSERT(read == 1);
+
+    read = 0;
+    for (auto er : *aReader) {
+      static_assert(std::is_same_v<decltype(er), ProfileBufferEntryReader>,
+                    "ProfileChunkedBuffer::Reader range-for should produce "
+                    "ProfileBufferEntryReader objects");
+      ++read;
+      MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(test));
+      const auto value = er.ReadObject<decltype(test)>();
+      MOZ_RELEASE_ASSERT(value == test);
+      MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+    };
+    MOZ_RELEASE_ASSERT(read == 1);
+    return 5;
+  });
+  MOZ_RELEASE_ASSERT(ran);
+  MOZ_RELEASE_ASSERT(result == 5);
+
+  // Read the int directly from the ProfileChunkedBuffer, without block index.
+  size_t read = 0;
+  cb.ReadEach([&](ProfileBufferEntryReader& er) {
+    ++read;
+    MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(test));
+    const auto value = er.ReadObject<decltype(test)>();
+    MOZ_RELEASE_ASSERT(value == test);
+    MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+  });
+  MOZ_RELEASE_ASSERT(read == 1);
+
+  // Read the int directly from the ProfileChunkedBuffer, with block index.
+  read = 0;
+  blockIndex = nullptr;
+  cb.ReadEach(
+      [&](ProfileBufferEntryReader& er, ProfileBufferBlockIndex aBlockIndex) {
+        ++read;
+        MOZ_RELEASE_ASSERT(!!aBlockIndex);
+        MOZ_RELEASE_ASSERT(!blockIndex);
+        blockIndex = aBlockIndex;
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(test));
+        const auto value = er.ReadObject<decltype(test)>();
+        MOZ_RELEASE_ASSERT(value == test);
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+      });
+  MOZ_RELEASE_ASSERT(read == 1);
+  MOZ_RELEASE_ASSERT(!!blockIndex);
+  MOZ_RELEASE_ASSERT(blockIndex != nullptr);
+
+  // Read the int from its block index.
+  read = 0;
+  result = 0;
+  result = cb.ReadAt(blockIndex, [&](Maybe<ProfileBufferEntryReader>&& er) {
+    ++read;
+    MOZ_RELEASE_ASSERT(er.isSome());
+    MOZ_RELEASE_ASSERT(er->CurrentBlockIndex() == blockIndex);
+    MOZ_RELEASE_ASSERT(!er->NextBlockIndex());
+    MOZ_RELEASE_ASSERT(er->RemainingBytes() == sizeof(test));
+    const auto value = er->ReadObject<decltype(test)>();
+    MOZ_RELEASE_ASSERT(value == test);
+    MOZ_RELEASE_ASSERT(er->RemainingBytes() == 0);
+    return 6;
+  });
+  MOZ_RELEASE_ASSERT(result == 6);
+  MOZ_RELEASE_ASSERT(read == 1);
+
+  // No changes after reads.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cb, 1, 1 + ULEB128Size(sizeof(test)) + sizeof(test), 1, 0, 0);
+
+  // Steal the underlying ProfileBufferChunks from the ProfileChunkedBuffer.
+  chunks = cb.GetAllChunks();
+  MOZ_RELEASE_ASSERT(!!chunks, "Expected at least one chunk");
+  MOZ_RELEASE_ASSERT(!!chunks->GetNext(), "Expected two chunks");
+  MOZ_RELEASE_ASSERT(!chunks->GetNext()->GetNext(), "Expected only two chunks");
+  const ProfileChunkedBuffer::Length chunkActualSize = chunks->BufferBytes();
+  MOZ_RELEASE_ASSERT(chunkActualSize >= chunkMinSize);
+  MOZ_RELEASE_ASSERT(chunks->RangeStart() == 1);
+  MOZ_RELEASE_ASSERT(chunks->OffsetFirstBlock() == 0);
+  MOZ_RELEASE_ASSERT(chunks->OffsetPastLastBlock() == 1 + sizeof(test));
+
+  // GetAllChunks() should have advanced the index one full chunk forward.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1 + chunkActualSize,
+                                             1 + chunkActualSize, 1, 0, 0);
+
+  // Nothing more to read from the now-empty ProfileChunkedBuffer.
+  cb.ReadEach([](ProfileBufferEntryReader&) { MOZ_RELEASE_ASSERT(false); });
+  cb.ReadEach([](ProfileBufferEntryReader&, ProfileBufferBlockIndex) {
+    MOZ_RELEASE_ASSERT(false);
+  });
+  result = 0;
+  result = cb.ReadAt(nullptr, [](Maybe<ProfileBufferEntryReader>&& er) {
+    MOZ_RELEASE_ASSERT(er.isNothing());
+    return 7;
+  });
+  MOZ_RELEASE_ASSERT(result == 7);
+
+  // Read the int from the stolen chunks.
+  read = 0;
+  ProfileChunkedBuffer::ReadEach(
+      chunks.get(), nullptr,
+      [&](ProfileBufferEntryReader& er, ProfileBufferBlockIndex aBlockIndex) {
+        ++read;
+        MOZ_RELEASE_ASSERT(aBlockIndex == blockIndex);
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(test));
+        const auto value = er.ReadObject<decltype(test)>();
+        MOZ_RELEASE_ASSERT(value == test);
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+      });
+  MOZ_RELEASE_ASSERT(read == 1);
+
+  // No changes after reads.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, 1 + chunkActualSize,
+                                             1 + chunkActualSize, 1, 0, 0);
+
+  // Write lots of numbers (by memcpy), which should trigger Chunk destructions.
+  ProfileBufferBlockIndex firstBlockIndex;
+  MOZ_RELEASE_ASSERT(!firstBlockIndex);
+  ProfileBufferBlockIndex lastBlockIndex;
+  MOZ_RELEASE_ASSERT(!lastBlockIndex);
+  const size_t lots = 2 * bufferMaxSize / (1 + sizeof(int));
+  for (size_t i = 1; i < lots; ++i) {
+    ProfileBufferBlockIndex blockIndex = cb.PutFrom(&i, sizeof(i));
+    MOZ_RELEASE_ASSERT(!!blockIndex);
+    MOZ_RELEASE_ASSERT(blockIndex > firstBlockIndex);
+    if (!firstBlockIndex) {
+      firstBlockIndex = blockIndex;
+    }
+    MOZ_RELEASE_ASSERT(blockIndex > lastBlockIndex);
+    lastBlockIndex = blockIndex;
+  }
+
+  ProfileChunkedBuffer::State stateAfterPuts = cb.GetState();
+  ProfileBufferIndex startAfterPuts = stateAfterPuts.mRangeStart;
+  MOZ_RELEASE_ASSERT(startAfterPuts > 1 + chunkActualSize);
+  ProfileBufferIndex endAfterPuts = stateAfterPuts.mRangeEnd;
+  MOZ_RELEASE_ASSERT(endAfterPuts > startAfterPuts);
+  uint64_t pushedAfterPuts = stateAfterPuts.mPushedBlockCount;
+  MOZ_RELEASE_ASSERT(pushedAfterPuts > 0);
+  uint64_t clearedAfterPuts = stateAfterPuts.mClearedBlockCount;
+  MOZ_RELEASE_ASSERT(clearedAfterPuts > 0);
+  MOZ_RELEASE_ASSERT(stateAfterPuts.mFailedPutBytes == 0);
+
+  // Read extant numbers, which should at least follow each other.
+  read = 0;
+  size_t i = 0;
+  cb.ReadEach(
+      [&](ProfileBufferEntryReader& er, ProfileBufferBlockIndex aBlockIndex) {
+        ++read;
+        MOZ_RELEASE_ASSERT(!!aBlockIndex);
+        MOZ_RELEASE_ASSERT(aBlockIndex > firstBlockIndex);
+        MOZ_RELEASE_ASSERT(aBlockIndex <= lastBlockIndex);
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == sizeof(size_t));
+        const auto value = er.ReadObject<size_t>();
+        if (i == 0) {
+          i = value;
+        } else {
+          MOZ_RELEASE_ASSERT(value == ++i);
+        }
+        MOZ_RELEASE_ASSERT(er.RemainingBytes() == 0);
+      });
+  MOZ_RELEASE_ASSERT(read != 0);
+  MOZ_RELEASE_ASSERT(read < lots);
+
+  // Read first extant number.
+  read = 0;
+  i = 0;
+  blockIndex = nullptr;
+  success =
+      cb.ReadAt(firstBlockIndex, [&](Maybe<ProfileBufferEntryReader>&& er) {
+        MOZ_RELEASE_ASSERT(er.isSome());
+        ++read;
+        MOZ_RELEASE_ASSERT(er->CurrentBlockIndex() > firstBlockIndex);
+        MOZ_RELEASE_ASSERT(!!er->NextBlockIndex());
+        MOZ_RELEASE_ASSERT(er->NextBlockIndex() > firstBlockIndex);
+        MOZ_RELEASE_ASSERT(er->NextBlockIndex() < lastBlockIndex);
+        blockIndex = er->NextBlockIndex();
+        MOZ_RELEASE_ASSERT(er->RemainingBytes() == sizeof(size_t));
+        const auto value = er->ReadObject<size_t>();
+        MOZ_RELEASE_ASSERT(i == 0);
+        i = value;
+        MOZ_RELEASE_ASSERT(er->RemainingBytes() == 0);
+        return true;
+      });
+  MOZ_RELEASE_ASSERT(success);
+  MOZ_RELEASE_ASSERT(read == 1);
+  // Read other extant numbers one by one.
+  do {
+    bool success =
+        cb.ReadAt(blockIndex, [&](Maybe<ProfileBufferEntryReader>&& er) {
+          MOZ_RELEASE_ASSERT(er.isSome());
+          ++read;
+          MOZ_RELEASE_ASSERT(er->CurrentBlockIndex() == blockIndex);
+          MOZ_RELEASE_ASSERT(!er->NextBlockIndex() ||
+                             er->NextBlockIndex() > blockIndex);
+          MOZ_RELEASE_ASSERT(!er->NextBlockIndex() ||
+                             er->NextBlockIndex() > firstBlockIndex);
+          MOZ_RELEASE_ASSERT(!er->NextBlockIndex() ||
+                             er->NextBlockIndex() <= lastBlockIndex);
+          MOZ_RELEASE_ASSERT(er->NextBlockIndex()
+                                 ? blockIndex < lastBlockIndex
+                                 : blockIndex == lastBlockIndex,
+                             "er->NextBlockIndex() should only be null when "
+                             "blockIndex is at the last block");
+          blockIndex = er->NextBlockIndex();
+          MOZ_RELEASE_ASSERT(er->RemainingBytes() == sizeof(size_t));
+          const auto value = er->ReadObject<size_t>();
+          MOZ_RELEASE_ASSERT(value == ++i);
+          MOZ_RELEASE_ASSERT(er->RemainingBytes() == 0);
+          return true;
+        });
+    MOZ_RELEASE_ASSERT(success);
+  } while (blockIndex);
+  MOZ_RELEASE_ASSERT(read > 1);
+
+  // No changes after reads.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cb, startAfterPuts, endAfterPuts, pushedAfterPuts, clearedAfterPuts, 0);
+
+#  ifdef DEBUG
+  // cb.Dump();
+#  endif
+
+  cb.Clear();
+
+#  ifdef DEBUG
+  // cb.Dump();
+#  endif
+
+  ProfileChunkedBuffer::State stateAfterClear = cb.GetState();
+  ProfileBufferIndex startAfterClear = stateAfterClear.mRangeStart;
+  MOZ_RELEASE_ASSERT(startAfterClear > startAfterPuts);
+  ProfileBufferIndex endAfterClear = stateAfterClear.mRangeEnd;
+  MOZ_RELEASE_ASSERT(endAfterClear == startAfterClear);
+  MOZ_RELEASE_ASSERT(stateAfterClear.mPushedBlockCount == 0);
+  MOZ_RELEASE_ASSERT(stateAfterClear.mClearedBlockCount == 0);
+  MOZ_RELEASE_ASSERT(stateAfterClear.mFailedPutBytes == 0);
+
+  // Start writer threads.
+  constexpr int ThreadCount = 32;
+  std::thread threads[ThreadCount];
+  for (int threadNo = 0; threadNo < ThreadCount; ++threadNo) {
+    threads[threadNo] = std::thread(
+        [&](int aThreadNo) {
+          ::SleepMilli(1);
+          constexpr int pushCount = 1024;
+          for (int push = 0; push < pushCount; ++push) {
+            // Reserve as many bytes as the thread number (but at least enough
+            // to store an int), and write an increasing int.
+            const bool success =
+                cb.Put(std::max(aThreadNo, int(sizeof(push))),
+                       [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+                         if (!aEW) {
+                           return false;
+                         }
+                         aEW->WriteObject(aThreadNo * 1000000 + push);
+                         // Advance writer to the end.
+                         for (size_t r = aEW->RemainingBytes(); r != 0; --r) {
+                           aEW->WriteObject<char>('_');
+                         }
+                         return true;
+                       });
+            MOZ_RELEASE_ASSERT(success);
+          }
+        },
+        threadNo);
+  }
+
+  // Wait for all writer threads to die.
+  for (auto&& thread : threads) {
+    thread.join();
+  }
+
+#  ifdef DEBUG
+  // cb.Dump();
+#  endif
+
+  ProfileChunkedBuffer::State stateAfterMTPuts = cb.GetState();
+  ProfileBufferIndex startAfterMTPuts = stateAfterMTPuts.mRangeStart;
+  MOZ_RELEASE_ASSERT(startAfterMTPuts > startAfterClear);
+  ProfileBufferIndex endAfterMTPuts = stateAfterMTPuts.mRangeEnd;
+  MOZ_RELEASE_ASSERT(endAfterMTPuts > startAfterMTPuts);
+  MOZ_RELEASE_ASSERT(stateAfterMTPuts.mPushedBlockCount > 0);
+  MOZ_RELEASE_ASSERT(stateAfterMTPuts.mClearedBlockCount > 0);
+  MOZ_RELEASE_ASSERT(stateAfterMTPuts.mFailedPutBytes == 0);
+
+  // Reset to out-of-session.
+  cb.ResetChunkManager();
+
+  ProfileChunkedBuffer::State stateAfterReset = cb.GetState();
+  ProfileBufferIndex startAfterReset = stateAfterReset.mRangeStart;
+  MOZ_RELEASE_ASSERT(startAfterReset == endAfterMTPuts);
+  ProfileBufferIndex endAfterReset = stateAfterReset.mRangeEnd;
+  MOZ_RELEASE_ASSERT(endAfterReset == startAfterReset);
+  MOZ_RELEASE_ASSERT(stateAfterReset.mPushedBlockCount == 0);
+  MOZ_RELEASE_ASSERT(stateAfterReset.mClearedBlockCount == 0);
+  MOZ_RELEASE_ASSERT(stateAfterReset.mFailedPutBytes == 0);
+
+  success = cb.ReserveAndPut(
+      []() {
+        MOZ_RELEASE_ASSERT(false);
+        return 1;
+      },
+      [](Maybe<ProfileBufferEntryWriter>& aEW) { return !!aEW; });
+  MOZ_RELEASE_ASSERT(!success);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  success =
+      cb.Put(1, [](Maybe<ProfileBufferEntryWriter>& aEW) { return !!aEW; });
+  MOZ_RELEASE_ASSERT(!success);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  blockIndex = cb.PutFrom(&success, 1);
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  blockIndex = cb.PutObjects(123, success, "hello");
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  blockIndex = cb.PutObject(123);
+  MOZ_RELEASE_ASSERT(!blockIndex);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  chunks = cb.GetAllChunks();
+  MOZ_RELEASE_ASSERT(!chunks, "Expected no chunks when out-of-session");
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  cb.ReadEach([](ProfileBufferEntryReader&) { MOZ_RELEASE_ASSERT(false); });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  success = cb.ReadAt(nullptr, [](Maybe<ProfileBufferEntryReader>&& er) {
+    MOZ_RELEASE_ASSERT(er.isNothing());
+    return true;
+  });
+  MOZ_RELEASE_ASSERT(success);
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cb, startAfterReset, endAfterReset,
+                                             0, 0, 0);
+
+  printf("TestChunkedBuffer done\n");
+}
+
+static void TestChunkedBufferSingle() {
+  printf("TestChunkedBufferSingle...\n");
+  // Fills a single-chunk buffer twice, with an AppendContents check between.
+  constexpr ProfileChunkedBuffer::Length chunkMinSize = 128;
+
+  // Create a ProfileChunkedBuffer that will own&use a
+  // ProfileBufferChunkManagerSingle, which will give away one
+  // ProfileBufferChunk that can contain 128 bytes.
+  ProfileChunkedBuffer cbSingle(
+      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
+      MakeUnique<ProfileBufferChunkManagerSingle>(chunkMinSize));
+
+  MOZ_RELEASE_ASSERT(cbSingle.BufferLength().isSome());
+  const ProfileChunkedBuffer::Length bufferBytes = *cbSingle.BufferLength();
+  MOZ_RELEASE_ASSERT(bufferBytes >= chunkMinSize);
+
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cbSingle, 1, 1, 0, 0, 0);
+
+  // We will write this many blocks to fill the chunk.
+  constexpr size_t testBlocks = 4;
+  const ProfileChunkedBuffer::Length blockBytes = bufferBytes / testBlocks;
+  MOZ_RELEASE_ASSERT(ULEB128Size(blockBytes) == 1,
+                     "This test assumes block sizes are small enough so that "
+                     "their ULEB128-encoded size is 1 byte");
+  const ProfileChunkedBuffer::Length entryBytes =
+      blockBytes - ULEB128Size(blockBytes);
+
+  // First buffer-filling test: Try to write a too-big entry at the end of the
+  // chunk.
+
+  // Write all but one block.
+  for (size_t i = 0; i < testBlocks - 1; ++i) {
+    cbSingle.Put(entryBytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+      MOZ_RELEASE_ASSERT(aEW.isSome());
+      while (aEW->RemainingBytes() > 0) {
+        **aEW = '0' + i;
+        ++(*aEW);
+      }
+    });
+    VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+        cbSingle, 1, 1 + blockBytes * (i + 1), i + 1, 0, 0);
+  }
+
+  // Write the last block so that it's too big (by 1 byte) to fit in the chunk,
+  // this should fail.
+  const ProfileChunkedBuffer::Length remainingBytesForLastBlock =
+      bufferBytes - blockBytes * (testBlocks - 1);
+  MOZ_RELEASE_ASSERT(ULEB128Size(remainingBytesForLastBlock) == 1,
+                     "This test assumes block sizes are small enough so that "
+                     "their ULEB128-encoded size is 1 byte");
+  const ProfileChunkedBuffer::Length entryToFitRemainingBytes =
+      remainingBytesForLastBlock - ULEB128Size(remainingBytesForLastBlock);
+  cbSingle.Put(entryToFitRemainingBytes + 1,
+               [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+                 MOZ_RELEASE_ASSERT(aEW.isNothing());
+               });
+  // The buffer state should not have changed, apart from the failed bytes.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbSingle, 1, 1 + blockBytes * (testBlocks - 1), testBlocks - 1, 0,
+      remainingBytesForLastBlock + 1);
+
+  size_t read = 0;
+  cbSingle.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.RemainingBytes() == entryBytes);
+    while (aER.RemainingBytes() > 0) {
+      MOZ_RELEASE_ASSERT(*aER == '0' + read);
+      ++aER;
+    }
+    ++read;
+  });
+  MOZ_RELEASE_ASSERT(read == testBlocks - 1);
+
+  // ~Interlude~ Test AppendContent:
+  // Create another ProfileChunkedBuffer that will use a
+  // ProfileBufferChunkManagerWithLocalLimit, which will give away
+  // ProfileBufferChunks that can contain 128 bytes, using up to 1KB of memory
+  // (including usable 128 bytes and headers).
+  constexpr size_t bufferMaxSize = 1024;
+  ProfileBufferChunkManagerWithLocalLimit cmTarget(bufferMaxSize, chunkMinSize);
+  ProfileChunkedBuffer cbTarget(ProfileChunkedBuffer::ThreadSafety::WithMutex,
+                                cmTarget);
+
+  // It should start empty.
+  cbTarget.ReadEach(
+      [](ProfileBufferEntryReader&) { MOZ_RELEASE_ASSERT(false); });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cbTarget, 1, 1, 0, 0, 0);
+
+  // Copy the contents from cbSingle to cbTarget.
+  cbTarget.AppendContents(cbSingle);
+
+  // And verify that we now have the same contents in cbTarget.
+  read = 0;
+  cbTarget.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.RemainingBytes() == entryBytes);
+    while (aER.RemainingBytes() > 0) {
+      MOZ_RELEASE_ASSERT(*aER == '0' + read);
+      ++aER;
+    }
+    ++read;
+  });
+  MOZ_RELEASE_ASSERT(read == testBlocks - 1);
+  // The state should be the same as the source.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbTarget, 1, 1 + blockBytes * (testBlocks - 1), testBlocks - 1, 0, 0);
+
+#  ifdef DEBUG
+  // cbSingle.Dump();
+  // cbTarget.Dump();
+#  endif
+
+  // Because we failed to write a too-big chunk above, the chunk was marked
+  // full, so that entries should be consistently rejected from now on.
+  cbSingle.Put(1, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+    MOZ_RELEASE_ASSERT(aEW.isNothing());
+  });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbSingle, 1, 1 + blockBytes * (testBlocks - 1), testBlocks - 1, 0,
+      remainingBytesForLastBlock + 1 + ULEB128Size(1u) + 1);
+
+  // Clear the buffer before the next test.
+
+  cbSingle.Clear();
+  // Clear() should move the index to the next chunk range -- even if it's
+  // really reusing the same chunk.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cbSingle, 1 + bufferBytes,
+                                             1 + bufferBytes, 0, 0, 0);
+  cbSingle.ReadEach(
+      [&](ProfileBufferEntryReader& aER) { MOZ_RELEASE_ASSERT(false); });
+
+  // Second buffer-filling test: Try to write a final entry that just fits at
+  // the end of the chunk.
+
+  // Write all but one block.
+  for (size_t i = 0; i < testBlocks - 1; ++i) {
+    cbSingle.Put(entryBytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+      MOZ_RELEASE_ASSERT(aEW.isSome());
+      while (aEW->RemainingBytes() > 0) {
+        **aEW = 'a' + i;
+        ++(*aEW);
+      }
+    });
+    VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+        cbSingle, 1 + bufferBytes, 1 + bufferBytes + blockBytes * (i + 1),
+        i + 1, 0, 0);
+  }
+
+  read = 0;
+  cbSingle.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.RemainingBytes() == entryBytes);
+    while (aER.RemainingBytes() > 0) {
+      MOZ_RELEASE_ASSERT(*aER == 'a' + read);
+      ++aER;
+    }
+    ++read;
+  });
+  MOZ_RELEASE_ASSERT(read == testBlocks - 1);
+
+  // Write the last block so that it fits exactly in the chunk.
+  cbSingle.Put(entryToFitRemainingBytes,
+               [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+                 MOZ_RELEASE_ASSERT(aEW.isSome());
+                 while (aEW->RemainingBytes() > 0) {
+                   **aEW = 'a' + (testBlocks - 1);
+                   ++(*aEW);
+                 }
+               });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbSingle, 1 + bufferBytes, 1 + bufferBytes * 2,  // Chunk exactly full.
+      testBlocks, 0, 0);
+  // Only the last block (index testBlocks - 1) used entryToFitRemainingBytes.
+  read = 0;
+  cbSingle.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(
+        aER.RemainingBytes() ==
+        ((read < testBlocks - 1) ? entryBytes : entryToFitRemainingBytes));
+    while (aER.RemainingBytes() > 0) {
+      MOZ_RELEASE_ASSERT(*aER == 'a' + read);
+      ++aER;
+    }
+    ++read;
+  });
+  MOZ_RELEASE_ASSERT(read == testBlocks);
+
+  // Because the single chunk has been filled, it shouldn't be possible to write
+  // more entries.
+  cbSingle.Put(1, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+    MOZ_RELEASE_ASSERT(aEW.isNothing());
+  });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbSingle, 1 + bufferBytes, 1 + bufferBytes * 2,  // Range end unchanged.
+      testBlocks, 0, ULEB128Size(1u) + 1);
+
+  cbSingle.Clear();
+  // Clear() should move the index to the next chunk range -- even if it's
+  // really reusing the same chunk.
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(cbSingle, 1 + bufferBytes * 2,
+                                             1 + bufferBytes * 2, 0, 0, 0);
+  cbSingle.ReadEach(
+      [&](ProfileBufferEntryReader& aER) { MOZ_RELEASE_ASSERT(false); });
+
+  // Clear() recycles the released chunk, so we should be able to record new
+  // entries.
+  cbSingle.Put(entryBytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+    MOZ_RELEASE_ASSERT(aEW.isSome());
+    while (aEW->RemainingBytes() > 0) {
+      **aEW = 'x';
+      ++(*aEW);
+    }
+  });
+  VERIFY_PCB_START_END_PUSHED_CLEARED_FAILED(
+      cbSingle, 1 + bufferBytes * 2,
+      1 + bufferBytes * 2 + ULEB128Size(entryBytes) + entryBytes, 1, 0, 0);
+  read = 0;
+  cbSingle.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(read == 0);
+    MOZ_RELEASE_ASSERT(aER.RemainingBytes() == entryBytes);
+    while (aER.RemainingBytes() > 0) {
+      MOZ_RELEASE_ASSERT(*aER == 'x');
+      ++aER;
+    }
+    ++read;
+  });
+  MOZ_RELEASE_ASSERT(read == 1);
+
+  printf("TestChunkedBufferSingle done\n");
+}
+
+static void TestModuloBuffer(ModuloBuffer<>& mb, uint32_t MBSize) {
+  using MB = ModuloBuffer<>;
+
+  MOZ_RELEASE_ASSERT(mb.BufferLength().Value() == MBSize);
+
+  // Iterator comparisons.
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) == mb.ReaderAt(2));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) != mb.ReaderAt(3));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) < mb.ReaderAt(3));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) <= mb.ReaderAt(2));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) <= mb.ReaderAt(3));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(3) > mb.ReaderAt(2));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) >= mb.ReaderAt(2));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(3) >= mb.ReaderAt(2));
+
+  // Iterators indices don't wrap around (even though they may be pointing at
+  // the same location).
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(2) != mb.ReaderAt(MBSize + 2));
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(MBSize + 2) != mb.ReaderAt(2));
+
+  // Dereference.
+  static_assert(std::is_same<decltype(*mb.ReaderAt(0)), const MB::Byte&>::value,
+                "Dereferencing from a reader should return const Byte*");
+  static_assert(std::is_same<decltype(*mb.WriterAt(0)), MB::Byte&>::value,
+                "Dereferencing from a writer should return Byte*");
+  // Contiguous between 0 and MBSize-1.
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(MBSize - 1) ==
+                     &*mb.ReaderAt(0) + (MBSize - 1));
+  // Wraps around.
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(MBSize) == &*mb.ReaderAt(0));
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(MBSize + MBSize - 1) ==
+                     &*mb.ReaderAt(MBSize - 1));
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(MBSize + MBSize) == &*mb.ReaderAt(0));
+  // Power of 2 modulo wrapping.
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(uint32_t(-1)) == &*mb.ReaderAt(MBSize - 1));
+  MOZ_RELEASE_ASSERT(&*mb.ReaderAt(static_cast<MB::Index>(-1)) ==
+                     &*mb.ReaderAt(MBSize - 1));
+
+  // Arithmetic.
+  MB::Reader arit = mb.ReaderAt(0);
+  MOZ_RELEASE_ASSERT(++arit == mb.ReaderAt(1));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(1));
+
+  MOZ_RELEASE_ASSERT(--arit == mb.ReaderAt(0));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(0));
+
+  MOZ_RELEASE_ASSERT(arit++ == mb.ReaderAt(0));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(1));
+
+  MOZ_RELEASE_ASSERT(arit-- == mb.ReaderAt(1));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(0));
+
+  MOZ_RELEASE_ASSERT(arit + 3 == mb.ReaderAt(3));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(0));
+
+  MOZ_RELEASE_ASSERT(4 + arit == mb.ReaderAt(4));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(0));
+
+  // (Can't have assignments inside asserts, hence the split.)
+  const bool checkPlusEq = ((arit += 3) == mb.ReaderAt(3));
+  MOZ_RELEASE_ASSERT(checkPlusEq);
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(3));
+
+  MOZ_RELEASE_ASSERT((arit - 2) == mb.ReaderAt(1));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(3));
+
+  const bool checkMinusEq = ((arit -= 2) == mb.ReaderAt(1));
+  MOZ_RELEASE_ASSERT(checkMinusEq);
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(1));
+
+  // Random access.
+  MOZ_RELEASE_ASSERT(&arit[3] == &*(arit + 3));
+  MOZ_RELEASE_ASSERT(arit == mb.ReaderAt(1));
+
+  // Iterator difference.
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(3) - mb.ReaderAt(1) == 2);
+  MOZ_RELEASE_ASSERT(mb.ReaderAt(1) - mb.ReaderAt(3) == MB::Index(-2));
+
+  // Only testing Writer, as Reader is just a subset with no code differences.
+  MB::Writer it = mb.WriterAt(0);
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 0);
+
+  // Write two characters at the start.
+  it.WriteObject('x');
+  it.WriteObject('y');
+
+  // Backtrack to read them.
+  it -= 2;
+  // PeekObject should read without moving.
+  MOZ_RELEASE_ASSERT(it.PeekObject<char>() == 'x');
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 0);
+  // ReadObject should read and move past the character.
+  MOZ_RELEASE_ASSERT(it.ReadObject<char>() == 'x');
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 1);
+  MOZ_RELEASE_ASSERT(it.PeekObject<char>() == 'y');
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 1);
+  MOZ_RELEASE_ASSERT(it.ReadObject<char>() == 'y');
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 2);
+
+  // Checking that a reader can be created from a writer.
+  MB::Reader it2(it);
+  MOZ_RELEASE_ASSERT(it2.CurrentIndex() == 2);
+  // Or assigned.
+  it2 = it;
+  MOZ_RELEASE_ASSERT(it2.CurrentIndex() == 2);
+
+  // Iterator traits.
+  static_assert(std::is_same<std::iterator_traits<MB::Reader>::difference_type,
+                             MB::Index>::value,
+                "ModuloBuffer::Reader::difference_type should be Index");
+  static_assert(std::is_same<std::iterator_traits<MB::Reader>::value_type,
+                             MB::Byte>::value,
+                "ModuloBuffer::Reader::value_type should be Byte");
+  static_assert(std::is_same<std::iterator_traits<MB::Reader>::pointer,
+                             const MB::Byte*>::value,
+                "ModuloBuffer::Reader::pointer should be const Byte*");
+  static_assert(std::is_same<std::iterator_traits<MB::Reader>::reference,
+                             const MB::Byte&>::value,
+                "ModuloBuffer::Reader::reference should be const Byte&");
+  static_assert(std::is_base_of<
+                    std::input_iterator_tag,
+                    std::iterator_traits<MB::Reader>::iterator_category>::value,
+                "ModuloBuffer::Reader::iterator_category should be derived "
+                "from input_iterator_tag");
+  static_assert(std::is_base_of<
+                    std::forward_iterator_tag,
+                    std::iterator_traits<MB::Reader>::iterator_category>::value,
+                "ModuloBuffer::Reader::iterator_category should be derived "
+                "from forward_iterator_tag");
+  static_assert(std::is_base_of<
+                    std::bidirectional_iterator_tag,
+                    std::iterator_traits<MB::Reader>::iterator_category>::value,
+                "ModuloBuffer::Reader::iterator_category should be derived "
+                "from bidirectional_iterator_tag");
+  static_assert(
+      std::is_same<std::iterator_traits<MB::Reader>::iterator_category,
+                   std::random_access_iterator_tag>::value,
+      "ModuloBuffer::Reader::iterator_category should be "
+      "random_access_iterator_tag");
+
+  // Use as input iterator by std::string constructor (which is only considered
+  // with proper input iterators.)
+  std::string s(mb.ReaderAt(0), mb.ReaderAt(2));
+  MOZ_RELEASE_ASSERT(s == "xy");
+
+  // Write 4-byte number at index 2.
+  it.WriteObject(int32_t(123));
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == 6);
+  // And another, which should now wrap around (but index continues on.)
+  it.WriteObject(int32_t(456));
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == MBSize + 2);
+  // Even though index==MBSize+2, we can read the object we wrote at 2.
+  MOZ_RELEASE_ASSERT(it.ReadObject<int32_t>() == 123);
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == MBSize + 6);
+  // And similarly, index MBSize+6 points at the same location as index 6.
+  MOZ_RELEASE_ASSERT(it.ReadObject<int32_t>() == 456);
+  MOZ_RELEASE_ASSERT(it.CurrentIndex() == MBSize + MBSize + 2);
+}
+
+// Runs the generic `TestModuloBuffer(mb, MBSize)` checks on ModuloBuffer<>
+// objects backed by three kinds of storage (self-allocated, adopted UniquePtr,
+// and a caller-provided region of a stack array), verifies that the
+// caller-provided region is the only part of the stack array that gets
+// modified (also after move-construction), and finally compares
+// `Reader::ReadInto` against a manual byte-by-byte copy for every combination
+// of read index, write index and length below 16 in 16-byte buffers.
+void TestModuloBuffer() {
+  printf("TestModuloBuffer...\n");
+
+  // Testing ModuloBuffer with default template arguments.
+  using MB = ModuloBuffer<>;
+
+  // Only 8-byte buffers, to easily test wrap-around.
+  constexpr uint32_t MBSize = 8;
+
+  // MB with self-allocated heap buffer.
+  MB mbByLength(MakePowerOfTwo32<MBSize>());
+  TestModuloBuffer(mbByLength, MBSize);
+
+  // MB taking ownership of a provided UniquePtr to a buffer. The UniquePtr is
+  // created directly in the constructor call, so no separate (and otherwise
+  // unused) allocation is needed here.
+  MB mbByUniquePtr(MakeUnique<uint8_t[]>(MBSize), MakePowerOfTwo32<MBSize>());
+  TestModuloBuffer(mbByUniquePtr, MBSize);
+
+  // MB using part of a buffer on the stack. The buffer is three times the
+  // required size: The middle third is where ModuloBuffer will work, the first
+  // and last thirds are only used to later verify that ModuloBuffer didn't go
+  // out of its bounds.
+  uint8_t buffer[MBSize * 3];
+  // Pre-fill the buffer with a known pattern, so we can later see what changed.
+  for (size_t i = 0; i < MBSize * 3; ++i) {
+    buffer[i] = uint8_t('A' + i);
+  }
+  MB mbByBuffer(&buffer[MBSize], MakePowerOfTwo32<MBSize>());
+  TestModuloBuffer(mbByBuffer, MBSize);
+
+  // Check that only the provided stack-based sub-buffer was modified.
+  uint32_t changed = 0;
+  for (size_t i = MBSize; i < MBSize * 2; ++i) {
+    changed += (buffer[i] == uint8_t('A' + i)) ? 0 : 1;
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffer should be unchanged.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+
+  // Check that move-construction is allowed. This verifies that we do not
+  // crash from a double free, when `mbByBuffer` and `mbByStolenBuffer` are both
+  // destroyed at the end of this function.
+  MB mbByStolenBuffer = std::move(mbByBuffer);
+  TestModuloBuffer(mbByStolenBuffer, MBSize);
+
+  // Check that only the provided stack-based sub-buffer was modified (the
+  // moved-to object still works on the same middle third of `buffer`).
+  changed = 0;
+  for (size_t i = MBSize; i < MBSize * 2; ++i) {
+    changed += (buffer[i] == uint8_t('A' + i)) ? 0 : 1;
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffer should be unchanged.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+
+  // This test function does a `ReadInto` as directed, and checks that the
+  // result is the same as if the copy had been done manually byte-by-byte.
+  // `TestReadInto(3, 7, 2)` copies from index 3 to index 7, 2 bytes long.
+  // Return the output string (from `ReadInto`) for external checks.
+  auto TestReadInto = [](MB::Index aReadFrom, MB::Index aWriteTo,
+                         MB::Length aBytes) {
+    constexpr uint32_t TRISize = 16;
+
+    // Prepare an input buffer, all different elements.
+    // (The arrays have one extra byte for the string literal's terminating
+    // null, which stays outside of the TRISize-long ModuloBuffer.)
+    uint8_t input[TRISize + 1] = "ABCDEFGHIJKLMNOP";
+    const MB mbInput(input, MakePowerOfTwo32<TRISize>());
+
+    // Prepare an output buffer, different from input.
+    uint8_t output[TRISize + 1] = "abcdefghijklmnop";
+    MB mbOutput(output, MakePowerOfTwo32<TRISize>());
+
+    // Run ReadInto.
+    auto writer = mbOutput.WriterAt(aWriteTo);
+    mbInput.ReaderAt(aReadFrom).ReadInto(writer, aBytes);
+
+    // Do the same operation manually.
+    uint8_t outputCheck[TRISize + 1] = "abcdefghijklmnop";
+    MB mbOutputCheck(outputCheck, MakePowerOfTwo32<TRISize>());
+    auto readerCheck = mbInput.ReaderAt(aReadFrom);
+    auto writerCheck = mbOutputCheck.WriterAt(aWriteTo);
+    for (MB::Length i = 0; i < aBytes; ++i) {
+      *writerCheck++ = *readerCheck++;
+    }
+
+    // Compare the two outputs.
+    for (uint32_t i = 0; i < TRISize; ++i) {
+#  ifdef TEST_MODULOBUFFER_FAILURE_DEBUG
+      // Only used when debugging failures.
+      if (output[i] != outputCheck[i]) {
+        printf(
+            "*** from=%u to=%u bytes=%u i=%u\ninput:  '%s'\noutput: "
+            "'%s'\ncheck:  '%s'\n",
+            unsigned(aReadFrom), unsigned(aWriteTo), unsigned(aBytes),
+            unsigned(i), input, output, outputCheck);
+      }
+#  endif
+      MOZ_RELEASE_ASSERT(output[i] == outputCheck[i]);
+    }
+
+#  ifdef TEST_MODULOBUFFER_HELPER
+    // Only used when adding more tests.
+    printf("*** from=%u to=%u bytes=%u output: %s\n", unsigned(aReadFrom),
+           unsigned(aWriteTo), unsigned(aBytes), output);
+#  endif
+
+    return std::string(reinterpret_cast<const char*>(output));
+  };
+
+  // A few manual checks:
+  constexpr uint32_t TRISize = 16;
+  MOZ_RELEASE_ASSERT(TestReadInto(0, 0, 0) == "abcdefghijklmnop");
+  MOZ_RELEASE_ASSERT(TestReadInto(0, 0, TRISize) == "ABCDEFGHIJKLMNOP");
+  MOZ_RELEASE_ASSERT(TestReadInto(0, 5, TRISize) == "LMNOPABCDEFGHIJK");
+  MOZ_RELEASE_ASSERT(TestReadInto(5, 0, TRISize) == "FGHIJKLMNOPABCDE");
+
+  // Test everything! (16^3 = 4096, not too much.)
+  for (MB::Index r = 0; r < TRISize; ++r) {
+    for (MB::Index w = 0; w < TRISize; ++w) {
+      for (MB::Length len = 0; len < TRISize; ++len) {
+        TestReadInto(r, w, len);
+      }
+    }
+  }
+
+  printf("TestModuloBuffer done\n");
+}
+
+void TestBlocksRingBufferAPI() {
+  printf("TestBlocksRingBufferAPI...\n");
+
+  // API test on a 16-byte buffer (middle third of `buffer`), enough to store up
+  // to 3 entries (each block: 1 byte for the size + 4 bytes for a uint32_t).
+  constexpr uint32_t MBSize = 16;
+  uint8_t buffer[MBSize * 3];
+  for (size_t i = 0; i < MBSize * 3; ++i) {
+    buffer[i] = uint8_t('A' + i);
+  }
+
+  // Start a temporary block to constrain buffer lifetime.
+  {
+    BlocksRingBuffer rb(BlocksRingBuffer::ThreadSafety::WithMutex,
+                        &buffer[MBSize], MakePowerOfTwo32<MBSize>());
+
+#  define VERIFY_START_END_PUSHED_CLEARED(aStart, aEnd, aPushed, aCleared)  \
+    {                                                                       \
+      BlocksRingBuffer::State state = rb.GetState();                        \
+      MOZ_RELEASE_ASSERT(state.mRangeStart.ConvertToProfileBufferIndex() == \
+                         (aStart));                                         \
+      MOZ_RELEASE_ASSERT(state.mRangeEnd.ConvertToProfileBufferIndex() ==   \
+                         (aEnd));                                           \
+      MOZ_RELEASE_ASSERT(state.mPushedBlockCount == (aPushed));             \
+      MOZ_RELEASE_ASSERT(state.mClearedBlockCount == (aCleared));           \
+    }
+
+    // All entries will contain one 32-bit number. The resulting blocks will
+    // have the following structure:
+    // - 1 byte for the LEB128 size of 4
+    // - 4 bytes for the number.
+    // E.g., if we have entries with `123` and `456`:
+    //   .-- Index 0 reserved for empty ProfileBufferBlockIndex, nothing there.
+    //   | .-- first readable block at index 1
+    //   | |.-- first block at index 1
+    //   | ||.-- 1 byte for the entry size, which is `4` (32 bits)
+    //   | |||  .-- entry starts at index 2, contains 32-bit int
+    //   | |||  |             .-- entry and block finish *after* index 5 (so 6)
+    //   | |||  |             | .-- second block starts at index 6
+    //   | |||  |             | |         etc.
+    //   | |||  |             | |                  .-- End readable blocks: 11
+    //   v vvv  v             v V                  v
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+    //   - S[4 |   int(123)   ] [4 |   int(456)   ]E
+
+    // Empty buffer to start with.
+    // Start&end indices still at 1 (0 is reserved for the default
+    // ProfileBufferBlockIndex{} that cannot point at a valid entry), nothing
+    // cleared.
+    VERIFY_START_END_PUSHED_CLEARED(1, 1, 0, 0);
+
+    // Default ProfileBufferBlockIndex.
+    ProfileBufferBlockIndex bi0;
+    if (bi0) {
+      MOZ_RELEASE_ASSERT(false,
+                         "if (ProfileBufferBlockIndex{}) should fail test");
+    }
+    if (!bi0) {
+    } else {
+      MOZ_RELEASE_ASSERT(false,
+                         "if (!ProfileBufferBlockIndex{}) should succeed test");
+    }
+    MOZ_RELEASE_ASSERT(!bi0);
+    MOZ_RELEASE_ASSERT(bi0 == bi0);
+    MOZ_RELEASE_ASSERT(bi0 <= bi0);
+    MOZ_RELEASE_ASSERT(bi0 >= bi0);
+    MOZ_RELEASE_ASSERT(!(bi0 != bi0));
+    MOZ_RELEASE_ASSERT(!(bi0 < bi0));
+    MOZ_RELEASE_ASSERT(!(bi0 > bi0));
+
+    // Default ProfileBufferBlockIndex can be used, but returns no valid entry.
+    rb.ReadAt(bi0, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isNothing());
+    });
+
+    // Push `1` directly.
+    MOZ_RELEASE_ASSERT(
+        rb.PutObject(uint32_t(1)).ConvertToProfileBufferIndex() == 1);
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+    //   - S[4 |    int(1)    ]E
+    VERIFY_START_END_PUSHED_CLEARED(1, 6, 1, 0);
+
+    // Push `2` through ReserveAndPut, check output ProfileBufferBlockIndex.
+    auto bi2 = rb.ReserveAndPut([]() { return sizeof(uint32_t); },
+                                [](Maybe<ProfileBufferEntryWriter>& aEW) {
+                                  MOZ_RELEASE_ASSERT(aEW.isSome());
+                                  aEW->WriteObject(uint32_t(2));
+                                  return aEW->CurrentBlockIndex();
+                                });
+    static_assert(std::is_same<decltype(bi2), ProfileBufferBlockIndex>::value,
+                  "All index-returning functions should return a "
+                  "ProfileBufferBlockIndex");
+    MOZ_RELEASE_ASSERT(bi2.ConvertToProfileBufferIndex() == 6);
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
+    //   - S[4 |    int(1)    ] [4 |    int(2)    ]E
+    VERIFY_START_END_PUSHED_CLEARED(1, 11, 2, 0);
+
+    // Check single entry at bi2, store next block index.
+    auto i2Next =
+        rb.ReadAt(bi2, [bi2](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+          MOZ_RELEASE_ASSERT(aMaybeReader.isSome());
+          MOZ_RELEASE_ASSERT(aMaybeReader->CurrentBlockIndex() == bi2);
+          MOZ_RELEASE_ASSERT(aMaybeReader->NextBlockIndex() == nullptr);
+          size_t entrySize = aMaybeReader->RemainingBytes();
+          MOZ_RELEASE_ASSERT(aMaybeReader->ReadObject<uint32_t>() == 2);
+          // The next block index is after this block, which is made of the
+          // entry size (coded as ULEB128) followed by the entry itself.
+          return bi2.ConvertToProfileBufferIndex() + ULEB128Size(entrySize) +
+                 entrySize;
+        });
+    auto bi2Next = rb.GetState().mRangeEnd;
+    MOZ_RELEASE_ASSERT(bi2Next.ConvertToProfileBufferIndex() == i2Next);
+    // bi2Next is at the end, nothing to read.
+    rb.ReadAt(bi2Next, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isNothing());
+    });
+
+    // ProfileBufferBlockIndex tests.
+    if (bi2) {
+    } else {
+      MOZ_RELEASE_ASSERT(
+          false,
+          "if (non-default-ProfileBufferBlockIndex) should succeed test");
+    }
+    if (!bi2) {
+      MOZ_RELEASE_ASSERT(
+          false, "if (!non-default-ProfileBufferBlockIndex) should fail test");
+    }
+
+    MOZ_RELEASE_ASSERT(!!bi2);
+    MOZ_RELEASE_ASSERT(bi2 == bi2);
+    MOZ_RELEASE_ASSERT(bi2 <= bi2);
+    MOZ_RELEASE_ASSERT(bi2 >= bi2);
+    MOZ_RELEASE_ASSERT(!(bi2 != bi2));
+    MOZ_RELEASE_ASSERT(!(bi2 < bi2));
+    MOZ_RELEASE_ASSERT(!(bi2 > bi2));
+
+    MOZ_RELEASE_ASSERT(bi0 != bi2);
+    MOZ_RELEASE_ASSERT(bi0 < bi2);
+    MOZ_RELEASE_ASSERT(bi0 <= bi2);
+    MOZ_RELEASE_ASSERT(!(bi0 == bi2));
+    MOZ_RELEASE_ASSERT(!(bi0 > bi2));
+    MOZ_RELEASE_ASSERT(!(bi0 >= bi2));
+
+    MOZ_RELEASE_ASSERT(bi2 != bi0);
+    MOZ_RELEASE_ASSERT(bi2 > bi0);
+    MOZ_RELEASE_ASSERT(bi2 >= bi0);
+    MOZ_RELEASE_ASSERT(!(bi2 == bi0));
+    MOZ_RELEASE_ASSERT(!(bi2 < bi0));
+    MOZ_RELEASE_ASSERT(!(bi2 <= bi0));
+
+    MOZ_RELEASE_ASSERT(bi2 != bi2Next);
+    MOZ_RELEASE_ASSERT(bi2 < bi2Next);
+    MOZ_RELEASE_ASSERT(bi2 <= bi2Next);
+    MOZ_RELEASE_ASSERT(!(bi2 == bi2Next));
+    MOZ_RELEASE_ASSERT(!(bi2 > bi2Next));
+    MOZ_RELEASE_ASSERT(!(bi2 >= bi2Next));
+
+    MOZ_RELEASE_ASSERT(bi2Next != bi2);
+    MOZ_RELEASE_ASSERT(bi2Next > bi2);
+    MOZ_RELEASE_ASSERT(bi2Next >= bi2);
+    MOZ_RELEASE_ASSERT(!(bi2Next == bi2));
+    MOZ_RELEASE_ASSERT(!(bi2Next < bi2));
+    MOZ_RELEASE_ASSERT(!(bi2Next <= bi2));
+
+    // Push `3` through Put, check writer output
+    // is returned to the initial caller.
+    auto put3 =
+        rb.Put(sizeof(uint32_t), [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+          MOZ_RELEASE_ASSERT(aEW.isSome());
+          aEW->WriteObject(uint32_t(3));
+          MOZ_RELEASE_ASSERT(aEW->CurrentBlockIndex() == bi2Next);
+          return float(aEW->CurrentBlockIndex().ConvertToProfileBufferIndex());
+        });
+    static_assert(std::is_same<decltype(put3), float>::value,
+                  "Expect float as returned by callback.");
+    MOZ_RELEASE_ASSERT(put3 == 11.0);
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 (16)
+    //   - S[4 |    int(1)    ] [4 |    int(2)    ] [4 |    int(3)    ]E
+    VERIFY_START_END_PUSHED_CLEARED(1, 16, 3, 0);
+
+    // Re-Read single entry at bi2, it should now have a next entry.
+    rb.ReadAt(bi2, [&](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isSome());
+      MOZ_RELEASE_ASSERT(aMaybeReader->CurrentBlockIndex() == bi2);
+      MOZ_RELEASE_ASSERT(aMaybeReader->ReadObject<uint32_t>() == 2);
+      MOZ_RELEASE_ASSERT(aMaybeReader->NextBlockIndex() == bi2Next);
+    });
+
+    // Check that we have `1` to `3`.
+    uint32_t count = 0;
+    rb.ReadEach([&](ProfileBufferEntryReader& aReader) {
+      MOZ_RELEASE_ASSERT(aReader.ReadObject<uint32_t>() == ++count);
+    });
+    MOZ_RELEASE_ASSERT(count == 3);
+
+    // Push `4`, store its ProfileBufferBlockIndex for later.
+    // This will wrap around, and clear the first entry.
+    ProfileBufferBlockIndex bi4 = rb.PutObject(uint32_t(4));
+    // Before:
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 (16)
+    //   - S[4 |    int(1)    ] [4 |    int(2)    ] [4 |    int(3)    ]E
+    // 1. First entry cleared:
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 (16)
+    //   -   ?   ?   ?   ?   ? S[4 |    int(2)    ] [4 |    int(3)    ]E
+    // 2. New entry starts at 16 and wraps around: (shown on separate line)
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 (16)
+    //   -   ?   ?   ?   ?   ? S[4 |    int(2)    ] [4 |    int(3)    ]
+    //  16  17  18  19  20  21  ...
+    //  [4 |    int(4)    ]E
+    // (collapsed)
+    //  16  17  18  19  20  21   6   7   8   9  10  11  12  13  14  15 (16)
+    //  [4 |    int(4)    ]E ? S[4 |    int(2)    ] [4 |    int(3)    ]
+    VERIFY_START_END_PUSHED_CLEARED(6, 21, 4, 1);
+
+    // Check that we have `2` to `4`.
+    count = 1;
+    rb.ReadEach([&](ProfileBufferEntryReader& aReader) {
+      MOZ_RELEASE_ASSERT(aReader.ReadObject<uint32_t>() == ++count);
+    });
+    MOZ_RELEASE_ASSERT(count == 4);
+
+    // Push `5` through Put, keeping the returned block index as `bi5`.
+    // This will clear the second entry.
+    // (Reads at the now-cleared bi2 and at bi5 are checked further below.)
+    auto bi5 =
+        rb.Put(sizeof(uint32_t), [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+          MOZ_RELEASE_ASSERT(aEW.isSome());
+          aEW->WriteObject(uint32_t(5));
+          return aEW->CurrentBlockIndex();
+        });
+    auto bi6 = rb.GetState().mRangeEnd;
+    //  16  17  18  19  20  21  22  23  24  25  26  11  12  13  14  15 (16)
+    //  [4 |    int(4)    ] [4 |    int(5)    ]E ? S[4 |    int(3)    ]
+    VERIFY_START_END_PUSHED_CLEARED(11, 26, 5, 2);
+
+    // Read single entry at bi2, should now gracefully fail.
+    rb.ReadAt(bi2, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isNothing());
+    });
+
+    // Read single entry at bi5.
+    rb.ReadAt(bi5, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isSome());
+      MOZ_RELEASE_ASSERT(aMaybeReader->ReadObject<uint32_t>() == 5);
+    });
+
+    rb.Read([&](BlocksRingBuffer::Reader* aReader) {
+      MOZ_RELEASE_ASSERT(!!aReader);
+      // begin() and end() should be at the range edges (verified above).
+      MOZ_RELEASE_ASSERT(
+          aReader->begin().CurrentBlockIndex().ConvertToProfileBufferIndex() ==
+          11);
+      MOZ_RELEASE_ASSERT(
+          aReader->end().CurrentBlockIndex().ConvertToProfileBufferIndex() ==
+          26);
+      // Null ProfileBufferBlockIndex clamped to the beginning.
+      MOZ_RELEASE_ASSERT(aReader->At(bi0) == aReader->begin());
+      // Cleared block index clamped to the beginning.
+      MOZ_RELEASE_ASSERT(aReader->At(bi2) == aReader->begin());
+      // At(begin) same as begin().
+      MOZ_RELEASE_ASSERT(aReader->At(aReader->begin().CurrentBlockIndex()) ==
+                         aReader->begin());
+      // bi5 at expected position.
+      MOZ_RELEASE_ASSERT(
+          aReader->At(bi5).CurrentBlockIndex().ConvertToProfileBufferIndex() ==
+          21);
+      // bi6 at expected position at the end.
+      MOZ_RELEASE_ASSERT(aReader->At(bi6) == aReader->end());
+      // At(end) same as end().
+      MOZ_RELEASE_ASSERT(aReader->At(aReader->end().CurrentBlockIndex()) ==
+                         aReader->end());
+    });
+
+    // Check that we have `3` to `5`.
+    count = 2;
+    rb.ReadEach([&](ProfileBufferEntryReader& aReader) {
+      MOZ_RELEASE_ASSERT(aReader.ReadObject<uint32_t>() == ++count);
+    });
+    MOZ_RELEASE_ASSERT(count == 5);
+
+    // Clear everything before `4`, this should clear `3`.
+    rb.ClearBefore(bi4);
+    //  16  17  18  19  20  21  22  23  24  25  26  11  12  13  14  15
+    // S[4 |    int(4)    ] [4 |    int(5)    ]E ?   ?   ?   ?   ?   ?
+    VERIFY_START_END_PUSHED_CLEARED(16, 26, 5, 3);
+
+    // Check that we have `4` to `5`.
+    count = 3;
+    rb.ReadEach([&](ProfileBufferEntryReader& aReader) {
+      MOZ_RELEASE_ASSERT(aReader.ReadObject<uint32_t>() == ++count);
+    });
+    MOZ_RELEASE_ASSERT(count == 5);
+
+    // Clear everything before `4` again, nothing to clear.
+    rb.ClearBefore(bi4);
+    VERIFY_START_END_PUSHED_CLEARED(16, 26, 5, 3);
+
+    // Clear everything, this should clear `4` and `5`, and bring the start
+    // index where the end index currently is.
+    rb.ClearBefore(bi6);
+    //  16  17  18  19  20  21  22  23  24  25  26  11  12  13  14  15
+    //   ?   ?   ?   ?   ?   ?   ?   ?   ?   ? SE?   ?   ?   ?   ?   ?
+    VERIFY_START_END_PUSHED_CLEARED(26, 26, 5, 5);
+
+    // Check that we have nothing to read.
+    rb.ReadEach([&](auto&&) { MOZ_RELEASE_ASSERT(false); });
+
+    // Read single entry at bi5, should now gracefully fail.
+    rb.ReadAt(bi5, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isNothing());
+    });
+
+    // Clear everything before now-cleared `4`, nothing to clear.
+    rb.ClearBefore(bi4);
+    VERIFY_START_END_PUSHED_CLEARED(26, 26, 5, 5);
+
+    // Push `6` directly.
+    MOZ_RELEASE_ASSERT(rb.PutObject(uint32_t(6)) == bi6);
+    //  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
+    //   ?   ?   ?   ?   ?   ?   ?   ?   ?   ? S[4 |    int(6)    ]E ?
+    VERIFY_START_END_PUSHED_CLEARED(26, 31, 6, 5);
+
+    {
+      // Create a 2nd buffer and fill it with `7` and `8`.
+      uint8_t buffer2[MBSize];
+      BlocksRingBuffer rb2(BlocksRingBuffer::ThreadSafety::WithoutMutex,
+                           buffer2, MakePowerOfTwo32<MBSize>());
+      rb2.PutObject(uint32_t(7));
+      rb2.PutObject(uint32_t(8));
+      // Main buffer shouldn't have changed.
+      VERIFY_START_END_PUSHED_CLEARED(26, 31, 6, 5);
+
+      // Append contents of rb2 to rb, this should end up being the same as
+      // pushing the two numbers.
+      rb.AppendContents(rb2);
+      //  32  33  34  35  36  37  38  39  40  41  26  27  28  29  30  31
+      //      int(7)    ] [4 |    int(8)    ]E ? S[4 |    int(6)    ] [4 |
+      VERIFY_START_END_PUSHED_CLEARED(26, 41, 8, 5);
+
+      // Append contents of rb2 to rb again, to verify that rb2 was not modified
+      // above. This should clear `6` and the first `7`.
+      rb.AppendContents(rb2);
+      //  48  49  50  51  36  37  38  39  40  41  42  43  44  45  46  47
+      //  int(8)    ]E ? S[4 |    int(8)    ] [4 |    int(7)    ] [4 |
+      VERIFY_START_END_PUSHED_CLEARED(36, 51, 10, 7);
+
+      // End of block where rb2 lives, to verify that it is not needed anymore
+      // for its copied values to survive in rb.
+    }
+    VERIFY_START_END_PUSHED_CLEARED(36, 51, 10, 7);
+
+    // bi6 should now have been cleared.
+    rb.ReadAt(bi6, [](Maybe<ProfileBufferEntryReader>&& aMaybeReader) {
+      MOZ_RELEASE_ASSERT(aMaybeReader.isNothing());
+    });
+
+    // Check that we have `8`, `7`, `8`.
+    count = 0;
+    uint32_t expected[3] = {8, 7, 8};
+    rb.ReadEach([&](ProfileBufferEntryReader& aReader) {
+      MOZ_RELEASE_ASSERT(count < 3);
+      MOZ_RELEASE_ASSERT(aReader.ReadObject<uint32_t>() == expected[count++]);
+    });
+    MOZ_RELEASE_ASSERT(count == 3);
+
+    // End of block where rb lives, BlocksRingBuffer destructor should call
+    // entry destructor for remaining entries.
+  }
+
+  // Check that only the provided stack-based sub-buffer was modified.
+  uint32_t changed = 0;
+  for (size_t i = MBSize; i < MBSize * 2; ++i) {
+    changed += (buffer[i] == uint8_t('A' + i)) ? 0 : 1;
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffer should be unchanged.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+
+  printf("TestBlocksRingBufferAPI done\n");
+}
+
+// Checks that a BlocksRingBuffer behaves consistently while its underlying
+// buffer is repeatedly removed (`Reset()`) and replaced (`Set(...)`, with
+// either a self-allocated buffer or a caller-provided region of a stack
+// array): the same set of API calls is run in each out-of-session and
+// in-session state, and the caller-provided region is verified to be the only
+// part of the stack array that gets modified.
+void TestBlocksRingBufferUnderlyingBufferChanges() {
+  printf("TestBlocksRingBufferUnderlyingBufferChanges...\n");
+
+  // Out-of-session BlocksRingBuffer to start with.
+  BlocksRingBuffer rb(BlocksRingBuffer::ThreadSafety::WithMutex);
+
+  // Block index to read at. Initially "null", but may be changed below.
+  ProfileBufferBlockIndex bi;
+
+  // Test all rb APIs when rb is out-of-session and therefore doesn't have an
+  // underlying buffer.
+  auto testOutOfSession = [&]() {
+    MOZ_RELEASE_ASSERT(rb.BufferLength().isNothing());
+    BlocksRingBuffer::State state = rb.GetState();
+    // When out-of-session, range start and ends are the same, and there are no
+    // pushed&cleared blocks.
+    MOZ_RELEASE_ASSERT(state.mRangeStart == state.mRangeEnd);
+    MOZ_RELEASE_ASSERT(state.mPushedBlockCount == 0);
+    MOZ_RELEASE_ASSERT(state.mClearedBlockCount == 0);
+    // `Put()` functions run the callback with `Nothing`.
+    int32_t ran = 0;
+    rb.Put(1, [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+      MOZ_RELEASE_ASSERT(aMaybeEntryWriter.isNothing());
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    // `PutFrom` and `PutObject` won't do anything, and return the null
+    // ProfileBufferBlockIndex.
+    MOZ_RELEASE_ASSERT(rb.PutFrom(&ran, sizeof(ran)) ==
+                       ProfileBufferBlockIndex{});
+    MOZ_RELEASE_ASSERT(rb.PutObject(ran) == ProfileBufferBlockIndex{});
+    // `Read()` runs the callback with a null reader pointer.
+    ran = 0;
+    rb.Read([&](BlocksRingBuffer::Reader* aReader) {
+      MOZ_RELEASE_ASSERT(!aReader);
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    // `ReadAt()` runs the callback with `Nothing`, for both the null index and
+    // whatever `bi` currently is.
+    ran = 0;
+    rb.ReadAt(ProfileBufferBlockIndex{},
+              [&](Maybe<ProfileBufferEntryReader>&& aMaybeEntryReader) {
+                MOZ_RELEASE_ASSERT(aMaybeEntryReader.isNothing());
+                ++ran;
+              });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    ran = 0;
+    rb.ReadAt(bi, [&](Maybe<ProfileBufferEntryReader>&& aMaybeEntryReader) {
+      MOZ_RELEASE_ASSERT(aMaybeEntryReader.isNothing());
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    // `ReadEach` shouldn't run the callback (nothing to read).
+    rb.ReadEach([](auto&&) { MOZ_RELEASE_ASSERT(false); });
+  };
+
+  // As `testOutOfSession()` attempts to modify the buffer, we run it twice to
+  // make sure one run doesn't influence the next one.
+  testOutOfSession();
+  testOutOfSession();
+
+  rb.ClearBefore(bi);
+  testOutOfSession();
+  testOutOfSession();
+
+  rb.Clear();
+  testOutOfSession();
+  testOutOfSession();
+
+  rb.Reset();
+  testOutOfSession();
+  testOutOfSession();
+
+  constexpr uint32_t MBSize = 32;
+
+  rb.Set(MakePowerOfTwo<BlocksRingBuffer::Length, MBSize>());
+
+  constexpr bool EMPTY = true;
+  constexpr bool NOT_EMPTY = false;
+  // Test all rb APIs when rb has an underlying buffer.
+  auto testInSession = [&](bool aExpectEmpty) {
+    MOZ_RELEASE_ASSERT(rb.BufferLength().isSome());
+    BlocksRingBuffer::State state = rb.GetState();
+    if (aExpectEmpty) {
+      MOZ_RELEASE_ASSERT(state.mRangeStart == state.mRangeEnd);
+      MOZ_RELEASE_ASSERT(state.mPushedBlockCount == 0);
+      MOZ_RELEASE_ASSERT(state.mClearedBlockCount == 0);
+    } else {
+      MOZ_RELEASE_ASSERT(state.mRangeStart < state.mRangeEnd);
+      MOZ_RELEASE_ASSERT(state.mPushedBlockCount > 0);
+      MOZ_RELEASE_ASSERT(state.mClearedBlockCount <= state.mPushedBlockCount);
+    }
+    int32_t ran = 0;
+    // The following three `Put...` will write three int32_t of value 1.
+    // The first one also updates `bi` with the index of the block it wrote.
+    bi = rb.Put(sizeof(ran),
+                [&](Maybe<ProfileBufferEntryWriter>& aMaybeEntryWriter) {
+                  MOZ_RELEASE_ASSERT(aMaybeEntryWriter.isSome());
+                  ++ran;
+                  aMaybeEntryWriter->WriteObject(ran);
+                  return aMaybeEntryWriter->CurrentBlockIndex();
+                });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    MOZ_RELEASE_ASSERT(rb.PutFrom(&ran, sizeof(ran)) !=
+                       ProfileBufferBlockIndex{});
+    MOZ_RELEASE_ASSERT(rb.PutObject(ran) != ProfileBufferBlockIndex{});
+    // `Read()` runs the callback with a non-null reader pointer.
+    ran = 0;
+    rb.Read([&](BlocksRingBuffer::Reader* aReader) {
+      MOZ_RELEASE_ASSERT(!!aReader);
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    // `ReadEach()` visits every entry; all of them should be int32_t(1), and
+    // there should be at least the three written above.
+    ran = 0;
+    rb.ReadEach([&](ProfileBufferEntryReader& aEntryReader) {
+      MOZ_RELEASE_ASSERT(aEntryReader.RemainingBytes() == sizeof(ran));
+      MOZ_RELEASE_ASSERT(aEntryReader.ReadObject<decltype(ran)>() == 1);
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran >= 3);
+    // `ReadAt()` with the null index still gives `Nothing`.
+    ran = 0;
+    rb.ReadAt(ProfileBufferBlockIndex{},
+              [&](Maybe<ProfileBufferEntryReader>&& aMaybeEntryReader) {
+                MOZ_RELEASE_ASSERT(aMaybeEntryReader.isNothing());
+                ++ran;
+              });
+    MOZ_RELEASE_ASSERT(ran == 1);
+    // `ReadAt(bi)` gives an entry reader if and only if `bi` is not null.
+    ran = 0;
+    rb.ReadAt(bi, [&](Maybe<ProfileBufferEntryReader>&& aMaybeEntryReader) {
+      MOZ_RELEASE_ASSERT(aMaybeEntryReader.isNothing() == !bi);
+      ++ran;
+    });
+    MOZ_RELEASE_ASSERT(ran == 1);
+  };
+
+  testInSession(EMPTY);
+  testInSession(NOT_EMPTY);
+
+  // Replace the self-allocated buffer with another one of the same size
+  // (`MBSize`, like all other `Set()` calls in this test); the new buffer
+  // should start empty.
+  rb.Set(MakePowerOfTwo<BlocksRingBuffer::Length, MBSize>());
+  MOZ_RELEASE_ASSERT(rb.BufferLength().isSome());
+  rb.ReadEach([](auto&&) { MOZ_RELEASE_ASSERT(false); });
+
+  testInSession(EMPTY);
+  testInSession(NOT_EMPTY);
+
+  rb.Reset();
+  testOutOfSession();
+  testOutOfSession();
+
+  // Stack buffer three times the required size: only the middle third is given
+  // to `rb`, the outer thirds are used to detect out-of-bounds writes.
+  uint8_t buffer[MBSize * 3];
+  for (size_t i = 0; i < MBSize * 3; ++i) {
+    buffer[i] = uint8_t('A' + i);
+  }
+
+  rb.Set(&buffer[MBSize], MakePowerOfTwo<BlocksRingBuffer::Length, MBSize>());
+  MOZ_RELEASE_ASSERT(rb.BufferLength().isSome());
+  rb.ReadEach([](auto&&) { MOZ_RELEASE_ASSERT(false); });
+
+  testInSession(EMPTY);
+  testInSession(NOT_EMPTY);
+
+  rb.Reset();
+  testOutOfSession();
+  testOutOfSession();
+
+  rb.Set(&buffer[MBSize], MakePowerOfTwo<BlocksRingBuffer::Length, MBSize>());
+  MOZ_RELEASE_ASSERT(rb.BufferLength().isSome());
+  rb.ReadEach([](auto&&) { MOZ_RELEASE_ASSERT(false); });
+
+  testInSession(EMPTY);
+  testInSession(NOT_EMPTY);
+
+  // Remove the current underlying buffer, this should clear all entries.
+  rb.Reset();
+
+  // Check that only the provided stack-based sub-buffer was modified.
+  uint32_t changed = 0;
+  for (size_t i = MBSize; i < MBSize * 2; ++i) {
+    changed += (buffer[i] == uint8_t('A' + i)) ? 0 : 1;
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffer should be unchanged.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+
+  testOutOfSession();
+  testOutOfSession();
+
+  printf("TestBlocksRingBufferUnderlyingBufferChanges done\n");
+}
+
+// Stress-test a mutex-protected BlocksRingBuffer: one thread repeatedly reads
+// and prints the buffer state while 32 threads each push 1024 entries, and
+// afterwards the memory around the buffer's storage is checked for changes.
+void TestBlocksRingBufferThreading() {
+  printf("TestBlocksRingBufferThreading...\n");
+
+  // Only the middle third of this stack array is handed to the ring buffer;
+  // the first and last thirds are guard zones verified at the end.
+  constexpr uint32_t MBSize = 8192;
+  constexpr size_t kTotalBytes = size_t(MBSize) * 3;
+  // Value each byte is initialized with, as a function of its offset.
+  auto fillByteAt = [](size_t aOffset) { return uint8_t('A' + aOffset); };
+  uint8_t buffer[kTotalBytes];
+  for (size_t offset = 0; offset < kTotalBytes; ++offset) {
+    buffer[offset] = fillByteAt(offset);
+  }
+  BlocksRingBuffer rb(BlocksRingBuffer::ThreadSafety::WithMutex,
+                      &buffer[MBSize], MakePowerOfTwo32<MBSize>());
+
+  // Reader thread: prints the buffer state in a loop. The stop flag is only
+  // tested after printing, so the state is printed at least once.
+  std::atomic<bool> stopReader{false};
+  std::thread reader([&]() {
+    while (true) {
+      BlocksRingBuffer::State state = rb.GetState();
+      const auto rangeStart = static_cast<unsigned long long>(
+          state.mRangeStart.ConvertToProfileBufferIndex());
+      const auto rangeEnd = static_cast<unsigned long long>(
+          state.mRangeEnd.ConvertToProfileBufferIndex());
+      const auto pushedBlocks =
+          static_cast<unsigned long long>(state.mPushedBlockCount);
+      const auto clearedBlocks =
+          static_cast<unsigned long long>(state.mClearedBlockCount);
+      const auto aliveBlocks = static_cast<unsigned long long>(
+          state.mPushedBlockCount - state.mClearedBlockCount);
+      printf(
+          "Reader: range=%llu..%llu (%llu bytes) pushed=%llu cleared=%llu "
+          "(alive=%llu)\n",
+          rangeStart, rangeEnd, rangeEnd - rangeStart, pushedBlocks,
+          clearedBlocks, aliveBlocks);
+      if (stopReader) {
+        break;
+      }
+      ::SleepMilli(1);
+    }
+  });
+
+  // Work done by every writer thread; each thread gets its own copy of this
+  // closure, which only refers to the shared ring buffer.
+  auto writeEntries = [&rb](int aThreadNo) {
+    ::SleepMilli(1);
+    constexpr int pushCount = 1024;
+    // Reserve as many bytes as the thread number (but at least enough to
+    // store an int).
+    const int entryBytes = std::max(aThreadNo, int(sizeof(int)));
+    for (int push = 0; push != pushCount; ++push) {
+      rb.Put(entryBytes, [&](Maybe<ProfileBufferEntryWriter>& aEW) {
+        MOZ_RELEASE_ASSERT(aEW.isSome());
+        // Write an increasing int, then skip over the rest of the entry.
+        aEW->WriteObject(aThreadNo * 1000000 + push);
+        *aEW += aEW->RemainingBytes();
+      });
+    }
+  };
+
+  // Start writer threads, numbered from 0.
+  constexpr int ThreadCount = 32;
+  std::thread threads[ThreadCount];
+  int nextThreadNo = 0;
+  for (std::thread& writer : threads) {
+    writer = std::thread(writeEntries, nextThreadNo++);
+  }
+
+  // Wait for all writer threads to die.
+  for (std::thread& writer : threads) {
+    writer.join();
+  }
+
+  // Stop reader thread.
+  stopReader = true;
+  reader.join();
+
+  // Check that only the provided stack-based sub-buffer was modified.
+  uint32_t changed = 0;
+  for (size_t offset = MBSize; offset < MBSize * 2; ++offset) {
+    if (buffer[offset] != fillByteAt(offset)) {
+      ++changed;
+    }
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffer should be unchanged.
+  auto expectUntouched = [&](size_t aBegin, size_t aEnd) {
+    for (size_t offset = aBegin; offset < aEnd; ++offset) {
+      MOZ_RELEASE_ASSERT(buffer[offset] == fillByteAt(offset));
+    }
+  };
+  expectUntouched(0, MBSize);
+  expectUntouched(MBSize * 2, kTotalBytes);
+
+  printf("TestBlocksRingBufferThreading done\n");
+}
+
+// Writes objects of various types into a BlocksRingBuffer and checks that
+// they read back with the same values: plain values, a wrapped literal
+// C-string pointer, std::string, ProfileBufferBlockIndex, std::tuple/Tuple,
+// UniqueFreePtr<char>, Span, Maybe, Variant, and finally a whole
+// BlocksRingBuffer stored inside another one.
+void TestBlocksRingBufferSerialization() {
+  printf("TestBlocksRingBufferSerialization...\n");
+
+  // `rb` is only given the middle third of `buffer`; the first and last
+  // thirds are verified at the end of the test to still hold their initial
+  // values.
+  constexpr uint32_t MBSize = 64;
+  uint8_t buffer[MBSize * 3];
+  for (size_t i = 0; i < MBSize * 3; ++i) {
+    buffer[i] = uint8_t('A' + i);
+  }
+  BlocksRingBuffer rb(BlocksRingBuffer::ThreadSafety::WithMutex,
+                      &buffer[MBSize], MakePowerOfTwo32<MBSize>());
+
+  // Will expect literal string to always have the same address.
+#  define THE_ANSWER "The answer is "
+  const char* theAnswer = THE_ANSWER;
+
+  // Several objects written as one entry, then read back twice: first all at
+  // once into existing variables, then one by one through ReadObject<T>().
+  // The C string is compared by pointer value, not by content.
+  rb.PutObjects('0', WrapProfileBufferLiteralCStringPointer(THE_ANSWER), 42,
+                std::string(" but pi="), 3.14);
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    char c0;
+    const char* answer;
+    int integer;
+    std::string str;
+    double pi;
+    aER.ReadIntoObjects(c0, answer, integer, str, pi);
+    MOZ_RELEASE_ASSERT(c0 == '0');
+    MOZ_RELEASE_ASSERT(answer == theAnswer);
+    MOZ_RELEASE_ASSERT(integer == 42);
+    MOZ_RELEASE_ASSERT(str == " but pi=");
+    MOZ_RELEASE_ASSERT(pi == 3.14);
+  });
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    char c0 = aER.ReadObject<char>();
+    MOZ_RELEASE_ASSERT(c0 == '0');
+    const char* answer = aER.ReadObject<const char*>();
+    MOZ_RELEASE_ASSERT(answer == theAnswer);
+    int integer = aER.ReadObject<int>();
+    MOZ_RELEASE_ASSERT(integer == 42);
+    std::string str = aER.ReadObject<std::string>();
+    MOZ_RELEASE_ASSERT(str == " but pi=");
+    double pi = aER.ReadObject<double>();
+    MOZ_RELEASE_ASSERT(pi == 3.14);
+  });
+
+  rb.Clear();
+  // Write an int and store its ProfileBufferBlockIndex.
+  ProfileBufferBlockIndex blockIndex = rb.PutObject(123);
+  // It should be non-0.
+  MOZ_RELEASE_ASSERT(blockIndex != ProfileBufferBlockIndex{});
+  // Write that ProfileBufferBlockIndex.
+  rb.PutObject(blockIndex);
+  // Walk the blocks with iterators: exactly two entries are expected, the int
+  // followed by the block index.
+  rb.Read([&](BlocksRingBuffer::Reader* aR) {
+    BlocksRingBuffer::BlockIterator it = aR->begin();
+    const BlocksRingBuffer::BlockIterator itEnd = aR->end();
+    MOZ_RELEASE_ASSERT(it != itEnd);
+    MOZ_RELEASE_ASSERT((*it).ReadObject<int>() == 123);
+    ++it;
+    MOZ_RELEASE_ASSERT(it != itEnd);
+    MOZ_RELEASE_ASSERT((*it).ReadObject<ProfileBufferBlockIndex>() ==
+                       blockIndex);
+    ++it;
+    MOZ_RELEASE_ASSERT(it == itEnd);
+  });
+
+  // Same five values as above, this time written as a single std::tuple and
+  // read back element by element.
+  rb.Clear();
+  rb.PutObjects(
+      std::make_tuple('0', WrapProfileBufferLiteralCStringPointer(THE_ANSWER),
+                      42, std::string(" but pi="), 3.14));
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.ReadObject<char>() == '0');
+    MOZ_RELEASE_ASSERT(aER.ReadObject<const char*>() == theAnswer);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<int>() == 42);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<std::string>() == " but pi=");
+    MOZ_RELEASE_ASSERT(aER.ReadObject<double>() == 3.14);
+  });
+
+  // And again with a tuple built by MakeTuple.
+  rb.Clear();
+  rb.PutObjects(MakeTuple('0',
+                          WrapProfileBufferLiteralCStringPointer(THE_ANSWER),
+                          42, std::string(" but pi="), 3.14));
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.ReadObject<char>() == '0');
+    MOZ_RELEASE_ASSERT(aER.ReadObject<const char*>() == theAnswer);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<int>() == 42);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<std::string>() == " but pi=");
+    MOZ_RELEASE_ASSERT(aER.ReadObject<double>() == 3.14);
+  });
+
+  // UniqueFreePtr<char>: the source pointer is destroyed at the end of the
+  // inner scope, before the entry is read back, and the string read back is
+  // compared by content. (Presumably this verifies that the characters
+  // themselves were stored rather than the pointer -- to be confirmed against
+  // the serializer.)
+  rb.Clear();
+  {
+    UniqueFreePtr<char> ufps(strdup(THE_ANSWER));
+    rb.PutObjects(ufps);
+  }
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    auto ufps = aER.ReadObject<UniqueFreePtr<char>>();
+    MOZ_RELEASE_ASSERT(!!ufps);
+    MOZ_RELEASE_ASSERT(std::string(THE_ANSWER) == ufps.get());
+  });
+
+  // Span: written from one array, read back into a span over a second,
+  // zero-initialized array of the same length.
+  rb.Clear();
+  int intArray[] = {1, 2, 3, 4, 5};
+  rb.PutObjects(Span(intArray));
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    int intArrayOut[sizeof(intArray) / sizeof(intArray[0])] = {0};
+    auto outSpan = Span(intArrayOut);
+    aER.ReadIntoObject(outSpan);
+    for (size_t i = 0; i < sizeof(intArray) / sizeof(intArray[0]); ++i) {
+      MOZ_RELEASE_ASSERT(intArrayOut[i] == intArray[i]);
+    }
+  });
+
+  // Maybe: both the empty and the non-empty case.
+  rb.Clear();
+  rb.PutObjects(Maybe<int>(Nothing{}), Maybe<int>(Some(123)));
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    Maybe<int> mi0, mi1;
+    aER.ReadIntoObjects(mi0, mi1);
+    MOZ_RELEASE_ASSERT(mi0.isNothing());
+    MOZ_RELEASE_ASSERT(mi1.isSome());
+    MOZ_RELEASE_ASSERT(*mi1 == 123);
+  });
+
+  // Variant: `int` appears twice in the alternatives, so v0 and v2 select
+  // theirs explicitly by index. These three entries are left in `rb` (no
+  // Clear() follows) and are what the nested-buffer checks below expect.
+  rb.Clear();
+  using V = Variant<int, double, int>;
+  V v0(VariantIndex<0>{}, 123);
+  V v1(3.14);
+  V v2(VariantIndex<2>{}, 456);
+  rb.PutObjects(v0, v1, v2);
+  rb.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v0);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v1);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v2);
+  });
+
+  // 2nd BlocksRingBuffer to contain the 1st one. It has to be more than twice
+  // the size.
+  constexpr uint32_t MBSize2 = MBSize * 4;
+  uint8_t buffer2[MBSize2 * 3];
+  for (size_t i = 0; i < MBSize2 * 3; ++i) {
+    buffer2[i] = uint8_t('B' + i);
+  }
+  BlocksRingBuffer rb2(BlocksRingBuffer::ThreadSafety::WithoutMutex,
+                       &buffer2[MBSize2], MakePowerOfTwo32<MBSize2>());
+  rb2.PutObject(rb);
+
+  // 3rd BlocksRingBuffer deserialized from the 2nd one.
+  uint8_t buffer3[MBSize * 3];
+  for (size_t i = 0; i < MBSize * 3; ++i) {
+    buffer3[i] = uint8_t('C' + i);
+  }
+  BlocksRingBuffer rb3(BlocksRingBuffer::ThreadSafety::WithoutMutex,
+                       &buffer3[MBSize], MakePowerOfTwo32<MBSize>());
+  rb2.ReadEach([&](ProfileBufferEntryReader& aER) { aER.ReadIntoObject(rb3); });
+
+  // And a 4th heap-allocated one.
+  UniquePtr<BlocksRingBuffer> rb4up;
+  rb2.ReadEach([&](ProfileBufferEntryReader& aER) {
+    rb4up = aER.ReadObject<UniquePtr<BlocksRingBuffer>>();
+  });
+  MOZ_RELEASE_ASSERT(!!rb4up);
+
+  // Clear 1st and 2nd BlocksRingBuffers, to ensure we have made a deep copy
+  // into the 3rd&4th ones.
+  rb.Clear();
+  rb2.Clear();
+
+  // And now the 3rd one should have the same contents as the 1st one had.
+  rb3.ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v0);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v1);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v2);
+  });
+
+  // And 4th.
+  rb4up->ReadEach([&](ProfileBufferEntryReader& aER) {
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v0);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v1);
+    MOZ_RELEASE_ASSERT(aER.ReadObject<V>() == v2);
+  });
+
+  // In fact, the 3rd and 4th ones should have the same state, because they were
+  // created the same way.
+  MOZ_RELEASE_ASSERT(rb3.GetState().mRangeStart ==
+                     rb4up->GetState().mRangeStart);
+  MOZ_RELEASE_ASSERT(rb3.GetState().mRangeEnd == rb4up->GetState().mRangeEnd);
+  MOZ_RELEASE_ASSERT(rb3.GetState().mPushedBlockCount ==
+                     rb4up->GetState().mPushedBlockCount);
+  MOZ_RELEASE_ASSERT(rb3.GetState().mClearedBlockCount ==
+                     rb4up->GetState().mClearedBlockCount);
+
+  // Check that only the provided stack-based sub-buffer was modified.
+  uint32_t changed = 0;
+  for (size_t i = MBSize; i < MBSize * 2; ++i) {
+    changed += (buffer[i] == uint8_t('A' + i)) ? 0 : 1;
+  }
+  // Expect at least 75% changes.
+  MOZ_RELEASE_ASSERT(changed >= MBSize * 6 / 8);
+
+  // Everything around the sub-buffers should be unchanged.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer[i] == uint8_t('A' + i));
+  }
+
+  // Same guard-zone check around the storage given to the 2nd buffer...
+  for (size_t i = 0; i < MBSize2; ++i) {
+    MOZ_RELEASE_ASSERT(buffer2[i] == uint8_t('B' + i));
+  }
+  for (size_t i = MBSize2 * 2; i < MBSize2 * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer2[i] == uint8_t('B' + i));
+  }
+
+  // ...and around the storage given to the 3rd buffer.
+  for (size_t i = 0; i < MBSize; ++i) {
+    MOZ_RELEASE_ASSERT(buffer3[i] == uint8_t('C' + i));
+  }
+  for (size_t i = MBSize * 2; i < MBSize * 3; ++i) {
+    MOZ_RELEASE_ASSERT(buffer3[i] == uint8_t('C' + i));
+  }
+
+  printf("TestBlocksRingBufferSerialization done\n");
+}
+
+// Compile-time checks that mozilla::LiteralEmptyStringView<CHAR>() yields,
+// for both char and char16_t, a view that compares equal to an empty string
+// view, has a non-null data pointer, and has length 0. At run time this
+// function only prints its start/done messages.
+void TestLiteralEmptyStringView() {
+  printf("TestLiteralEmptyStringView...\n");
+
+  // 8-bit characters.
+  constexpr auto narrowEmpty = mozilla::LiteralEmptyStringView<char>();
+  static_assert(narrowEmpty == std::string_view(""));
+  static_assert(narrowEmpty.data() != nullptr);
+  static_assert(narrowEmpty.length() == 0);
+
+  // 16-bit characters.
+  constexpr auto wideEmpty = mozilla::LiteralEmptyStringView<char16_t>();
+  static_assert(wideEmpty == std::basic_string_view<char16_t>(u""));
+  static_assert(wideEmpty.data() != nullptr);
+  static_assert(wideEmpty.length() == 0);
+
+  printf("TestLiteralEmptyStringView done\n");
+}
+
+// Tests mozilla::ProfilerStringView<CHAR> for CHAR = char or char16_t:
+// construction from string literals, std::basic_string_view,
+// std::basic_string and an nsTString-like class, checking Data(), Length(),
+// IsLiteral() and IsReference() each time; then serialization into and
+// deserialization from a ProfileChunkedBuffer.
+template <typename CHAR>
+void TestProfilerStringView() {
+  if constexpr (std::is_same_v<CHAR, char>) {
+    printf("TestProfilerStringView<char>...\n");
+  } else if constexpr (std::is_same_v<CHAR, char16_t>) {
+    printf("TestProfilerStringView<char16_t>...\n");
+  } else {
+    // Any other character type fails at run time, when the test is called.
+    MOZ_RELEASE_ASSERT(false,
+                       "TestProfilerStringView only handles char and char16_t");
+  }
+
+  // Used to verify implicit constructions, as this will normally be used in
+  // function parameters.
+  // Each BSV(x) call below therefore builds a fresh ProfilerStringView from
+  // `x`, which is inspected within the same assertion expression.
+  auto BSV = [](mozilla::ProfilerStringView<CHAR>&& aBSV) {
+    return std::move(aBSV);
+  };
+
+  // These look like string literals, as expected by some string constructors.
+  const CHAR empty[0 + 1] = {CHAR('\0')};
+  const CHAR hi[2 + 1] = {
+      CHAR('h'),
+      CHAR('i'),
+      CHAR('\0'),
+  };
+
+  // Literal empty string.
+  MOZ_RELEASE_ASSERT(BSV(empty).Data());
+  MOZ_RELEASE_ASSERT(BSV(empty).Data()[0] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(empty).Length() == 0);
+  MOZ_RELEASE_ASSERT(BSV(empty).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(empty).IsReference());
+
+  // Literal non-empty string.
+  MOZ_RELEASE_ASSERT(BSV(hi).Data());
+  MOZ_RELEASE_ASSERT(BSV(hi).Data()[0] == CHAR('h'));
+  MOZ_RELEASE_ASSERT(BSV(hi).Data()[1] == CHAR('i'));
+  MOZ_RELEASE_ASSERT(BSV(hi).Data()[2] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(hi).Length() == 2);
+  MOZ_RELEASE_ASSERT(BSV(hi).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(hi).IsReference());
+
+  // std::string_view to a literal empty string.
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(empty)).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(empty)).Data()[0] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(empty)).Length() == 0);
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string_view<CHAR>(empty)).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(empty)).IsReference());
+
+  // std::string_view to a literal non-empty string.
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).Data()[0] ==
+                     CHAR('h'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).Data()[1] ==
+                     CHAR('i'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).Data()[2] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).Length() == 2);
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string_view<CHAR>(hi)).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>(hi)).IsReference());
+
+  // Default std::string_view points at nullptr, ProfilerStringView converts it
+  // to the literal empty string.
+  MOZ_RELEASE_ASSERT(!std::basic_string_view<CHAR>().data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>()).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>()).Data()[0] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>()).Length() == 0);
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string_view<CHAR>()).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string_view<CHAR>()).IsReference());
+
+  // std::string to a literal empty string.
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(empty)).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(empty)).Data()[0] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(empty)).Length() == 0);
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string<CHAR>(empty)).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(empty)).IsReference());
+
+  // std::string to a literal non-empty string.
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).Data()[0] == CHAR('h'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).Data()[1] == CHAR('i'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).Data()[2] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).Length() == 2);
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string<CHAR>(hi)).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>(hi)).IsReference());
+
+  // Default std::string contains an empty null-terminated string.
+  MOZ_RELEASE_ASSERT(std::basic_string<CHAR>().data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>()).Data());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>()).Data()[0] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>()).Length() == 0);
+  MOZ_RELEASE_ASSERT(!BSV(std::basic_string<CHAR>()).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(std::basic_string<CHAR>()).IsReference());
+
+  // Class that quacks like nsTString (with Data(), Length(), IsLiteral()), to
+  // check that ProfilerStringView can read from them.
+  // It just stores and returns the three values given to its constructor, so
+  // each test below fully controls what ProfilerStringView gets to see.
+  class FakeNsTString {
+   public:
+    FakeNsTString(const CHAR* aData, size_t aLength, bool aIsLiteral)
+        : mData(aData), mLength(aLength), mIsLiteral(aIsLiteral) {}
+
+    const CHAR* Data() const { return mData; }
+    size_t Length() const { return mLength; }
+    bool IsLiteral() const { return mIsLiteral; }
+
+   private:
+    const CHAR* mData;
+    size_t mLength;
+    bool mIsLiteral;
+  };
+
+  // FakeNsTString to nullptr.
+  // The resulting view is expected to expose a non-null, empty, literal
+  // string.
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(nullptr, 0, true)).Data());
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(nullptr, 0, true)).Data()[0] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(nullptr, 0, true)).Length() == 0);
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(nullptr, 0, true)).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(FakeNsTString(nullptr, 0, true)).IsReference());
+
+  // FakeNsTString to a literal empty string.
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(empty, 0, true)).Data());
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(empty, 0, true)).Data()[0] ==
+                     CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(empty, 0, true)).Length() == 0);
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(empty, 0, true)).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(FakeNsTString(empty, 0, true)).IsReference());
+
+  // FakeNsTString to a literal non-empty string.
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).Data());
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).Data()[0] == CHAR('h'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).Data()[1] == CHAR('i'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).Data()[2] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).Length() == 2);
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, true)).IsLiteral());
+  MOZ_RELEASE_ASSERT(!BSV(FakeNsTString(hi, 2, true)).IsReference());
+
+  // FakeNsTString to a non-literal non-empty string.
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).Data());
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).Data()[0] == CHAR('h'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).Data()[1] == CHAR('i'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).Data()[2] == CHAR('\0'));
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).Length() == 2);
+  MOZ_RELEASE_ASSERT(!BSV(FakeNsTString(hi, 2, false)).IsLiteral());
+  MOZ_RELEASE_ASSERT(BSV(FakeNsTString(hi, 2, false)).IsReference());
+
+  // Serialization and deserialization (with ownership).
+  constexpr size_t bufferMaxSize = 1024;
+  constexpr ProfileChunkedBuffer::Length chunkMinSize = 128;
+  ProfileBufferChunkManagerWithLocalLimit cm(bufferMaxSize, chunkMinSize);
+  ProfileChunkedBuffer cb(ProfileChunkedBuffer::ThreadSafety::WithMutex, cm);
+
+  // Literal string, serialized as raw pointer.
+  MOZ_RELEASE_ASSERT(cb.PutObject(BSV(hi)));
+  {
+    // `read` counts callback invocations: exactly one entry is expected.
+    // `outerBSV` receives the deserialized view so that it can be checked
+    // again after the read callback has returned.
+    unsigned read = 0;
+    ProfilerStringView<CHAR> outerBSV;
+    cb.ReadEach([&](ProfileBufferEntryReader& aER) {
+      ++read;
+      auto bsv = aER.ReadObject<ProfilerStringView<CHAR>>();
+      MOZ_RELEASE_ASSERT(bsv.Data());
+      MOZ_RELEASE_ASSERT(bsv.Data()[0] == CHAR('h'));
+      MOZ_RELEASE_ASSERT(bsv.Data()[1] == CHAR('i'));
+      MOZ_RELEASE_ASSERT(bsv.Data()[2] == CHAR('\0'));
+      MOZ_RELEASE_ASSERT(bsv.Length() == 2);
+      MOZ_RELEASE_ASSERT(bsv.IsLiteral());
+      MOZ_RELEASE_ASSERT(!bsv.IsReference());
+      outerBSV = std::move(bsv);
+    });
+    MOZ_RELEASE_ASSERT(read == 1);
+    MOZ_RELEASE_ASSERT(outerBSV.Data());
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[0] == CHAR('h'));
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[1] == CHAR('i'));
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[2] == CHAR('\0'));
+    MOZ_RELEASE_ASSERT(outerBSV.Length() == 2);
+    MOZ_RELEASE_ASSERT(outerBSV.IsLiteral());
+    MOZ_RELEASE_ASSERT(!outerBSV.IsReference());
+  }
+
+  cb.Clear();
+
+  // Non-literal string, content is serialized.
+  std::basic_string<CHAR> hiString(hi);
+  MOZ_RELEASE_ASSERT(cb.PutObject(BSV(hiString)));
+  {
+    unsigned read = 0;
+    ProfilerStringView<CHAR> outerBSV;
+    cb.ReadEach([&](ProfileBufferEntryReader& aER) {
+      ++read;
+      auto bsv = aER.ReadObject<ProfilerStringView<CHAR>>();
+      MOZ_RELEASE_ASSERT(bsv.Data());
+      // The deserialized view must not point at the original string's
+      // storage.
+      MOZ_RELEASE_ASSERT(bsv.Data() != hiString.data());
+      MOZ_RELEASE_ASSERT(bsv.Data()[0] == CHAR('h'));
+      MOZ_RELEASE_ASSERT(bsv.Data()[1] == CHAR('i'));
+      MOZ_RELEASE_ASSERT(bsv.Data()[2] == CHAR('\0'));
+      MOZ_RELEASE_ASSERT(bsv.Length() == 2);
+      // Special ownership case, neither a literal nor a reference!
+      MOZ_RELEASE_ASSERT(!bsv.IsLiteral());
+      MOZ_RELEASE_ASSERT(!bsv.IsReference());
+      // Test move of ownership.
+      outerBSV = std::move(bsv);
+      // Reading the moved-from view is deliberate here: it is expected to be
+      // left empty, hence the lint suppression.
+      // NOLINTNEXTLINE(bugprone-use-after-move, clang-analyzer-cplusplus.Move)
+      MOZ_RELEASE_ASSERT(bsv.Length() == 0);
+    });
+    MOZ_RELEASE_ASSERT(read == 1);
+    MOZ_RELEASE_ASSERT(outerBSV.Data());
+    MOZ_RELEASE_ASSERT(outerBSV.Data() != hiString.data());
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[0] == CHAR('h'));
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[1] == CHAR('i'));
+    MOZ_RELEASE_ASSERT(outerBSV.Data()[2] == CHAR('\0'));
+    MOZ_RELEASE_ASSERT(outerBSV.Length() == 2);
+    MOZ_RELEASE_ASSERT(!outerBSV.IsLiteral());
+    MOZ_RELEASE_ASSERT(!outerBSV.IsReference());
+  }
+
+  if constexpr (std::is_same_v<CHAR, char>) {
+    printf("TestProfilerStringView<char> done\n");
+  } else if constexpr (std::is_same_v<CHAR, char16_t>) {
+    printf("TestProfilerStringView<char16_t> done\n");
+  }
+}
+
+// Runs each of the tests listed below, one after the other. TestProfiler()
+// calls this first, before it initializes the profiler.
+void TestProfilerDependencies() {
+  TestPowerOfTwoMask();
+  TestPowerOfTwo();
+  TestLEB128();
+  TestChunk();
+  TestChunkManagerSingle();
+  TestChunkManagerWithLocalLimit();
+  TestControlledChunkManagerUpdate();
+  TestControlledChunkManagerWithLocalLimit();
+  TestChunkedBuffer();
+  TestChunkedBufferSingle();
+  TestModuloBuffer();
+  TestBlocksRingBufferAPI();
+  TestBlocksRingBufferUnderlyingBufferChanges();
+  TestBlocksRingBufferThreading();
+  TestBlocksRingBufferSerialization();
+  TestLiteralEmptyStringView();
+  // The string-view test is a template, run once per supported character
+  // type.
+  TestProfilerStringView<char>();
+  TestProfilerStringView<char16_t>();
+}
+
+// Depth to use one level below `aDepth`: one more than `aDepth`, except that
+// it stops growing once the cap is reached (to avoid too-deep recursion).
+static constexpr size_t NextDepth(size_t aDepth) {
+  constexpr size_t kDepthCap = 128;
+  if (aDepth >= kDepthCap) {
+    return aDepth;
+  }
+  return aDepth + 1;
+}
+
+// Cancellation flag for the Fibonacci() computation below: Fibonacci()
+// instances with DEPTH < 5 return early once this is true. TestProfiler()
+// resets it to false before starting the Fibonacci thread, and sets it to
+// true from the canceller thread (after its wait loop) and from the main
+// thread once the Fibonacci thread has been joined.
+Atomic<bool, Relaxed> sStopFibonacci;
+
+// Deliberately slow Fibonacci (`f(n)=f(n-1)+f(n-2)`, plain recursion), never
+// inlined, used to generate work for the profiler to observe.
+// DEPTH is the recursion depth as a template parameter, so that each depth is
+// a distinct function in the profiler output; NextDepth() stops it from
+// growing beyond its maximum.
+// When sStopFibonacci is set, instances with DEPTH < 5 give up and return a
+// placeholder value instead of recursing further.
+template <size_t DEPTH = 0>
+MOZ_NEVER_INLINE unsigned long long Fibonacci(unsigned long long n) {
+  AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING("fib", OTHER, std::to_string(DEPTH));
+
+  // Base cases: f(0)=0 and f(1)=1.
+  if (n <= 1) {
+    return n;
+  }
+
+  // Only the shallowest levels look at the cancellation flag.
+  constexpr bool checksForCancellation = DEPTH < 5;
+  if (checksForCancellation && sStopFibonacci) {
+    return 1'000'000'000;
+  }
+
+  static constexpr size_t MAX_MARKER_DEPTH = 10;
+  constexpr size_t CHILD_DEPTH = NextDepth(DEPTH);
+  const TimeStamp startTime = TimeStamp::NowUnfuzzed();
+
+  // f(n-2) is computed first, then f(n-1); the outermost call drops a marker
+  // in between.
+  const unsigned long long lowerTerm = Fibonacci<CHILD_DEPTH>(n - 2);
+  if (DEPTH == 0) {
+    BASE_PROFILER_MARKER_UNTYPED("Half-way through Fibonacci", OTHER);
+  }
+  const unsigned long long upperTerm = Fibonacci<CHILD_DEPTH>(n - 1);
+
+  // Interval marker covering both recursive calls, for shallow depths only.
+  if (DEPTH < MAX_MARKER_DEPTH) {
+    BASE_PROFILER_MARKER_TEXT("fib", OTHER,
+                              MarkerTiming::IntervalUntilNowFrom(startTime),
+                              std::to_string(DEPTH));
+  }
+  return lowerTerm + upperTerm;
+}
+
+void TestProfiler() {
+  printf("TestProfiler starting -- pid: %d, tid: %d\n",
+         baseprofiler::profiler_current_process_id(),
+         baseprofiler::profiler_current_thread_id());
+  // ::SleepMilli(10000);
+
+  TestProfilerDependencies();
+
+  {
+    printf("profiler_init()...\n");
+    AUTO_BASE_PROFILER_INIT;
+
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_is_active());
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_thread_is_being_profiled());
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_thread_is_sleeping());
+
+    const int mainThreadId =
+        mozilla::baseprofiler::profiler_current_thread_id();
+
+    MOZ_RELEASE_ASSERT(mozilla::baseprofiler::profiler_main_thread_id() ==
+                       mainThreadId);
+    MOZ_RELEASE_ASSERT(mozilla::baseprofiler::profiler_is_main_thread());
+
+    std::thread testThread([&]() {
+      const int testThreadId =
+          mozilla::baseprofiler::profiler_current_thread_id();
+      MOZ_RELEASE_ASSERT(testThreadId != mainThreadId);
+
+      MOZ_RELEASE_ASSERT(mozilla::baseprofiler::profiler_main_thread_id() !=
+                         testThreadId);
+      MOZ_RELEASE_ASSERT(!mozilla::baseprofiler::profiler_is_main_thread());
+    });
+    testThread.join();
+
+    printf("profiler_start()...\n");
+    Vector<const char*> filters;
+    // Profile all registered threads.
+    MOZ_RELEASE_ASSERT(filters.append(""));
+    const uint32_t features = baseprofiler::ProfilerFeature::Leaf |
+                              baseprofiler::ProfilerFeature::StackWalk |
+                              baseprofiler::ProfilerFeature::Threads;
+    baseprofiler::profiler_start(baseprofiler::BASE_PROFILER_DEFAULT_ENTRIES,
+                                 BASE_PROFILER_DEFAULT_INTERVAL, features,
+                                 filters.begin(), filters.length());
+
+    MOZ_RELEASE_ASSERT(baseprofiler::profiler_is_active());
+    MOZ_RELEASE_ASSERT(baseprofiler::profiler_thread_is_being_profiled());
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_thread_is_sleeping());
+
+    sStopFibonacci = false;
+
+    std::thread threadFib([]() {
+      AUTO_BASE_PROFILER_REGISTER_THREAD("fibonacci");
+      SleepMilli(5);
+      auto cause = baseprofiler::profiler_capture_backtrace();
+      AUTO_BASE_PROFILER_MARKER_TEXT(
+          "fibonacci", OTHER, MarkerStack::TakeBacktrace(std::move(cause)),
+          "First leaf call");
+      static const unsigned long long fibStart = 37;
+      printf("Fibonacci(%llu)...\n", fibStart);
+      AUTO_BASE_PROFILER_LABEL("Label around Fibonacci", OTHER);
+
+      unsigned long long f = Fibonacci(fibStart);
+      printf("Fibonacci(%llu) = %llu\n", fibStart, f);
+    });
+
+    std::thread threadCancelFib([]() {
+      AUTO_BASE_PROFILER_REGISTER_THREAD("fibonacci canceller");
+      SleepMilli(5);
+      AUTO_BASE_PROFILER_MARKER_TEXT("fibonacci", OTHER, {}, "Canceller");
+      static const int waitMaxSeconds = 10;
+      for (int i = 0; i < waitMaxSeconds; ++i) {
+        if (sStopFibonacci) {
+          AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING("fibCancel", OTHER,
+                                                  std::to_string(i));
+          return;
+        }
+        AUTO_BASE_PROFILER_THREAD_SLEEP;
+        SleepMilli(1000);
+      }
+      AUTO_BASE_PROFILER_LABEL_DYNAMIC_STRING("fibCancel", OTHER,
+                                              "Cancelling!");
+      sStopFibonacci = true;
+    });
+
+    {
+      AUTO_BASE_PROFILER_MARKER_TEXT("main thread", OTHER, {},
+                                     "joining fibonacci thread");
+      AUTO_BASE_PROFILER_THREAD_SLEEP;
+      threadFib.join();
+    }
+
+    {
+      AUTO_BASE_PROFILER_MARKER_TEXT("main thread", OTHER, {},
+                                     "joining fibonacci-canceller thread");
+      sStopFibonacci = true;
+      AUTO_BASE_PROFILER_THREAD_SLEEP;
+      threadCancelFib.join();
+    }
+
+    // Just making sure all payloads know how to (de)serialize and stream.
+
+    MOZ_RELEASE_ASSERT(
+        baseprofiler::AddMarker("markers 2.0 without options (omitted)",
+                                mozilla::baseprofiler::category::OTHER));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 without options (implicit brace-init)",
+        mozilla::baseprofiler::category::OTHER, {}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 without options (explicit init)",
+        mozilla::baseprofiler::category::OTHER, MarkerOptions()));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 without options (explicit brace-init)",
+        mozilla::baseprofiler::category::OTHER, MarkerOptions{}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with one option (implicit)",
+        mozilla::baseprofiler::category::OTHER, MarkerInnerWindowId(123)));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with one option (implicit brace-init)",
+        mozilla::baseprofiler::category::OTHER, {MarkerInnerWindowId(123)}));
+
+    MOZ_RELEASE_ASSERT(
+        baseprofiler::AddMarker("markers 2.0 with one option (explicit init)",
+                                mozilla::baseprofiler::category::OTHER,
+                                MarkerOptions(MarkerInnerWindowId(123))));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with one option (explicit brace-init)",
+        mozilla::baseprofiler::category::OTHER,
+        MarkerOptions{MarkerInnerWindowId(123)}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with two options (implicit brace-init)",
+        mozilla::baseprofiler::category::OTHER,
+        {MarkerInnerWindowId(123), MarkerStack::Capture()}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with two options (explicit init)",
+        mozilla::baseprofiler::category::OTHER,
+        MarkerOptions(MarkerInnerWindowId(123), MarkerStack::Capture())));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "markers 2.0 with two options (explicit brace-init)",
+        mozilla::baseprofiler::category::OTHER,
+        MarkerOptions{MarkerInnerWindowId(123), MarkerStack::Capture()}));
+
+    MOZ_RELEASE_ASSERT(
+        baseprofiler::AddMarker("default-templated markers 2.0 without options",
+                                mozilla::baseprofiler::category::OTHER));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "default-templated markers 2.0 with option",
+        mozilla::baseprofiler::category::OTHER, MarkerInnerWindowId(123)));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "explicitly-default-templated markers 2.0 without options",
+        mozilla::baseprofiler::category::OTHER, {},
+        ::mozilla::baseprofiler::markers::NoPayload{}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "explicitly-default-templated markers 2.0 with option",
+        mozilla::baseprofiler::category::OTHER, MarkerInnerWindowId(123),
+        ::mozilla::baseprofiler::markers::NoPayload{}));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "tracing", mozilla::baseprofiler::category::OTHER, {},
+        mozilla::baseprofiler::markers::Tracing{}, "category"));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "text", mozilla::baseprofiler::category::OTHER, {},
+        mozilla::baseprofiler::markers::TextMarker{}, "text text"));
+
+    MOZ_RELEASE_ASSERT(baseprofiler::AddMarker(
+        "media sample", mozilla::baseprofiler::category::OTHER, {},
+        mozilla::baseprofiler::markers::MediaSampleMarker{}, 123, 456));
+
+    printf("Sleep 1s...\n");
+    {
+      AUTO_BASE_PROFILER_THREAD_SLEEP;
+      SleepMilli(1000);
+    }
+
+    printf("baseprofiler_pause()...\n");
+    baseprofiler::profiler_pause();
+
+    Maybe<baseprofiler::ProfilerBufferInfo> info =
+        baseprofiler::profiler_get_buffer_info();
+    MOZ_RELEASE_ASSERT(info.isSome());
+    printf("Profiler buffer range: %llu .. %llu (%llu bytes)\n",
+           static_cast<unsigned long long>(info->mRangeStart),
+           static_cast<unsigned long long>(info->mRangeEnd),
+           // sizeof(ProfileBufferEntry) == 9
+           (static_cast<unsigned long long>(info->mRangeEnd) -
+            static_cast<unsigned long long>(info->mRangeStart)) *
+               9);
+    printf("Stats:         min(us) .. mean(us) .. max(us)  [count]\n");
+    printf("- Intervals:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mIntervalsUs.min,
+           info->mIntervalsUs.sum / info->mIntervalsUs.n,
+           info->mIntervalsUs.max, info->mIntervalsUs.n);
+    printf("- Overheads:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mOverheadsUs.min,
+           info->mOverheadsUs.sum / info->mOverheadsUs.n,
+           info->mOverheadsUs.max, info->mOverheadsUs.n);
+    printf("  - Locking:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mLockingsUs.min, info->mLockingsUs.sum / info->mLockingsUs.n,
+           info->mLockingsUs.max, info->mLockingsUs.n);
+    printf("  - Clearning: %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mCleaningsUs.min,
+           info->mCleaningsUs.sum / info->mCleaningsUs.n,
+           info->mCleaningsUs.max, info->mCleaningsUs.n);
+    printf("  - Counters:  %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mCountersUs.min, info->mCountersUs.sum / info->mCountersUs.n,
+           info->mCountersUs.max, info->mCountersUs.n);
+    printf("  - Threads:   %7.1f .. %7.1f  .. %7.1f  [%u]\n",
+           info->mThreadsUs.min, info->mThreadsUs.sum / info->mThreadsUs.n,
+           info->mThreadsUs.max, info->mThreadsUs.n);
+
+    printf("baseprofiler_get_profile()...\n");
+    UniquePtr<char[]> profile = baseprofiler::profiler_get_profile();
+
+    // Use a string view over the profile contents, for easier testing.
+    std::string_view profileSV = profile.get();
+
+    constexpr const auto svnpos = std::string_view::npos;
+    // TODO: Properly parse profile and check fields.
+    // Check for some expected marker schema JSON output.
+    MOZ_RELEASE_ASSERT(profileSV.find("\"markerSchema\": [") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"name\": \"Text\",") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"name\": \"tracing\",") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"name\": \"MediaSample\",") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"display\": [") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"marker-chart\"") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"marker-table\"") != svnpos);
+    MOZ_RELEASE_ASSERT(profileSV.find("\"format\": \"string\"") != svnpos);
+    // TODO: Add more checks for what's expected in the profile. Some of them
+    // are done in gtest's.
+
+    printf("baseprofiler_save_profile_to_file()...\n");
+    baseprofiler::profiler_save_profile_to_file("TestProfiler_profile.json");
+
+    printf("profiler_stop()...\n");
+    baseprofiler::profiler_stop();
+
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_is_active());
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_thread_is_being_profiled());
+    MOZ_RELEASE_ASSERT(!baseprofiler::profiler_thread_is_sleeping());
+
+    printf("profiler_shutdown()...\n");
+  }
+
+  printf("TestProfiler done\n");
+}
+
+// Print aString to stderr with minimal escaping, similar to how C++ string
+// literals are written, to help update comparison strings in tests below.
+void printEscaped(std::string_view aString) {
+  for (const char c : aString) {
+    switch (c) {
+      case '\n':
+        fprintf(stderr, "\\n\n");  // Escape sequence, then a real line break.
+        break;
+      case '"':
+        fprintf(stderr, "\\\"");
+        break;
+      case '\\':
+        fprintf(stderr, "\\\\");
+        break;
+      default:
+        if (c >= ' ' && c <= '~') {
+          fprintf(stderr, "%c", c);
+        } else {  // Via unsigned char, so bytes >= 0x80 don't sign-extend.
+          fprintf(stderr, "\\x%02x", unsigned(static_cast<unsigned char>(c)));
+        }
+        break;
+    }
+  }
+}
+
+// Run aF(SpliceableChunkedJSONWriter&, UniqueJSONStrings&) from inside a JSON
+// "data" array, then output the string table in a "stringTable" array, and
+// compare the full output to the document expected from aExpectedData and
+// aExpectedUniqueStrings. aUniqueStringsOrNull may provide the string table.
+template <typename F>
+static void VerifyUniqueStringContents(
+    F&& aF, std::string_view aExpectedData,
+    std::string_view aExpectedUniqueStrings,
+    mozilla::baseprofiler::UniqueJSONStrings* aUniqueStringsOrNull = nullptr) {
+  const auto singleLine = mozilla::JSONWriter::SingleLineStyle;
+  mozilla::baseprofiler::SpliceableChunkedJSONWriter json;
+
+  // The caller's string table, if any, replaces the function-local one.
+  mozilla::baseprofiler::UniqueJSONStrings ownStrings(singleLine);
+  mozilla::baseprofiler::UniqueJSONStrings& strings =
+      aUniqueStringsOrNull ? *aUniqueStringsOrNull : ownStrings;
+
+  // Stream {"data": [<aF output>], "stringTable": [<unique strings>]}.
+  json.Start(singleLine);
+  json.StartArrayProperty("data", singleLine);
+  std::forward<F>(aF)(json, strings);
+  json.EndArray();
+  json.StartArrayProperty("stringTable", singleLine);
+  strings.SpliceStringTableElements(json);
+  json.EndArray();
+  json.End();
+
+  UniquePtr<char[]> jsonString = json.ChunkedWriteFunc().CopyData();
+  MOZ_RELEASE_ASSERT(jsonString);
+  const std::string_view jsonStringView(jsonString.get());
+
+  // Rebuild the whole expected output around the two expected fragments.
+  std::string expected("{\"data\": [");
+  expected.append(aExpectedData)
+      .append("], \"stringTable\": [")
+      .append(aExpectedUniqueStrings)
+      .append("]}\n");
+
+  if (!(jsonStringView == expected)) {
+    // Both sides are escaped, to help update the tests' comparison strings.
+    fputs("Expected:\n------\n", stderr);
+    printEscaped(expected);
+    fputs(
+        "\n"
+        "------\n"
+        "Actual:\n"
+        "------\n",
+        stderr);
+    printEscaped(jsonStringView);
+    fputs("\n------\n", stderr);
+  }
+
+  MOZ_RELEASE_ASSERT(jsonStringView == expected);
+}
+
+void TestUniqueJSONStrings() {
+  printf("TestUniqueJSONStrings...\n");
+  using SCJW = mozilla::baseprofiler::SpliceableChunkedJSONWriter;
+  using UJS = mozilla::baseprofiler::UniqueJSONStrings;
+
+  // Empty everything.
+  VerifyUniqueStringContents([](SCJW& aWriter, UJS& aUniqueStrings) {}, "", "");
+
+  // Plain string element: the unique string table stays empty.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aWriter.StringElement("string");
+      },
+      R"("string")", "");
+
+  // One unique string: written as index 0 into the string table.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aUniqueStrings.WriteElement(aWriter, "string");
+      },
+      "0", R"("string")");
+
+  // One unique string twice: both elements refer to index 0.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aUniqueStrings.WriteElement(aWriter, "string");
+        aUniqueStrings.WriteElement(aWriter, "string");
+      },
+      "0, 0", R"("string")");
+
+  // Two single unique strings: indices follow the order of first use.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aUniqueStrings.WriteElement(aWriter, "string0");
+        aUniqueStrings.WriteElement(aWriter, "string1");
+      },
+      "0, 1", R"("string0", "string1")");
+
+  // Two unique strings with repetition: the repeat reuses its first index.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aUniqueStrings.WriteElement(aWriter, "string0");
+        aUniqueStrings.WriteElement(aWriter, "string1");
+        aUniqueStrings.WriteElement(aWriter, "string0");
+      },
+      "0, 1, 0", R"("string0", "string1")");
+
+  // Mix some object properties, for coverage.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aUniqueStrings.WriteElement(aWriter, "string0");
+        aWriter.StartObjectElement(mozilla::JSONWriter::SingleLineStyle);
+        {
+          aUniqueStrings.WriteProperty(aWriter, "p0", "prop");
+          aUniqueStrings.WriteProperty(aWriter, "p1", "string0");
+          aUniqueStrings.WriteProperty(aWriter, "p2", "prop");
+        }
+        aWriter.EndObject();
+        aUniqueStrings.WriteElement(aWriter, "string1");
+        aUniqueStrings.WriteElement(aWriter, "string0");
+        aUniqueStrings.WriteElement(aWriter, "prop");
+      },
+      R"(0, {"p0": 1, "p1": 0, "p2": 1}, 2, 0, 1)",
+      R"("string0", "prop", "string1")");
+
+  // Unique string table with pre-existing data: new indices start at 2.
+  {
+    UJS ujs(mozilla::JSONWriter::SingleLineStyle);
+    {
+      SCJW writer;
+      ujs.WriteElement(writer, "external0");
+      ujs.WriteElement(writer, "external1");
+      ujs.WriteElement(writer, "external0");
+    }
+    VerifyUniqueStringContents(
+        [](SCJW& aWriter, UJS& aUniqueStrings) {
+          aUniqueStrings.WriteElement(aWriter, "string0");
+          aUniqueStrings.WriteElement(aWriter, "string1");
+          aUniqueStrings.WriteElement(aWriter, "string0");
+        },
+        "2, 3, 2", R"("external0", "external1", "string0", "string1")", &ujs);
+  }
+
+  // Unique string table copied from another table with pre-existing data; the
+  // copy (not the source) is verified, so that the copy itself is exercised.
+  {
+    UJS ujs(mozilla::JSONWriter::SingleLineStyle);
+    {
+      SCJW writer;
+      ujs.WriteElement(writer, "external0");
+      ujs.WriteElement(writer, "external1");
+      ujs.WriteElement(writer, "external0");
+    }
+    UJS ujsCopy(ujs, mozilla::JSONWriter::SingleLineStyle);
+    VerifyUniqueStringContents(
+        [](SCJW& aWriter, UJS& aUniqueStrings) {
+          aUniqueStrings.WriteElement(aWriter, "string0");
+          aUniqueStrings.WriteElement(aWriter, "string1");
+          aUniqueStrings.WriteElement(aWriter, "string0");
+        },
+        "2, 3, 2", R"("external0", "external1", "string0", "string1")",
+        &ujsCopy);
+  }
+
+  // Unique string table through SpliceableJSONWriter.
+  VerifyUniqueStringContents(
+      [](SCJW& aWriter, UJS& aUniqueStrings) {
+        aWriter.SetUniqueStrings(aUniqueStrings);
+        aWriter.UniqueStringElement("string0");
+        aWriter.StartObjectElement(mozilla::JSONWriter::SingleLineStyle);
+        {
+          aWriter.UniqueStringProperty("p0", "prop");
+          aWriter.UniqueStringProperty("p1", "string0");
+          aWriter.UniqueStringProperty("p2", "prop");
+        }
+        aWriter.EndObject();
+        aWriter.UniqueStringElement("string1");
+        aWriter.UniqueStringElement("string0");
+        aWriter.UniqueStringElement("prop");
+        aWriter.ResetUniqueStrings();
+      },
+      R"(0, {"p0": 1, "p1": 0, "p2": 1}, 2, 0, 1)",
+      R"("string0", "prop", "string1")");
+
+  printf("TestUniqueJSONStrings done\n");
+}
+
+// Stream all entries of aBuffer, which must all be markers, as elements of a
+// "data" array property; stacks are replaced with a placeholder string.
+void StreamMarkers(const mozilla::ProfileChunkedBuffer& aBuffer,
+                   mozilla::baseprofiler::SpliceableJSONWriter& aWriter) {
+  auto streamFakeBacktrace = [&](mozilla::ProfileChunkedBuffer&) {
+    aWriter.StringElement("Real backtrace would be here");
+  };
+  aWriter.StartArrayProperty("data");
+  aBuffer.ReadEach([&](mozilla::ProfileBufferEntryReader& aER) {
+    const auto entryKind = aER.ReadObject<mozilla::ProfileBufferEntryKind>();
+    MOZ_RELEASE_ASSERT(entryKind == mozilla::ProfileBufferEntryKind::Marker);
+
+    const bool success =
+        mozilla::base_profiler_markers_detail::DeserializeAfterKindAndStream(
+            aER, aWriter, 0, streamFakeBacktrace);
+    MOZ_RELEASE_ASSERT(success);
+  });
+  aWriter.EndArray();
+}
+
+// Print the markers stored in aBuffer to std::cout, as a JSON object with the
+// markers' "data" array followed by the "stringTable" array.
+void PrintMarkers(const mozilla::ProfileChunkedBuffer& aBuffer) {
+  namespace bp = mozilla::baseprofiler;
+  bp::SpliceableJSONWriter json(
+      mozilla::MakeUnique<bp::OStreamJSONWriteFunc>(std::cout));
+  bp::UniqueJSONStrings strings;
+  json.SetUniqueStrings(strings);
+
+  json.Start();
+  StreamMarkers(aBuffer, json);
+  json.StartArrayProperty("stringTable");
+  strings.SpliceStringTableElements(json);
+  json.EndArray();
+  json.End();
+  json.ResetUniqueStrings();
+}
+
+// Check that aMarkerCategory matches the given category pair and category, and
+// that it survives a serialization round trip through a ProfileChunkedBuffer.
+static void SubTestMarkerCategory(
+    const mozilla::MarkerCategory& aMarkerCategory,
+    const mozilla::baseprofiler::ProfilingCategoryPair& aProfilingCategoryPair,
+    const mozilla::baseprofiler::ProfilingCategory& aProfilingCategory) {
+  MOZ_RELEASE_ASSERT(aMarkerCategory.CategoryPair() == aProfilingCategoryPair,
+                     "Unexpected MarkerCategory::CategoryPair()");
+  MOZ_RELEASE_ASSERT(
+      mozilla::MarkerCategory(aProfilingCategoryPair).CategoryPair() ==
+          aProfilingCategoryPair,
+      "MarkerCategory(<name>).CategoryPair() should return <name>");
+  MOZ_RELEASE_ASSERT(aMarkerCategory.GetCategory() == aProfilingCategory,
+                     "Unexpected MarkerCategory::GetCategory()");
+
+  mozilla::ProfileBufferChunkManagerSingle singleChunk(512);
+  mozilla::ProfileChunkedBuffer scratch(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, singleChunk);
+  mozilla::ProfileBufferBlockIndex i = scratch.PutObject(aMarkerCategory);
+  MOZ_RELEASE_ASSERT(i != mozilla::ProfileBufferBlockIndex{},
+                     "Failed serialization");
+  scratch.ReadEach([&](mozilla::ProfileBufferEntryReader& aER,
+                       mozilla::ProfileBufferBlockIndex aIndex) {
+    MOZ_RELEASE_ASSERT(aIndex == i, "Unexpected deserialization index");
+    const auto readCategory = aER.ReadObject<mozilla::MarkerCategory>();
+    MOZ_RELEASE_ASSERT(aER.RemainingBytes() == 0,
+                       "Unexpected extra serialized bytes");
+    MOZ_RELEASE_ASSERT(readCategory.CategoryPair() == aProfilingCategoryPair,
+                       "Incorrect deserialization value");
+  });
+}
+
+void TestMarkerCategory() {
+  printf("TestMarkerCategory...\n");
+
+  // For each sub-category <name> that MOZ_PROFILING_CATEGORY_LIST hands to
+  // CATEGORY_ENUM_SUBCATEGORY: statically check the type of
+  // baseprofiler::category::<name>, then run SubTestMarkerCategory on it.
+
+#  define CATEGORY_ENUM_BEGIN_CATEGORY(name, labelAsString, color)
+#  define CATEGORY_ENUM_SUBCATEGORY(supercategory, name, labelAsString)     \
+    static_assert(                                                          \
+        std::is_same_v<decltype(mozilla::baseprofiler::category::name),     \
+                       const mozilla::MarkerCategory>,                      \
+        "baseprofiler::category::<name> should be a const MarkerCategory"); \
+                                                                            \
+    SubTestMarkerCategory(                                                  \
+        mozilla::baseprofiler::category::name,                              \
+        mozilla::baseprofiler::ProfilingCategoryPair::name,                 \
+        mozilla::baseprofiler::ProfilingCategory::supercategory);
+#  define CATEGORY_ENUM_END_CATEGORY
+  MOZ_PROFILING_CATEGORY_LIST(CATEGORY_ENUM_BEGIN_CATEGORY,
+                              CATEGORY_ENUM_SUBCATEGORY,
+                              CATEGORY_ENUM_END_CATEGORY)
+#  undef CATEGORY_ENUM_BEGIN_CATEGORY
+#  undef CATEGORY_ENUM_SUBCATEGORY
+#  undef CATEGORY_ENUM_END_CATEGORY
+
+  printf("TestMarkerCategory done\n");
+}
+
+void TestMarkerThreadId() {
+  printf("TestMarkerThreadId...\n");
+  // Only a default-constructed MarkerThreadId is expected to be unspecified.
+  MOZ_RELEASE_ASSERT(MarkerThreadId{}.IsUnspecified());
+  MOZ_RELEASE_ASSERT(!MarkerThreadId::MainThread().IsUnspecified());
+  MOZ_RELEASE_ASSERT(!MarkerThreadId::CurrentThread().IsUnspecified());
+
+  MOZ_RELEASE_ASSERT(!MarkerThreadId{42}.IsUnspecified());
+  MOZ_RELEASE_ASSERT(MarkerThreadId{42}.ThreadId() == 42);
+
+  // We'll assume that this test runs in the main thread (which should be true
+  // when called from the `main` function), so "current" is also "main" here.
+  MOZ_RELEASE_ASSERT(MarkerThreadId::MainThread().ThreadId() ==
+                     mozilla::baseprofiler::profiler_main_thread_id());
+
+  MOZ_RELEASE_ASSERT(MarkerThreadId::CurrentThread().ThreadId() ==
+                     mozilla::baseprofiler::profiler_current_thread_id());
+
+  MOZ_RELEASE_ASSERT(MarkerThreadId::CurrentThread().ThreadId() ==
+                     mozilla::baseprofiler::profiler_main_thread_id());
+  // Same checks from another thread, whose id must differ from the main one.
+  std::thread testThread([]() {
+    MOZ_RELEASE_ASSERT(!MarkerThreadId::MainThread().IsUnspecified());
+    MOZ_RELEASE_ASSERT(!MarkerThreadId::CurrentThread().IsUnspecified());
+
+    MOZ_RELEASE_ASSERT(MarkerThreadId::MainThread().ThreadId() ==
+                       mozilla::baseprofiler::profiler_main_thread_id());
+
+    MOZ_RELEASE_ASSERT(MarkerThreadId::CurrentThread().ThreadId() ==
+                       mozilla::baseprofiler::profiler_current_thread_id());
+
+    MOZ_RELEASE_ASSERT(MarkerThreadId::CurrentThread().ThreadId() !=
+                       mozilla::baseprofiler::profiler_main_thread_id());
+  });
+  testThread.join();
+
+  printf("TestMarkerThreadId done\n");
+}
+
+// Add payload-less markers named from a literal, a std::string and a
+// std::string_view; each block index must be set and above the previous one.
+void TestMarkerNoPayload() {
+  printf("TestMarkerNoPayload...\n");
+
+  namespace bp = mozilla::baseprofiler;
+
+  mozilla::ProfileBufferChunkManagerSingle singleChunk(512);
+  mozilla::ProfileChunkedBuffer markers(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, singleChunk);
+
+  mozilla::ProfileBufferBlockIndex i0 = bp::AddMarkerToBuffer(
+      markers, "literal", bp::category::OTHER_Profiling);
+  MOZ_RELEASE_ASSERT(i0);
+
+  const std::string dynamicName = "dynamic";
+  mozilla::ProfileBufferBlockIndex i1 = bp::AddMarkerToBuffer(
+      markers, dynamicName, bp::category::GRAPHICS_FlushingAsyncPaints, {});
+  MOZ_RELEASE_ASSERT(i1);
+  MOZ_RELEASE_ASSERT(i1 > i0);
+
+  mozilla::ProfileBufferBlockIndex i2 = bp::AddMarkerToBuffer(
+      markers, std::string_view("string_view"),
+      bp::category::GRAPHICS_FlushingAsyncPaints, {});
+  MOZ_RELEASE_ASSERT(i2);
+  MOZ_RELEASE_ASSERT(i2 > i1);
+
+#  ifdef DEBUG
+  markers.Dump();
+#  endif
+
+  PrintMarkers(markers);
+
+  printf("TestMarkerNoPayload done\n");
+}
+
+void TestUserMarker() {
+  printf("TestUserMarker...\n");
+
+  // User-defined marker type with a text payload (streamed as "text").
+  // It's fine to define it right in the function where it's used.
+  struct MarkerTypeTestMinimal {
+    static constexpr Span<const char> MarkerTypeName() {
+      return MakeStringSpan("test-minimal");
+    }
+    static void StreamJSONMarkerData(
+        mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
+        const std::string& aText) {
+      aWriter.StringProperty("text", aText);
+    }
+    static mozilla::MarkerSchema MarkerTypeDisplay() {
+      using MS = mozilla::MarkerSchema;
+      MS schema{MS::Location::markerChart, MS::Location::markerTable};
+      schema.SetTooltipLabel("tooltip for test-minimal");
+      schema.AddKeyLabelFormatSearchable("text", "Text", MS::Format::string,
+                                         MS::Searchable::searchable);
+      return schema;
+    }
+  };
+
+  mozilla::ProfileBufferChunkManagerSingle chunkManager(1024);
+  mozilla::ProfileChunkedBuffer buffer(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
+  // Every marker below is named "test2"; options and payload text vary.
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling, {},
+      MarkerTypeTestMinimal{}, std::string("payload text")));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerThreadId(123), MarkerTypeTestMinimal{},
+      std::string("ThreadId(123)")));
+
+  auto start = mozilla::TimeStamp::NowUnfuzzed();
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerTiming::InstantAt(start), MarkerTypeTestMinimal{},
+      std::string("InstantAt(start)")));
+
+  auto then = mozilla::TimeStamp::NowUnfuzzed();
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerTiming::IntervalStart(start), MarkerTypeTestMinimal{},
+      std::string("IntervalStart(start)")));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerTiming::IntervalEnd(then), MarkerTypeTestMinimal{},
+      std::string("IntervalEnd(then)")));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerTiming::Interval(start, then), MarkerTypeTestMinimal{},
+      std::string("Interval(start, then)")));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerTiming::IntervalUntilNowFrom(start),
+      MarkerTypeTestMinimal{}, std::string("IntervalUntilNowFrom(start)")));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerStack::NoStack(), MarkerTypeTestMinimal{},
+      std::string("NoStack")));
+  // Note: We cannot test stack-capture here, because the profiler is not
+  // initialized.
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, "test2", mozilla::baseprofiler::category::OTHER_Profiling,
+      mozilla::MarkerInnerWindowId(123), MarkerTypeTestMinimal{},
+      std::string("InnerWindowId(123)")));
+
+#  ifdef DEBUG
+  buffer.Dump();
+#  endif
+
+  PrintMarkers(buffer);
+
+  printf("TestUserMarker done\n");
+}
+
+void TestPredefinedMarkers() {
+  printf("TestPredefinedMarkers...\n");
+  // Add Tracing, TextMarker and MediaSampleMarker markers to a local buffer.
+  mozilla::ProfileBufferChunkManagerSingle chunkManager(1024);
+  mozilla::ProfileChunkedBuffer buffer(
+      mozilla::ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, std::string_view("tracing"),
+      mozilla::baseprofiler::category::OTHER, {},
+      mozilla::baseprofiler::markers::Tracing{}, "category"));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, std::string_view("text"), mozilla::baseprofiler::category::OTHER,
+      {}, mozilla::baseprofiler::markers::TextMarker{}, "text text"));
+
+  MOZ_RELEASE_ASSERT(mozilla::baseprofiler::AddMarkerToBuffer(
+      buffer, std::string_view("media"), mozilla::baseprofiler::category::OTHER,
+      {}, mozilla::baseprofiler::markers::MediaSampleMarker{}, 123, 456));
+
+#  ifdef DEBUG
+  buffer.Dump();
+#  endif
+
+  PrintMarkers(buffer);
+
+  printf("TestPredefinedMarkers done\n");
+}
+
+// Run every marker-related test above, in order.
+void TestProfilerMarkers() {
+  const auto pid = mozilla::baseprofiler::profiler_current_process_id();
+  const auto tid = mozilla::baseprofiler::profiler_current_thread_id();
+  printf("TestProfilerMarkers -- pid: %d, tid: %d\n", pid, tid);
+  // ::SleepMilli(10000);
+
+  TestUniqueJSONStrings();
+  TestMarkerCategory();
+  TestMarkerThreadId();
+  TestMarkerNoPayload();
+  TestUserMarker();
+  TestPredefinedMarkers();
+  printf("TestProfilerMarkers done\n");
+}
+
+#else  // MOZ_GECKO_PROFILER
+
+// Testing that macros are still #defined (but do nothing) when
+// MOZ_GECKO_PROFILER is disabled.
+void TestProfiler() {
+  // The arguments don't need to make sense (most are the `catch` keyword), we
+  // just want to know that the macros are defined and don't do anything.
+
+#  ifndef AUTO_BASE_PROFILER_INIT
+#    error AUTO_BASE_PROFILER_INIT not #defined
+#  endif  // AUTO_BASE_PROFILER_INIT
+  AUTO_BASE_PROFILER_INIT;
+
+  // This wouldn't build if the macro did output its arguments.
+#  ifndef AUTO_BASE_PROFILER_MARKER_TEXT
+#    error AUTO_BASE_PROFILER_MARKER_TEXT not #defined
+#  endif  // AUTO_BASE_PROFILER_MARKER_TEXT
+  AUTO_BASE_PROFILER_MARKER_TEXT(catch, catch, catch, catch);
+
+#  ifndef AUTO_BASE_PROFILER_LABEL
+#    error AUTO_BASE_PROFILER_LABEL not #defined
+#  endif  // AUTO_BASE_PROFILER_LABEL
+  AUTO_BASE_PROFILER_LABEL(catch, catch);
+
+#  ifndef AUTO_BASE_PROFILER_THREAD_SLEEP
+#    error AUTO_BASE_PROFILER_THREAD_SLEEP not #defined
+#  endif  // AUTO_BASE_PROFILER_THREAD_SLEEP
+  AUTO_BASE_PROFILER_THREAD_SLEEP;
+
+#  ifndef BASE_PROFILER_MARKER_UNTYPED
+#    error BASE_PROFILER_MARKER_UNTYPED not #defined
+#  endif  // BASE_PROFILER_MARKER_UNTYPED
+  BASE_PROFILER_MARKER_UNTYPED(catch, catch);
+  BASE_PROFILER_MARKER_UNTYPED(catch, catch, catch);
+
+#  ifndef BASE_PROFILER_MARKER
+#    error BASE_PROFILER_MARKER not #defined
+#  endif  // BASE_PROFILER_MARKER
+  BASE_PROFILER_MARKER(catch, catch, catch, catch);
+  BASE_PROFILER_MARKER(catch, catch, catch, catch, catch);
+
+#  ifndef BASE_PROFILER_MARKER_TEXT
+#    error BASE_PROFILER_MARKER_TEXT not #defined
+#  endif  // BASE_PROFILER_MARKER_TEXT
+  BASE_PROFILER_MARKER_TEXT(catch, catch, catch, catch);
+  // Backtrace functions must report that nothing was captured.
+  MOZ_RELEASE_ASSERT(!mozilla::baseprofiler::profiler_get_backtrace(),
+                     "profiler_get_backtrace should return nullptr");
+  mozilla::ProfileChunkedBuffer buffer;
+  MOZ_RELEASE_ASSERT(
+      !mozilla::baseprofiler::profiler_capture_backtrace_into(buffer),
+      "profiler_capture_backtrace_into should return false");
+  MOZ_RELEASE_ASSERT(!mozilla::baseprofiler::profiler_capture_backtrace(),
+                     "profiler_capture_backtrace should return nullptr");
+}
+
+// Empty counterpart of the MOZ_GECKO_PROFILER version above, so that `main`
+// can call TestProfilerMarkers() when MOZ_GECKO_PROFILER is disabled.
+void TestProfilerMarkers() {
+  // Nothing is exercised here: unlike the TestProfiler() stub, this function
+  // does not check any macro.
+}
+
+#endif  // MOZ_GECKO_PROFILER else
+
+#ifdef XP_WIN
+int wmain()
+#else
+int main()
+#endif  // XP_WIN
+{
+#if defined(MOZ_GECKO_PROFILER)
+  const auto pid = baseprofiler::profiler_current_process_id();
+  const auto tid = baseprofiler::profiler_current_thread_id();
+  printf("BaseTestProfiler -- pid: %d, tid: %d\n", pid, tid);
+  // ::SleepMilli(10000);
+#endif  // defined(MOZ_GECKO_PROFILER)
+
+  // TestProfiler and TestProfilerMarkers each exist in two versions above; the
+  // pair that gets compiled depends on whether MOZ_GECKO_PROFILER is #defined.
+  TestProfiler();
+  TestProfilerMarkers();
+
+  return 0;
+}
diff --git a/mozglue/tests/TestNativeNt.cpp b/mozglue/tests/TestNativeNt.cpp
new file mode 100644
index 0000000000..77cd3ad4a3
--- /dev/null
+++ b/mozglue/tests/TestNativeNt.cpp
@@ -0,0 +1,295 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "nscore.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/UniquePtr.h"
+
+#include <stdio.h>
+#include <windows.h>
+
+// File names fed to Contains12DigitHexString() and
+// IsFileNameAtLeast16HexDigits() in wmain().  In the kHexNN names, NN is the
+// length of the run of hexadecimal digits contained in the file name.
+const wchar_t kNormal[] = L"Foo.dll";
+const wchar_t kHex12[] = L"Foo.ABCDEF012345.dll";
+const wchar_t kHex15[] = L"ABCDEF012345678.dll";
+const wchar_t kHex16[] = L"ABCDEF0123456789.dll";
+const wchar_t kHex17[] = L"ABCDEF0123456789a.dll";
+const wchar_t kHex24[] = L"ABCDEF0123456789cdabef98.dll";
+const wchar_t kHex8[] = L"01234567.dll";
+// Twelve characters between the dots, but 'G' is not a hex digit.
+const wchar_t kNonHex12[] = L"Foo.ABCDEFG12345.dll";
+const wchar_t kHex13[] = L"Foo.ABCDEF0123456.dll";
+const wchar_t kHex11[] = L"Foo.ABCDEF01234.dll";
+// Sixteen hex digits preceded by a non-hex character.
+const wchar_t kPrefixedHex16[] = L"Pabcdef0123456789.dll";
+// Value written to, and read back from, sTlsData by the thread-local storage
+// test in wmain().
+const uint32_t kTlsDataValue = 1234;
+static MOZ_THREAD_LOCAL(uint32_t) sTlsData;
+
+const char kFailFmt[] =
+    "TEST-FAILED | NativeNt | %s(%s) should have returned %s but did not\n";
+
+#define RUN_TEST(fn, varName, expected)         \
+  if (fn(varName) == !expected) {               \
+    printf(kFailFmt, #fn, #varName, #expected); \
+    return 1;                                   \
+  }
+
+#define EXPECT_FAIL(fn, varName) RUN_TEST(fn, varName, false)
+
+#define EXPECT_SUCCESS(fn, varName) RUN_TEST(fn, varName, true)
+
+using namespace mozilla;
+using namespace mozilla::nt;
+
+// Queries |aAddress| in |aProcess| with both the Win32 ::VirtualQueryEx and
+// mozilla::nt::VirtualQueryEx and returns true when the two agree: same return
+// value and, if that value is non-zero, the same bytes in the output
+// structure.  Prints a TEST-FAILED line and returns false otherwise.
+bool TestVirtualQuery(HANDLE aProcess, LPCVOID aAddress) {
+  MEMORY_BASIC_INFORMATION win32Info = {};
+  MEMORY_BASIC_INFORMATION ntInfo = {};
+
+  const SIZE_T win32Size =
+      ::VirtualQueryEx(aProcess, aAddress, &win32Info, sizeof(win32Info));
+  const SIZE_T ntSize =
+      mozilla::nt::VirtualQueryEx(aProcess, aAddress, &ntInfo, sizeof(ntInfo));
+
+  if (win32Size != ntSize) {
+    printf("TEST-FAILED | NativeNt | The returned values mismatch\n");
+    return false;
+  }
+
+  // A zero size means both APIs failed, which also counts as agreement; only
+  // compare the structures when there is something to compare.
+  if (win32Size && memcmp(&win32Info, &ntInfo, win32Size) != 0) {
+    printf("TEST-FAILED | NativeNt | The returned structures mismatch\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Convenience overload for the tests below: wraps the null-terminated |aName|
+// in a UNICODE_STRING and forwards it to nt::GetModuleHandleFromLeafName.
+LauncherResult<HMODULE> GetModuleHandleFromLeafName(const wchar_t* aName) {
+  UNICODE_STRING leafName;
+  ::RtlInitUnicodeString(&leafName, aName);
+
+  return nt::GetModuleHandleFromLeafName(leafName);
+}
+
+// Installs |aNewValue| as the thread local storage pointer and returns the
+// pointer that was installed before.
+//
+// This must stay a non-inline function: it keeps the compiler from caching the
+// thread local storage pointer in a register before a thread-local variable is
+// accessed.
+MOZ_NEVER_INLINE PVOID SwapThreadLocalStoragePointer(PVOID aNewValue) {
+  const auto previous = RtlGetThreadLocalStoragePointer();
+
+  RtlSetThreadLocalStoragePointerForTestingOnly(aNewValue);
+
+  return previous;
+}
+
+// Entry point of the NativeNt test.  Runs every check in sequence; the first
+// failing check prints a TEST-FAILED line and returns 1.  Returns 0 after
+// printing TEST-PASS when everything succeeded.
+int wmain(int argc, wchar_t* argv[]) {
+  // Wrap every test file name in a UNICODE_STRING, the type the functions
+  // under test are called with.
+  UNICODE_STRING normal;
+  ::RtlInitUnicodeString(&normal, kNormal);
+
+  UNICODE_STRING hex12;
+  ::RtlInitUnicodeString(&hex12, kHex12);
+
+  UNICODE_STRING hex16;
+  ::RtlInitUnicodeString(&hex16, kHex16);
+
+  UNICODE_STRING hex24;
+  ::RtlInitUnicodeString(&hex24, kHex24);
+
+  UNICODE_STRING hex8;
+  ::RtlInitUnicodeString(&hex8, kHex8);
+
+  UNICODE_STRING nonHex12;
+  ::RtlInitUnicodeString(&nonHex12, kNonHex12);
+
+  UNICODE_STRING hex13;
+  ::RtlInitUnicodeString(&hex13, kHex13);
+
+  UNICODE_STRING hex11;
+  ::RtlInitUnicodeString(&hex11, kHex11);
+
+  UNICODE_STRING hex15;
+  ::RtlInitUnicodeString(&hex15, kHex15);
+
+  UNICODE_STRING hex17;
+  ::RtlInitUnicodeString(&hex17, kHex17);
+
+  UNICODE_STRING prefixedHex16;
+  ::RtlInitUnicodeString(&prefixedHex16, kPrefixedHex16);
+
+  // Only the name with a run of exactly 12 hex digits is expected to match.
+  // Note that EXPECT_FAIL / EXPECT_SUCCESS return 1 from wmain on a mismatch.
+  EXPECT_FAIL(Contains12DigitHexString, normal);
+  EXPECT_SUCCESS(Contains12DigitHexString, hex12);
+  EXPECT_FAIL(Contains12DigitHexString, hex13);
+  EXPECT_FAIL(Contains12DigitHexString, hex11);
+  EXPECT_FAIL(Contains12DigitHexString, hex16);
+  EXPECT_FAIL(Contains12DigitHexString, nonHex12);
+
+  // Names of 16, 17 and 24 hex digits are expected to match; shorter runs and
+  // the name with a non-hex prefix are not.
+  EXPECT_FAIL(IsFileNameAtLeast16HexDigits, normal);
+  EXPECT_FAIL(IsFileNameAtLeast16HexDigits, hex12);
+  EXPECT_SUCCESS(IsFileNameAtLeast16HexDigits, hex24);
+  EXPECT_SUCCESS(IsFileNameAtLeast16HexDigits, hex16);
+  EXPECT_SUCCESS(IsFileNameAtLeast16HexDigits, hex17);
+  EXPECT_FAIL(IsFileNameAtLeast16HexDigits, hex8);
+  EXPECT_FAIL(IsFileNameAtLeast16HexDigits, hex15);
+  EXPECT_FAIL(IsFileNameAtLeast16HexDigits, prefixedHex16);
+
+  // RtlGetProcessHeap() must agree with the Win32 API.
+  if (RtlGetProcessHeap() != ::GetProcessHeap()) {
+    printf("TEST-FAILED | NativeNt | RtlGetProcessHeap() is broken\n");
+    return 1;
+  }
+
+#ifdef HAVE_SEH_EXCEPTIONS
+  // Thread local storage pointer test: with the pointer swapped to nullptr,
+  // writing sTlsData is expected to raise an access violation; after the
+  // original pointer is restored, sTlsData must work normally again.
+  PVOID origTlsHead = nullptr;
+  bool isExceptionThrown = false;
+  // Touch sTlsData.get() several times to prevent the call to sTlsData.set()
+  // from being optimized out in PGO build.
+  printf("sTlsData#1 = %08x\n", sTlsData.get());
+  MOZ_SEH_TRY {
+    // Need to call SwapThreadLocalStoragePointer inside __try to make sure
+    // accessing sTlsData is caught by SEH.  This is due to clang's design.
+    // https://bugs.llvm.org/show_bug.cgi?id=44174.
+    origTlsHead = SwapThreadLocalStoragePointer(nullptr);
+    sTlsData.set(~kTlsDataValue);
+  }
+  MOZ_SEH_EXCEPT(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION
+                     ? EXCEPTION_EXECUTE_HANDLER
+                     : EXCEPTION_CONTINUE_SEARCH) {
+    isExceptionThrown = true;
+  }
+  // Restore the original pointer before touching sTlsData again.
+  SwapThreadLocalStoragePointer(origTlsHead);
+  printf("sTlsData#2 = %08x\n", sTlsData.get());
+  sTlsData.set(kTlsDataValue);
+  printf("sTlsData#3 = %08x\n", sTlsData.get());
+  if (!isExceptionThrown || sTlsData.get() != kTlsDataValue) {
+    printf(
+        "TEST-FAILED | NativeNt | RtlGetThreadLocalStoragePointer() is "
+        "broken\n");
+    return 1;
+  }
+#endif
+
+  // RtlGetCurrentThreadId() must agree with the Win32 API.
+  if (RtlGetCurrentThreadId() != ::GetCurrentThreadId()) {
+    printf("TEST-FAILED | NativeNt | RtlGetCurrentThreadId() is broken\n");
+    return 1;
+  }
+
+  // Version test: read kernel32.dll's file version through the Win32 version
+  // APIs to get the expected value...
+  const wchar_t kKernel32[] = L"kernel32.dll";
+  DWORD verInfoSize = ::GetFileVersionInfoSizeW(kKernel32, nullptr);
+  if (!verInfoSize) {
+    printf(
+        "TEST-FAILED | NativeNt | Call to GetFileVersionInfoSizeW failed with "
+        "code %lu\n",
+        ::GetLastError());
+    return 1;
+  }
+
+  auto verInfoBuf = MakeUnique<char[]>(verInfoSize);
+
+  if (!::GetFileVersionInfoW(kKernel32, 0, verInfoSize, verInfoBuf.get())) {
+    printf(
+        "TEST-FAILED | NativeNt | Call to GetFileVersionInfoW failed with code "
+        "%lu\n",
+        ::GetLastError());
+    return 1;
+  }
+
+  UINT len;
+  VS_FIXEDFILEINFO* fixedFileInfo = nullptr;
+  if (!::VerQueryValueW(verInfoBuf.get(), L"\\", (LPVOID*)&fixedFileInfo,
+                        &len)) {
+    printf(
+        "TEST-FAILED | NativeNt | Call to VerQueryValueW failed with code "
+        "%lu\n",
+        ::GetLastError());
+    return 1;
+  }
+
+  // Pack the two 32-bit halves into one 64-bit value, most significant half
+  // first.
+  const uint64_t expectedVersion =
+      (static_cast<uint64_t>(fixedFileInfo->dwFileVersionMS) << 32) |
+      static_cast<uint64_t>(fixedFileInfo->dwFileVersionLS);
+
+  // ...and compare it with what PEHeaders reports for the loaded module.
+  PEHeaders k32headers(::GetModuleHandleW(kKernel32));
+  if (!k32headers) {
+    printf(
+        "TEST-FAILED | NativeNt | Failed parsing kernel32.dll's PE headers\n");
+    return 1;
+  }
+
+  uint64_t version;
+  if (!k32headers.GetVersionInfo(version)) {
+    printf(
+        "TEST-FAILED | NativeNt | Unable to obtain version information from "
+        "kernel32.dll\n");
+    return 1;
+  }
+
+  if (version != expectedVersion) {
+    printf(
+        "TEST-FAILED | NativeNt | kernel32.dll's detected version "
+        "(0x%016llX) does not match expected version (0x%016llX)\n",
+        version, expectedVersion);
+    return 1;
+  }
+
+  // IAT test: looking up thunks for kernel32.dll inside kernel32.dll itself
+  // is expected to find nothing.
+  Maybe<Span<IMAGE_THUNK_DATA>> iatThunks =
+      k32headers.GetIATThunksForModule("kernel32.dll");
+  if (iatThunks) {
+    printf(
+        "TEST-FAILED | NativeNt | Detected the IAT thunk for kernel32 "
+        "in kernel32.dll\n");
+    return 1;
+  }
+
+  PEHeaders ntdllheaders(::GetModuleHandleW(L"ntdll.dll"));
+
+  auto ntdllBoundaries = ntdllheaders.GetBounds();
+  if (!ntdllBoundaries) {
+    printf(
+        "TEST-FAILED | NativeNt | "
+        "Unable to obtain the boundaries of ntdll.dll\n");
+    return 1;
+  }
+
+  // The thunks for ntdll.dll, on the other hand, must be found in
+  // kernel32.dll when ntdll's boundaries are passed along.
+  iatThunks =
+      k32headers.GetIATThunksForModule("ntdll.dll", ntdllBoundaries.ptr());
+  if (!iatThunks) {
+    printf(
+        "TEST-FAILED | NativeNt | Unable to find the IAT thunk for "
+        "ntdll.dll in kernel32.dll\n");
+    return 1;
+  }
+
+  // To test the Ex version of API, we purposely get a real handle
+  // instead of a pseudo handle.
+  nsAutoHandle process(
+      ::OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, GetCurrentProcessId()));
+  if (!process) {
+    printf("TEST-FAILED | NativeNt | OpenProcess() failed - %08lx\n",
+           ::GetLastError());
+    return 1;
+  }
+
+  // Test Null page, Heap, Mapped image, and Invalid handle
+  // (TestVirtualQuery prints its own TEST-FAILED line.)
+  if (!TestVirtualQuery(process, nullptr) || !TestVirtualQuery(process, argv) ||
+      !TestVirtualQuery(process, kNormal) ||
+      !TestVirtualQuery(nullptr, kNormal)) {
+    return 1;
+  }
+
+  // GetModuleHandleFromLeafName must resolve kernel32.dll to the same base
+  // address that PEHeaders reports for RVA 0...
+  auto moduleResult = GetModuleHandleFromLeafName(kKernel32);
+  if (moduleResult.isErr() ||
+      moduleResult.inspect() != k32headers.template RVAToPtr<HMODULE>(0)) {
+    printf(
+        "TEST-FAILED | NativeNt | "
+        "GetModuleHandleFromLeafName returns a wrong value.\n");
+    return 1;
+  }
+
+  // ...and must report an error for the leaf name "invalid".
+  moduleResult = GetModuleHandleFromLeafName(L"invalid");
+  if (moduleResult.isOk()) {
+    printf(
+        "TEST-FAILED | NativeNt | "
+        "GetModuleHandleFromLeafName unexpectedly returns a value.\n");
+    return 1;
+  }
+
+  printf("TEST-PASS | NativeNt | All tests ran successfully\n");
+  return 0;
+}
diff --git a/mozglue/tests/TestPEExportSection.cpp b/mozglue/tests/TestPEExportSection.cpp
new file mode 100644
index 0000000000..0a4d33255f
--- /dev/null
+++ b/mozglue/tests/TestPEExportSection.cpp
@@ -0,0 +1,698 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+// This test makes sure mozilla::nt::PEExportSection can parse the export
+// section of a local process, and a remote process even though it's
+// modified by an external code.
+
+#include "mozilla/CmdLineAndEnvUtils.h"
+#include "mozilla/NativeNt.h"
+#include "nsWindowsDllInterceptor.h"
+
+#include <stdio.h>
+#include <windows.h>
+
+// True when the Win32 GetProcAddress resolves |name| to |func|.
+// NOTE: expands to a use of a variable named `imageBase`, which must be in
+// scope at the point of use.
+#define EXPORT_FUNCTION_EQ(name, func) \
+  (GetProcAddress(imageBase, name) == reinterpret_cast<void*>(func))
+
+// Checks that tables.GetProcAddress(name) equals |expected|.  On a mismatch
+// it prints |errorMessage| after the TEST-FAILED prefix and makes the
+// *enclosing function* return kTestFail.  |errorMessage| is printed with %s
+// and is expected to carry its own trailing newline.
+#define VERIFY_EXPORT_FUNCTION(tables, name, expected, errorMessage)        \
+  do {                                                                      \
+    if (tables.GetProcAddress(name) != reinterpret_cast<void*>(expected)) { \
+      printf("TEST-FAILED | TestPEExportSection | %s", errorMessage);       \
+      return kTestFail;                                                     \
+    }                                                                       \
+  } while (0)
+
+using namespace mozilla::nt;
+using mozilla::interceptor::MMPolicyInProcess;
+using mozilla::interceptor::MMPolicyOutOfProcess;
+using LocalPEExportSection = PEExportSection<MMPolicyInProcess>;
+using RemotePEExportSection = PEExportSection<MMPolicyOutOfProcess>;
+
+// How long the parent waits for a child process to signal the control event.
+constexpr DWORD kEventTimeoutinMs = 5000;
+// Name of the event, created in wmain() by both parent and child, that the
+// child sets once its requested modification is done.
+const wchar_t kProcessControlEventName[] =
+    L"TestPEExportSection.Process.Control.Event";
+
+// Outcome of a test step.  kTestSuccess is 0, so a TestResult can be returned
+// from wmain() directly and compared against the zero-initialized
+// gChildProcessStatus.
+enum TestResult : int {
+  kTestSuccess = 0,
+  kTestFail,
+  kTestSkip,
+};
+
+// These strings start with the same keyword to make sure we don't do substring
+// match.  Moreover, kSecretFunctionInvalid is purposely longer than the
+// combination of the other two strings and located in between the other two
+// strings to effectively test binary search.
+const char kSecretFunction[] = "Secret";
+const char kSecretFunctionInvalid[] = "Secret invalid long name";
+const char kSecretFunctionWithSuffix[] = "Secret2";
+
+// Command line options understood by ChildProcess::Main.  Each one selects
+// the modification the child applies to its own export section before
+// signalling the parent.
+// Leave the export section untouched.
+const wchar_t* kNoModification = L"--NoModification";
+// Run HideExportSection().
+const wchar_t* kNoExport = L"--NoExport";
+// Run ModifyExportAddressTableEntry().
+const wchar_t* kModifyTableEntry = L"--ModifyTableEntry";
+// Run ModifyExportAddressTable().
+const wchar_t* kModifyTable = L"--ModifyTable";
+// Run ModifyExportDirectoryEntry().
+const wchar_t* kModifyDirectoryEntry = L"--ModifyDirectoryEntry";
+// Run ExportByOrdinal().
+const wchar_t* kExportByOrdinal = L"--ExportByOrdinal";
+
+// Use the global variable to pass the child process's error status to the
+// parent process.  We don't use a process's exit code to keep the test simple.
+int gChildProcessStatus = 0;
+
+// These functions are exported either by the linker (Export1, Export2) or by
+// tampering with the export section at runtime (SecretFunction1,
+// SecretFunction2).  Each function body needs to be different to avoid ICF.
+extern "C" __declspec(dllexport) int Export1() { return 0; }
+extern "C" __declspec(dllexport) int Export2() { return 1; }
+int SecretFunction1() { return 100; }
+int SecretFunction2() { return 101; }
+
+// This class allocates a writable region downstream of the mapped image
+// and prepares it as a valid export section.
+//
+// The region holds an ExportDirectory exporting SecretFunction1 as
+// kSecretFunction and SecretFunction2 as kSecretFunctionWithSuffix.  When no
+// region could be allocated, the object converts to false and
+// gChildProcessStatus is set to kTestSkip.
+class ExportDirectoryPatcher final {
+  // Maximum number of VirtualAlloc attempts before giving up.
+  static constexpr int kRegionAllocationTryLimit = 100;
+  // Number of functions exported by the fabricated section.
+  static constexpr int kNumOfTableEntries = 2;
+  // VirtualAlloc sometimes fails if a desired base address is too small.
+  // Define a minimum desired base to reduce the number of allocation tries.
+  static constexpr uintptr_t kMinimumAllocationPoint = 0x8000000;
+
+  // In-memory image of the fabricated export section: the directory header
+  // followed by the three tables it refers to and the name strings.
+  struct ExportDirectory {
+    IMAGE_EXPORT_DIRECTORY mDirectoryHeader;
+    DWORD mExportAddressTable[kNumOfTableEntries];
+    DWORD mExportNameTable[kNumOfTableEntries];
+    WORD mExportOrdinalTable[kNumOfTableEntries];
+    // Sized with sizeof() of the source arrays, so each buffer holds its
+    // string including the terminating null.
+    char mNameBuffer1[sizeof(kSecretFunction)];
+    char mNameBuffer2[sizeof(kSecretFunctionWithSuffix)];
+
+    // Converts |aPtr| into an offset from |aBase|.
+    template <typename T>
+    static DWORD PtrToRVA(T aPtr, uintptr_t aBase) {
+      return reinterpret_cast<uintptr_t>(aPtr) - aBase;
+    }
+
+    // Fills in the tables and names.  Of the header, only Base is set here
+    // (everything else is zeroed); the remaining header fields are written by
+    // ExportDirectoryPatcher::PopulateDirectory.
+    explicit ExportDirectory(uintptr_t aImageBase) : mDirectoryHeader{} {
+      mDirectoryHeader.Base = 1;
+      mExportAddressTable[0] = PtrToRVA(SecretFunction1, aImageBase);
+      mExportAddressTable[1] = PtrToRVA(SecretFunction2, aImageBase);
+      mExportNameTable[0] = PtrToRVA(mNameBuffer1, aImageBase);
+      mExportNameTable[1] = PtrToRVA(mNameBuffer2, aImageBase);
+      mExportOrdinalTable[0] = 0;
+      mExportOrdinalTable[1] = 1;
+      strcpy(mNameBuffer1, kSecretFunction);
+      strcpy(mNameBuffer2, kSecretFunctionWithSuffix);
+    }
+  };
+
+  // Base address of the module being patched; all RVAs are relative to it.
+  uintptr_t mImageBase;
+  // The fabricated section, or nullptr when allocation failed.
+  ExportDirectory* mNewExportDirectory;
+
+  // Converts |aPtr| into an offset from mImageBase.
+  DWORD PtrToRVA(const void* aPtr) const {
+    return reinterpret_cast<uintptr_t>(aPtr) - mImageBase;
+  }
+
+ public:
+  // Tries to allocate read/write pages at an address above |aModule|'s base
+  // and constructs an ExportDirectory there.
+  explicit ExportDirectoryPatcher(HMODULE aModule)
+      : mImageBase(PEHeaders::HModuleToBaseAddr<uintptr_t>(aModule)),
+        mNewExportDirectory(nullptr) {
+    SYSTEM_INFO si = {};
+    ::GetSystemInfo(&si);
+
+    // Round sizeof(ExportDirectory) up to a whole number of pages.
+    int numPagesRequired = ((sizeof(ExportDirectory) - 1) / si.dwPageSize) + 1;
+
+    // First candidate: one allocation granule above the image base, but not
+    // below kMinimumAllocationPoint.
+    uintptr_t desiredBase = mImageBase + si.dwAllocationGranularity;
+    desiredBase = std::max(desiredBase, kMinimumAllocationPoint);
+
+    for (int i = 0; i < kRegionAllocationTryLimit; ++i) {
+      void* allocated =
+          ::VirtualAlloc(reinterpret_cast<void*>(desiredBase),
+                         numPagesRequired * si.dwPageSize,
+                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+      if (allocated) {
+        // Use the end of a allocated page as ExportDirectory in order to test
+        // the boundary between a commit page and a non-commited page.
+        allocated = reinterpret_cast<uint8_t*>(allocated) +
+                    (numPagesRequired * si.dwPageSize) -
+                    sizeof(ExportDirectory);
+        mNewExportDirectory = new (allocated) ExportDirectory(mImageBase);
+        return;
+      }
+
+      // Candidate was not available; try the next allocation granule.
+      desiredBase += si.dwAllocationGranularity;
+    }
+
+    // Every attempt failed: report a skip rather than a failure.
+    gChildProcessStatus = kTestSkip;
+    printf(
+        "TEST-SKIP | TestPEExportSection | "
+        "Giving up finding an allocatable space following the mapped image.\n");
+  }
+
+  ~ExportDirectoryPatcher() {
+    // Intentionally leave mNewExportDirectory leaked to keep a patched data
+    // available until the process is terminated.
+  }
+
+  // True when the fabricated section was allocated successfully.
+  explicit operator bool() const { return !!mNewExportDirectory; }
+
+  // Redirects the counts and the three table RVAs of |aOutput| to the
+  // fabricated tables.  Other fields of |aOutput| are left untouched.
+  // Must only be called when the patcher converts to true.
+  void PopulateDirectory(IMAGE_EXPORT_DIRECTORY& aOutput) const {
+    aOutput.NumberOfFunctions = aOutput.NumberOfNames = kNumOfTableEntries;
+    aOutput.AddressOfFunctions =
+        PtrToRVA(mNewExportDirectory->mExportAddressTable);
+    aOutput.AddressOfNames = PtrToRVA(mNewExportDirectory->mExportNameTable);
+    aOutput.AddressOfNameOrdinals =
+        PtrToRVA(mNewExportDirectory->mExportOrdinalTable);
+  }
+
+  // Completes the fabricated directory header and makes the data directory
+  // entry |aOutput| point at it.
+  // Must only be called when the patcher converts to true.
+  void PopulateDirectoryEntry(IMAGE_DATA_DIRECTORY& aOutput) const {
+    PopulateDirectory(mNewExportDirectory->mDirectoryHeader);
+    aOutput.VirtualAddress = PtrToRVA(&mNewExportDirectory->mDirectoryHeader);
+    aOutput.Size = sizeof(ExportDirectory);
+  }
+};
+
+// Makes "Export1" resolve to SecretFunction1 by overwriting its entry in the
+// export address table of this executable, then verifies the result with the
+// Win32 GetProcAddress.  Any failure is recorded in gChildProcessStatus.
+void ModifyExportAddressTableEntry() {
+  MMPolicyInProcess policy;
+  // Must be called `imageBase`: EXPORT_FUNCTION_EQ refers to it by name.
+  HMODULE imageBase = ::GetModuleHandleW(nullptr);
+  auto exportSection = LocalPEExportSection::Get(imageBase, policy);
+
+  DWORD* const export1Entry = const_cast<DWORD*>(
+      exportSection.FindExportAddressTableEntry("Export1"));
+  if (!export1Entry) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  // Make the entry writable for the rest of this function.
+  mozilla::AutoVirtualProtect writable(export1Entry, sizeof(*export1Entry),
+                                       PAGE_READWRITE);
+  if (!writable) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  // The table stores offsets from the image base, not absolute addresses.
+  const uintptr_t secretAddress = reinterpret_cast<uintptr_t>(SecretFunction1);
+  *export1Entry =
+      secretAddress - PEHeaders::HModuleToBaseAddr<uintptr_t>(imageBase);
+
+  const bool resolvesAsExpected =
+      EXPORT_FUNCTION_EQ("Export1", SecretFunction1) &&
+      EXPORT_FUNCTION_EQ("Export2", Export2);
+  if (!resolvesAsExpected) {
+    gChildProcessStatus = kTestFail;
+  }
+}
+
+// Points the existing export directory of this executable at the tables
+// fabricated by ExportDirectoryPatcher, so that only SecretFunction1 and
+// SecretFunction2 are exported, then verifies the result with the Win32
+// GetProcAddress.  Any failure is recorded in gChildProcessStatus.
+void ModifyExportAddressTable() {
+  MMPolicyInProcess policy;
+  // Must be called `imageBase`: EXPORT_FUNCTION_EQ refers to it by name.
+  HMODULE imageBase = ::GetModuleHandleW(nullptr);
+  auto exportSection = LocalPEExportSection::Get(imageBase, policy);
+
+  auto directory = exportSection.GetExportDirectory();
+  if (!directory) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  // Make the directory writable for the rest of this function.
+  mozilla::AutoVirtualProtect writable(directory, sizeof(*directory),
+                                       PAGE_READWRITE);
+  if (!writable) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  ExportDirectoryPatcher patcher(imageBase);
+  if (!patcher) {
+    // The patcher has already recorded the status itself.
+    return;
+  }
+
+  patcher.PopulateDirectory(*directory);
+
+  const bool originalStillExported = GetProcAddress(imageBase, "Export1") ||
+                                     GetProcAddress(imageBase, "Export2");
+  if (originalStillExported ||
+      !EXPORT_FUNCTION_EQ(kSecretFunction, SecretFunction1) ||
+      !EXPORT_FUNCTION_EQ(kSecretFunctionWithSuffix, SecretFunction2)) {
+    gChildProcessStatus = kTestFail;
+  }
+}
+
+// Hides every export of this executable by zeroing the address and size of
+// the export entry in the image's data directory, then verifies with the
+// Win32 GetProcAddress that Export1 and Export2 no longer resolve.  Any
+// failure is recorded in gChildProcessStatus.
+void HideExportSection() {
+  HMODULE imageBase = ::GetModuleHandleW(nullptr);
+  PEHeaders headers(imageBase);
+
+  auto exportEntry =
+      headers.GetImageDirectoryEntryPtr(IMAGE_DIRECTORY_ENTRY_EXPORT);
+
+  // Make the directory entry writable for the rest of this function.
+  mozilla::AutoVirtualProtect writable(exportEntry, sizeof(*exportEntry),
+                                       PAGE_READWRITE);
+  if (!writable) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  exportEntry->Size = 0;
+  exportEntry->VirtualAddress = 0;
+
+  const bool stillExported = GetProcAddress(imageBase, "Export1") ||
+                             GetProcAddress(imageBase, "Export2");
+  if (stillExported) {
+    gChildProcessStatus = kTestFail;
+  }
+}
+
+// Redirects the export entry of the image's data directory to a whole new
+// export section fabricated by ExportDirectoryPatcher, which exports
+// SecretFunction1 and SecretFunction2, then verifies the result with the
+// Win32 GetProcAddress.  Any failure is recorded in gChildProcessStatus.
+void ModifyExportDirectoryEntry() {
+  // Must be called `imageBase`: EXPORT_FUNCTION_EQ refers to it by name.
+  HMODULE imageBase = ::GetModuleHandleW(nullptr);
+  PEHeaders headers(imageBase);
+
+  auto exportEntry =
+      headers.GetImageDirectoryEntryPtr(IMAGE_DIRECTORY_ENTRY_EXPORT);
+
+  // Make the directory entry writable for the rest of this function.
+  mozilla::AutoVirtualProtect writable(exportEntry, sizeof(*exportEntry),
+                                       PAGE_READWRITE);
+  if (!writable) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  ExportDirectoryPatcher patcher(imageBase);
+  if (!patcher) {
+    // The patcher has already recorded the status itself.
+    return;
+  }
+
+  patcher.PopulateDirectoryEntry(*exportEntry);
+
+  const bool originalStillExported = GetProcAddress(imageBase, "Export1") ||
+                                     GetProcAddress(imageBase, "Export2");
+  if (originalStillExported ||
+      !EXPORT_FUNCTION_EQ(kSecretFunction, SecretFunction1) ||
+      !EXPORT_FUNCTION_EQ(kSecretFunctionWithSuffix, SecretFunction2)) {
+    gChildProcessStatus = kTestFail;
+  }
+}
+
+// Leaves the secret functions reachable by ordinal only: first applies
+// ModifyExportDirectoryEntry(), then sets the number of exported names to 0
+// and verifies with the Win32 GetProcAddress that no name resolves while
+// ordinals 1 and 2 do.  Any failure is recorded in gChildProcessStatus.
+void ExportByOrdinal() {
+  ModifyExportDirectoryEntry();
+  if (gChildProcessStatus != kTestSuccess) {
+    // The first step failed or was skipped; nothing more to do.
+    return;
+  }
+
+  MMPolicyInProcess policy;
+  // Must be called `imageBase`: EXPORT_FUNCTION_EQ refers to it by name.
+  HMODULE imageBase = ::GetModuleHandleW(nullptr);
+  auto exportSection = LocalPEExportSection::Get(imageBase, policy);
+
+  auto directory = exportSection.GetExportDirectory();
+  if (!directory) {
+    gChildProcessStatus = kTestFail;
+    return;
+  }
+
+  directory->NumberOfNames = 0;
+
+  const bool resolvedByName =
+      GetProcAddress(imageBase, "Export1") ||
+      GetProcAddress(imageBase, "Export2") ||
+      GetProcAddress(imageBase, kSecretFunction) ||
+      GetProcAddress(imageBase, kSecretFunctionWithSuffix);
+  if (resolvedByName ||
+      !EXPORT_FUNCTION_EQ(MAKEINTRESOURCE(1), SecretFunction1) ||
+      !EXPORT_FUNCTION_EQ(MAKEINTRESOURCE(2), SecretFunction2)) {
+    gChildProcessStatus = kTestFail;
+  }
+}
+
+// Launches this test executable as a child process with one option string,
+// waits until the child has signalled the control event, and terminates the
+// child on destruction.  When launching fails, the object converts to a null
+// HANDLE and no process or handle is left behind.
+class ChildProcess final {
+  nsAutoHandle mChildProcess;
+  nsAutoHandle mChildMainThread;
+
+  // Kills a child that could not be set up and closes both handles returned
+  // by CreateProcessW, so that a failed construction leaks nothing.
+  static void AbandonChild(const PROCESS_INFORMATION& aProcessInfo) {
+    ::TerminateProcess(aProcessInfo.hProcess, 1);
+    ::CloseHandle(aProcessInfo.hThread);
+    ::CloseHandle(aProcessInfo.hProcess);
+  }
+
+ public:
+  // Body of the child process: applies the modification selected by
+  // |aOption|, sets |aEvent| to tell the parent it is ready, and then idles
+  // until the parent terminates it.  Never returns in practice.
+  static int Main(const nsAutoHandle& aEvent, const wchar_t* aOption) {
+    if (wcscmp(aOption, kNoModification) == 0) {
+      ;
+    } else if (wcscmp(aOption, kNoExport) == 0) {
+      HideExportSection();
+    } else if (wcscmp(aOption, kModifyTableEntry) == 0) {
+      ModifyExportAddressTableEntry();
+    } else if (wcscmp(aOption, kModifyTable) == 0) {
+      ModifyExportAddressTable();
+    } else if (wcscmp(aOption, kModifyDirectoryEntry) == 0) {
+      ModifyExportDirectoryEntry();
+    } else if (wcscmp(aOption, kExportByOrdinal) == 0) {
+      ExportByOrdinal();
+    }
+
+    // Letting the parent process know the child process is ready.
+    ::SetEvent(aEvent);
+
+    // The child process does not exit itself.  It's force terminated by
+    // the parent process when all tests are done.
+    for (;;) {
+      ::Sleep(100);
+    }
+    return 0;
+  }
+
+  // Starts |aExecutable| with |aOption|, assigns it to |aJob| when a job is
+  // given, and waits up to kEventTimeoutinMs for |aEvent|.  On any failure a
+  // TEST-FAILED line is printed, the child (if created) is terminated and its
+  // handles are closed, and this object stays empty.
+  ChildProcess(const wchar_t* aExecutable, const wchar_t* aOption,
+               const nsAutoHandle& aEvent, const nsAutoHandle& aJob) {
+    const wchar_t* childArgv[] = {aExecutable, aOption};
+    auto cmdLine(
+        mozilla::MakeCommandLine(mozilla::ArrayLength(childArgv), childArgv));
+
+    STARTUPINFOW si = {sizeof(si)};
+    PROCESS_INFORMATION pi = {};
+    BOOL ok = ::CreateProcessW(aExecutable, cmdLine.get(), nullptr, nullptr,
+                               FALSE, 0, nullptr, nullptr, &si, &pi);
+    if (!ok) {
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "CreateProcessW falied - %08lx.\n",
+          GetLastError());
+      return;
+    }
+
+    if (aJob && !::AssignProcessToJobObject(aJob, pi.hProcess)) {
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "AssignProcessToJobObject falied - %08lx.\n",
+          GetLastError());
+      AbandonChild(pi);
+      return;
+    }
+
+    // Wait until requested modification is done in the child process.
+    if (::WaitForSingleObject(aEvent, kEventTimeoutinMs) != WAIT_OBJECT_0) {
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "Child process was not ready in time.\n");
+      // The child never exits by itself, so it must not be left running.
+      AbandonChild(pi);
+      return;
+    }
+
+    mChildProcess.own(pi.hProcess);
+    mChildMainThread.own(pi.hThread);
+  }
+
+  // Terminates the child, if one was successfully launched.
+  ~ChildProcess() {
+    if (mChildProcess) {
+      ::TerminateProcess(mChildProcess, 0);
+    }
+  }
+
+  // The child's process handle, or a null handle when launching failed.
+  operator HANDLE() const { return mChildProcess; }
+
+  // Reads gChildProcessStatus out of the child's memory.  Returns kTestFail
+  // (after printing a TEST-FAILED line) when the read itself fails.
+  TestResult GetStatus() const {
+    TestResult status = kTestSuccess;
+    // The same executable runs in both processes, so the address of the
+    // parent's gChildProcessStatus is used as the address in the child.
+    if (!::ReadProcessMemory(mChildProcess, &gChildProcessStatus, &status,
+                             sizeof(status), nullptr)) {
+      status = kTestFail;
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "ReadProcessMemory failed - %08lx\n",
+          GetLastError());
+    }
+    return status;
+  }
+};
+
+// Checks PEExportSection<MMPolicy> against the Win32 GetProcAddress using
+// system DLLs:
+//  - every named export of ntdll.dll must resolve to the same address,
+//  - a sample of ordinals of ntdll.dll must resolve to the same address,
+//  - kernel32!HeapAlloc, a forwarder, must not yield a table entry,
+//  - non-existent names must not yield a table entry.
+// Returns kTestSuccess, or kTestFail after printing a TEST-FAILED line.
+template <typename MMPolicy>
+TestResult BasicTest(const MMPolicy& aMMPolicy) {
+  // Use ntdll.dll because it does not have any forwarder RVA.
+  HMODULE ntdllImageBase = ::GetModuleHandleW(L"ntdll.dll");
+  auto ntdllExports = PEExportSection<MMPolicy>::Get(ntdllImageBase, aMMPolicy);
+
+  auto exportDir = ntdllExports.GetExportDirectory();
+  if (!exportDir) {
+    // Report the failure instead of crashing on the dereference below.
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "Unable to get the export directory of ntdll.dll.\n");
+    return kTestFail;
+  }
+
+  auto tableOfNames =
+      ntdllExports.template RVAToPtr<const PDWORD>(exportDir->AddressOfNames);
+  for (DWORD i = 0; i < exportDir->NumberOfNames; ++i) {
+    const auto name =
+        ntdllExports.template RVAToPtr<const char*>(tableOfNames[i]);
+    auto funcEntry = ntdllExports.FindExportAddressTableEntry(name);
+    // A missing entry is a test failure too; it must not be dereferenced.
+    if (!funcEntry ||
+        ntdllExports.template RVAToPtr<const void*>(*funcEntry) !=
+            ::GetProcAddress(ntdllImageBase, name)) {
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "FindExportAddressTableEntry did not resolve ntdll!%s.\n",
+          name);
+      return kTestFail;
+    }
+  }
+
+  // Sample every 16th value of the 16-bit ordinal space.
+  for (DWORD i = 0; i < 0x10000; i += 0x10) {
+    if (ntdllExports.GetProcAddress(MAKEINTRESOURCE(i)) !=
+        ::GetProcAddress(ntdllImageBase, MAKEINTRESOURCE(i))) {
+      printf(
+          "TEST-FAILED | TestPEExportSection | "
+          "GetProcAddress did not resolve ntdll!Ordinal#%lu.\n",
+          i);
+      return kTestFail;
+    }
+  }
+
+  // Test a known forwarder RVA.
+  auto k32Exports = PEExportSection<MMPolicy>::Get(
+      ::GetModuleHandleW(L"kernel32.dll"), aMMPolicy);
+  if (k32Exports.FindExportAddressTableEntry("HeapAlloc")) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "kernel32!HeapAlloc should be forwarded to ntdll!RtlAllocateHeap.\n");
+    return kTestFail;
+  }
+
+  // Test invalid names.
+  if (k32Exports.FindExportAddressTableEntry("Invalid name") ||
+      k32Exports.FindExportAddressTableEntry("")) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "FindExportAddressTableEntry should return "
+        "nullptr for a non-existent name.\n");
+    return kTestFail;
+  }
+
+  return kTestSuccess;
+}
+
+// Runs one out-of-process scenario: launches a child with |aOption|, checks
+// the status the child recorded, and then lets |aTestCallback| inspect the
+// child's export section through RemotePEExportSection.  Returns the first
+// non-success result encountered, otherwise the callback's result.
+TestResult RunChildProcessTest(
+    const wchar_t* aExecutable, const wchar_t* aOption,
+    const nsAutoHandle& aEvent, const nsAutoHandle& aJob,
+    TestResult (*aTestCallback)(const RemotePEExportSection&)) {
+  ChildProcess childProcess(aExecutable, aOption, aEvent, aJob);
+  if (!childProcess) {
+    return kTestFail;
+  }
+
+  const TestResult childStatus = childProcess.GetStatus();
+  if (childStatus != kTestSuccess) {
+    return childStatus;
+  }
+
+  MMPolicyOutOfProcess policy(childProcess);
+
+  // One time is enough to run BasicTest in the child process.  The static
+  // keeps the result of that single run for every later call.
+  static TestResult basicTestResult = BasicTest<MMPolicyOutOfProcess>(policy);
+  if (basicTestResult != kTestSuccess) {
+    return basicTestResult;
+  }
+
+  // The handle of this executable's own module is used to locate the image in
+  // the child.
+  HMODULE ownModule = ::GetModuleHandleW(nullptr);
+  auto childExports = RemotePEExportSection::Get(ownModule, policy);
+  return aTestCallback(childExports);
+}
+
+// Creates a job object configured with JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE for
+// the child processes.  When ntdll.dll's version is older than Windows 8, an
+// empty handle is returned instead (see the comment below).  Returns an error
+// after printing a TEST-FAILED line when a step fails.
+mozilla::LauncherResult<nsReturnRef<HANDLE>> CreateJobToLimitProcessLifetime() {
+  PEHeaders ntdllHeaders(::GetModuleHandleW(L"ntdll.dll"));
+
+  uint64_t ntdllVersion;
+  if (!ntdllHeaders.GetVersionInfo(ntdllVersion)) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "Unable to obtain version information from ntdll.dll\n");
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+
+  nsAutoHandle job;
+
+  // 6.2 in the most significant 32 bits of the 64-bit version value.
+  constexpr uint64_t kWin8 = 0x60002ull << 32;
+  const bool isWin8OrLater = ntdllVersion >= kWin8;
+  if (!isWin8OrLater) {
+    // Since a process can be associated only with a single job in Win7 or
+    // older and this test program is already assigned with a job by
+    // infrastructure, we cannot use a job.
+    return job.out();
+  }
+
+  job.own(::CreateJobObject(nullptr, nullptr));
+  if (!job) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "CreateJobObject falied - %08lx.\n",
+        GetLastError());
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+
+  JOBOBJECT_EXTENDED_LIMIT_INFORMATION limits = {};
+  limits.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
+
+  const BOOL applied = ::SetInformationJobObject(
+      job, JobObjectExtendedLimitInformation, &limits, sizeof(limits));
+  if (!applied) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "SetInformationJobObject falied - %08lx.\n",
+        GetLastError());
+    return LAUNCHER_ERROR_FROM_LAST();
+  }
+
+  return job.out();
+}
+
+// Entry point shared by the parent and the child process.
+//  - With one argument, this is a child: run ChildProcess::Main with it.
+//  - With no argument, this is the parent: run the in-process checks, then
+//    one child process per modification scenario.
+// The parent returns kTestSuccess (0) when no scenario failed; a skipped
+// scenario is not treated as a failure.
+extern "C" int wmain(int argc, wchar_t* argv[]) {
+  // Parent and child open the same named event; the child sets it when ready.
+  nsAutoHandle controlEvent(
+      ::CreateEventW(nullptr, FALSE, FALSE, kProcessControlEventName));
+  if (!controlEvent) {
+    // Without the event, no child could ever report readiness.
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "CreateEventW failed - %08lx.\n",
+        GetLastError());
+    return kTestFail;
+  }
+
+  if (argc == 2) {
+    return ChildProcess::Main(controlEvent, argv[1]);
+  }
+
+  if (argc != 1) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "Invalid arguments.\n");
+    return kTestFail;
+  }
+
+  // In-process checks.  BasicTest prints its own TEST-FAILED line.
+  MMPolicyInProcess policy;
+  if (BasicTest<MMPolicyInProcess>(policy)) {
+    return kTestFail;
+  }
+
+  auto exportTableSelf =
+      LocalPEExportSection::Get(::GetModuleHandleW(nullptr), policy);
+  if (!exportTableSelf) {
+    printf(
+        "TEST-FAILED | TestPEExportSection | "
+        "LocalPEExportSection::Get failed.\n");
+    return kTestFail;
+  }
+
+  // VERIFY_EXPORT_FUNCTION returns kTestFail from the enclosing function on a
+  // mismatch.
+  VERIFY_EXPORT_FUNCTION(exportTableSelf, "Export1", Export1,
+                         "Local | Export1 was not exported.\n");
+  VERIFY_EXPORT_FUNCTION(exportTableSelf, "Export2", Export2,
+                         "Local | Export2 was not exported.\n");
+  VERIFY_EXPORT_FUNCTION(
+      exportTableSelf, "Invalid name", 0,
+      "Local | GetProcAddress should return nullptr for an invalid name.\n");
+
+  // We'll add the child process to a job so that, in the event of a failure in
+  // this parent process, the child process will be automatically terminated.
+  auto probablyJob = CreateJobToLimitProcessLifetime();
+  if (probablyJob.isErr()) {
+    return kTestFail;
+  }
+
+  nsAutoHandle job(probablyJob.unwrap());
+
+  // Scenario: untouched child.  Both linker exports must be visible.
+  auto result = RunChildProcessTest(
+      argv[0], kNoModification, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(aTables, "Export1", Export1,
+                               "NoModification | Export1 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(aTables, "Export2", Export2,
+                               "NoModification | Export2 was not exported.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  // Scenario: export section hidden.  Nothing must be visible.
+  result = RunChildProcessTest(
+      argv[0], kNoExport, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(aTables, "Export1", 0,
+                               "NoExport | Export1 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(aTables, "Export2", 0,
+                               "NoExport | Export2 was exported.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  // Scenario: "Export1" redirected to SecretFunction1.
+  result = RunChildProcessTest(
+      argv[0], kModifyTableEntry, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(
+            aTables, "Export1", SecretFunction1,
+            "ModifyTableEntry | SecretFunction1 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, "Export2", Export2,
+            "ModifyTableEntry | Export2 was not exported.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  // Scenario: export tables replaced.  Only the secret names must resolve.
+  result = RunChildProcessTest(
+      argv[0], kModifyTable, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(aTables, "Export1", 0,
+                               "ModifyTable | Export1 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(aTables, "Export2", 0,
+                               "ModifyTable | Export2 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunction, SecretFunction1,
+            "ModifyTable | SecretFunction1 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunctionWithSuffix, SecretFunction2,
+            "ModifyTable | SecretFunction2 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunctionInvalid, 0,
+            "ModifyTable | kSecretFunctionInvalid was exported.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  // Scenario: directory entry redirected to a new export section.
+  result = RunChildProcessTest(
+      argv[0], kModifyDirectoryEntry, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(
+            aTables, "Export1", 0,
+            "ModifyDirectoryEntry | Export1 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, "Export2", 0,
+            "ModifyDirectoryEntry | Export2 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunction, SecretFunction1,
+            "ModifyDirectoryEntry | SecretFunction1 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunctionWithSuffix, SecretFunction2,
+            "ModifyDirectoryEntry | SecretFunction2 was not exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunctionInvalid, 0,
+            "ModifyDirectoryEntry | kSecretFunctionInvalid was exported.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  // Scenario: export by ordinal only.  No name must resolve; ordinals 1 and 2
+  // must.  Every message carries the "ExportByOrdinal" label so that a failure
+  // is attributed to this scenario.
+  result = RunChildProcessTest(
+      argv[0], kExportByOrdinal, controlEvent, job,
+      [](const RemotePEExportSection& aTables) {
+        VERIFY_EXPORT_FUNCTION(aTables, "Export1", 0,
+                               "ExportByOrdinal | Export1 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(aTables, "Export2", 0,
+                               "ExportByOrdinal | Export2 was exported.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunction, 0,
+            "ExportByOrdinal | kSecretFunction was exported by name.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, kSecretFunctionWithSuffix, 0,
+            "ExportByOrdinal | "
+            "kSecretFunctionWithSuffix was exported by name.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, MAKEINTRESOURCE(1), SecretFunction1,
+            "ExportByOrdinal | "
+            "kSecretFunction was not exported by ordinal.\n");
+        VERIFY_EXPORT_FUNCTION(
+            aTables, MAKEINTRESOURCE(2), SecretFunction2,
+            "ExportByOrdinal | "
+            "kSecretFunctionWithSuffix was not exported by ordinal.\n");
+        return kTestSuccess;
+      });
+  if (result == kTestFail) {
+    return result;
+  }
+
+  return kTestSuccess;
+}
diff --git a/mozglue/tests/TestPrintf.cpp b/mozglue/tests/TestPrintf.cpp
new file mode 100644
index 0000000000..69fcfd51b2
--- /dev/null
+++ b/mozglue/tests/TestPrintf.cpp
@@ -0,0 +1,164 @@
+/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/Printf.h"
+
+#include <cfloat>
+#include <stdarg.h>
+
+// PrintfTarget that records appended text in a small fixed buffer; its
+// destructor asserts that exactly |test_string| was printed.
+class TestPrintfTarget : public mozilla::PrintfTarget {
+ public:
+  static const char* test_string;
+
+  TestPrintfTarget() : mBuffer{}, mOut(0) {}
+
+  ~TestPrintfTarget() {
+    MOZ_RELEASE_ASSERT(mOut == strlen(test_string));
+    MOZ_RELEASE_ASSERT(strncmp(mBuffer, test_string, strlen(test_string)) == 0);
+  }
+
+  bool append(const char* sp, size_t len) override {
+    if (mOut + len < sizeof(mBuffer)) {  // skip the copy when it won't fit
+      memcpy(mBuffer + mOut, sp, len);
+    }
+    mOut += len;  // the length is counted even when the copy was skipped
+    return true;
+  }
+
+ private:
+  char mBuffer[100];
+  size_t mOut;
+};
+
+const char* TestPrintfTarget::test_string = "test string";
+
+static void TestPrintfTargetPrint() {
+  TestPrintfTarget target;  // its destructor asserts on what was printed
+  target.print("test string");
+}
+
+// Formats |fmt| with Vsmprintf and reports whether the result equals |expect|.
+static bool MOZ_FORMAT_PRINTF(2, 3)
+    print_one(const char* expect, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  mozilla::SmprintfPointer formatted = mozilla::Vsmprintf(fmt, args);
+  va_end(args);
+  // A null result never matches.
+  return formatted && strcmp(formatted.get(), expect) == 0;
+}
+
+static const char* zero() { return static_cast<const char*>(nullptr); }
+
+static void TestPrintfFormats() {  // Checks Vsmprintf output via print_one().
+  MOZ_RELEASE_ASSERT(print_one("0", "%d", 0));
+  MOZ_RELEASE_ASSERT(print_one("23", "%d", 23));
+  MOZ_RELEASE_ASSERT(print_one("+23", "%+d", 23));
+  MOZ_RELEASE_ASSERT(print_one("-23", "%+d", -23));
+  MOZ_RELEASE_ASSERT(print_one("0023", "%04d", 23));
+  MOZ_RELEASE_ASSERT(print_one("777777", "%04d", 777777));
+  MOZ_RELEASE_ASSERT(print_one("  23", "% 4d", 23));
+  MOZ_RELEASE_ASSERT(print_one("23  ", "%-4d", 23));
+  MOZ_RELEASE_ASSERT(print_one("  23", "%*d", 4, 23));
+  MOZ_RELEASE_ASSERT(print_one("-23    ", "%*d", -7, -23));
+  MOZ_RELEASE_ASSERT(print_one("  077", "%5.3d", 77));
+  MOZ_RELEASE_ASSERT(print_one("  077", "%5.*d", 3, 77));
+  MOZ_RELEASE_ASSERT(print_one("  077", "%*.*d", 5, 3, 77));
+  MOZ_RELEASE_ASSERT(print_one("077  ", "%*.*d", -5, 3, 77));
+  MOZ_RELEASE_ASSERT(print_one("77   ", "%*.*d", -5, -3, 77));
+  MOZ_RELEASE_ASSERT(print_one("-1", "%d", -1));
+  MOZ_RELEASE_ASSERT(print_one("23", "%u", 23u));
+  MOZ_RELEASE_ASSERT(print_one("0x17", "0x%x", 23u));
+  MOZ_RELEASE_ASSERT(print_one("0xFF", "0x%X", 255u));
+  MOZ_RELEASE_ASSERT(print_one("027", "0%o", 23u));
+  MOZ_RELEASE_ASSERT(print_one("-1", "%hd", (short)-1));
+  // Special case: a zero value with zero precision prints nothing.
+  MOZ_RELEASE_ASSERT(print_one("", "%.*d", 0, 0));
+  // Only checked where short is two bytes, which the 0x8000 value assumes;
+  // this could be expanded to other sizes if need be.
+  if (sizeof(short) == 2) {
+    MOZ_RELEASE_ASSERT(print_one("8000", "%hx", (unsigned short)0x8000));
+  }
+  MOZ_RELEASE_ASSERT(print_one("2305", "%ld", 2305l));
+  MOZ_RELEASE_ASSERT(print_one("-2305", "%ld", -2305l));
+  MOZ_RELEASE_ASSERT(print_one("0xf0f0", "0x%lx", 0xf0f0ul));
+  MOZ_RELEASE_ASSERT(print_one("0", "%lld", 0ll));
+  MOZ_RELEASE_ASSERT(print_one("2305", "%lld", 2305ll));
+  MOZ_RELEASE_ASSERT(print_one("-2305", "%lld", -2305ll));
+  // The same zero-value, zero-precision special case for long long.
+  MOZ_RELEASE_ASSERT(print_one("", "%.*lld", 0, 0ll));
+  MOZ_RELEASE_ASSERT(print_one("0xF0F0", "0x%llX", 0xf0f0ull));
+  MOZ_RELEASE_ASSERT(print_one("27270", "%zu", (size_t)27270));
+  MOZ_RELEASE_ASSERT(print_one("27270", "%zu", (size_t)27270));
+  MOZ_RELEASE_ASSERT(print_one("hello", "he%so", "ll"));
+  MOZ_RELEASE_ASSERT(print_one("hello   ", "%-8s", "hello"));
+  MOZ_RELEASE_ASSERT(print_one("   hello", "%8s", "hello"));
+  MOZ_RELEASE_ASSERT(print_one("hello   ", "%*s", -8, "hello"));
+  MOZ_RELEASE_ASSERT(print_one("hello", "%.*s", 5, "hello there"));
+  MOZ_RELEASE_ASSERT(print_one("", "%.*s", 0, "hello there"));
+  MOZ_RELEASE_ASSERT(print_one("%%", "%%%%"));
+  MOZ_RELEASE_ASSERT(print_one("0", "%p", (char*)0));
+  MOZ_RELEASE_ASSERT(print_one("h", "%c", 'h'));
+  MOZ_RELEASE_ASSERT(print_one("1.500000", "%f", 1.5f));
+  MOZ_RELEASE_ASSERT(print_one("1.5", "%g", 1.5));
+  MOZ_RELEASE_ASSERT(print_one("1.50000", "%.5f", 1.5));
+
+  MOZ_RELEASE_ASSERT(print_one("z      ", "%-7s", "z"));
+  MOZ_RELEASE_ASSERT(print_one("z      ", "%*s", -7, "z"));
+  MOZ_RELEASE_ASSERT(print_one("hello", "%*s", -3, "hello"));
+
+  MOZ_RELEASE_ASSERT(print_one("  q", "%3c", 'q'));
+  MOZ_RELEASE_ASSERT(print_one("q  ", "%-3c", 'q'));
+  MOZ_RELEASE_ASSERT(print_one("  q", "%*c", 3, 'q'));
+  MOZ_RELEASE_ASSERT(print_one("q  ", "%*c", -3, 'q'));
+
+  // Regression test for bug#1350097: printing a very long floating point
+  // value used to trip an assertion.  The result is deliberately not checked.
+  print_one("ignore", "%lf", DBL_MAX);
+
+  // Regression test for bug#1517433: printing a floating point value with a
+  // large precision and/or width used to trip an assertion.  As above, the
+  // result is deliberately not checked.
+  print_one("ignore", "%500.500lf", DBL_MAX);
+
+  MOZ_RELEASE_ASSERT(print_one("2727", "%" PRIu32, (uint32_t)2727));
+  MOZ_RELEASE_ASSERT(print_one("aa7", "%" PRIx32, (uint32_t)2727));
+  MOZ_RELEASE_ASSERT(print_one("2727", "%" PRIu64, (uint64_t)2727));
+  MOZ_RELEASE_ASSERT(print_one("aa7", "%" PRIx64, (uint64_t)2727));
+
+  int n1, n2;
+  MOZ_RELEASE_ASSERT(print_one(" hi ", "%n hi %n", &n1, &n2));
+  MOZ_RELEASE_ASSERT(n1 == 0);  // nothing was written before the first %n
+  MOZ_RELEASE_ASSERT(n2 == 4);  // " hi " is four characters
+
+  MOZ_RELEASE_ASSERT(print_one("23 % 24", "%2$ld %% %1$d", 24, 23l));
+  MOZ_RELEASE_ASSERT(
+      print_one("7 8 9 10", "%4$lld %3$ld %2$d %1$hd", (short)10, 9, 8l, 7ll));
+
+  MOZ_RELEASE_ASSERT(print_one("0 ", "%2$p %1$n", &n1, zero()));
+  MOZ_RELEASE_ASSERT(n1 == 2);  // "0 " was written before %1$n
+
+  MOZ_RELEASE_ASSERT(print_one("23 % 024", "%2$-3ld%%%1$4.3d", 24, 23l));
+  MOZ_RELEASE_ASSERT(print_one("23 1.5", "%2$d %1$g", 1.5, 23));
+  MOZ_RELEASE_ASSERT(
+      print_one("ff number FF", "%3$llx %1$s %2$lX", "number", 255ul, 255ull));
+  MOZ_RELEASE_ASSERT(
+      print_one("7799 9977", "%2$zu %1$zu", (size_t)9977, (size_t)7799));
+}
+
+#ifdef XP_WIN
+int wmain()
+#else
+int main()
+#endif  // XP_WIN
+{
+  // Failures trip MOZ_RELEASE_ASSERT; reaching the end means success.
+  TestPrintfFormats();
+  TestPrintfTargetPrint();
+  return 0;
+}
diff --git a/mozglue/tests/TestTimeStampWin.cpp b/mozglue/tests/TestTimeStampWin.cpp
new file mode 100644
index 0000000000..4e7437450a
--- /dev/null
+++ b/mozglue/tests/TestTimeStampWin.cpp
@@ -0,0 +1,97 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/CmdLineAndEnvUtils.h"
+#include "mozilla/TimeStamp.h"
+
+#include "nsWindowsHelpers.h"
+
+#include <stdio.h>
+#include <windows.h>
+
+static wchar_t kChildArg[] = L"--child";
+
+// Launches |aPath| as a child process with the single argument --child.
+// Returns the process handle, or an empty handle if CreateProcessW fails.
+static nsReturnRef<HANDLE> CreateProcessWrapper(const wchar_t* aPath) {
+  const wchar_t* args[] = {aPath, kChildArg};
+  mozilla::UniquePtr<wchar_t[]> commandLine(
+      mozilla::MakeCommandLine(mozilla::ArrayLength(args), args));
+
+  STARTUPINFOW startupInfo = {sizeof(startupInfo)};
+  PROCESS_INFORMATION procInfo;
+  if (!::CreateProcessW(aPath, commandLine.get(), nullptr, nullptr, FALSE, 0,
+                        nullptr, nullptr, &startupInfo, &procInfo)) {
+    printf(
+        "TEST-FAILED | TimeStampWin | "
+        "CreateProcess failed - %08lx\n",
+        GetLastError());
+    nsAutoHandle none;
+    return none.out();
+  }
+
+  // The primary thread handle is not needed; it is closed on scope exit.
+  nsAutoHandle mainThread(procInfo.hThread);
+  nsAutoHandle process(procInfo.hProcess);
+  return process.out();
+}
+
+// Entry point for --child mode.  Returns 1 (after printing a failure line) if
+// the process creation timestamp is later than the current one, otherwise 0.
+int ChildMain() {
+  bool inconsistent = false;
+  auto createdAt = mozilla::TimeStamp::ProcessCreation(&inconsistent);
+  const bool creationInFuture = createdAt > mozilla::TimeStamp::Now();
+  if (!creationInFuture) {
+    return 0;
+  }
+  printf(
+      "TEST-FAILED | TimeStampWin | "
+      "Process creation timestamp is bigger than the current "
+      "timestamp!\n");
+  return 1;
+}
+
+// Without arguments: launches this executable with --child 20 times and fails
+// on the first child that misbehaves.  With --child: runs ChildMain().
+int wmain(int argc, wchar_t* argv[]) {
+  if (argc == 2 && wcscmp(argv[1], kChildArg) == 0) {
+    return ChildMain();
+  }
+
+  if (argc != 1) {
+    printf(
+        "TEST-FAILED | TimeStampWin | "
+        "Unexpected argc\n");
+    return 1;
+  }
+
+  for (int run = 0; run < 20; ++run) {
+    nsAutoHandle child(CreateProcessWrapper(argv[0]));
+    const DWORD waitResult = ::WaitForSingleObject(child, 60000);
+    if (waitResult != WAIT_OBJECT_0) {
+      printf(
+          "TEST-FAILED | TimeStampWin | "
+          "Unexpected result from WaitForSingleObject\n");
+      return 1;
+    }
+
+    DWORD exitCode;
+    if (!::GetExitCodeProcess(child.get(), &exitCode)) {
+      printf(
+          "TEST-FAILED | TimeStampWin | "
+          "GetExitCodeProcess failed - %08lx\n",
+          GetLastError());
+      return 1;
+    }
+
+    if (exitCode != 0) {
+      return exitCode;
+    }
+  }
+
+  return 0;
+}
diff --git a/mozglue/tests/gtest/TestDLLBlocklist.cpp b/mozglue/tests/gtest/TestDLLBlocklist.cpp
new file mode 100644
index 0000000000..97c4ea9f9b
--- /dev/null
+++ b/mozglue/tests/gtest/TestDLLBlocklist.cpp
@@ -0,0 +1,161 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+#include <winternl.h>
+
+#include <process.h>
+
+#include "gtest/gtest.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Char16.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsString.h"
+#include "nsTArray.h"
+#include "nsWindowsHelpers.h"
+
+// Full path of |aLeaf| in the current working directory; failures are EXPECTed.
+static nsString GetFullPath(const nsAString& aLeaf) {
+  nsString ret;
+  nsCOMPtr<nsIFile> f;
+  EXPECT_TRUE(NS_SUCCEEDED(
+      NS_GetSpecialDirectory(NS_OS_CURRENT_WORKING_DIR, getter_AddRefs(f))));
+  if (!f) {
+    return ret;  // EXPECT_TRUE is non-fatal; don't dereference a null nsIFile.
+  }
+  EXPECT_TRUE(NS_SUCCEEDED(f->Append(aLeaf)));
+  bool exists = false;
+  EXPECT_TRUE(NS_SUCCEEDED(f->Exists(&exists)) && exists);
+  EXPECT_TRUE(NS_SUCCEEDED(f->GetPath(ret)));
+  return ret;
+}
+
+TEST(TestDllBlocklist, BlockDllByName)
+{
+  // The mixed-case DLL name also checks that the comparison is
+  // case-insensitive.
+  constexpr auto kDllLeaf = u"TestDllBlocklist_MatchByName.dll"_ns;
+  const nsString fullPath = GetFullPath(kDllLeaf);
+  nsModuleHandle dll(::LoadLibraryW(fullPath.get()));
+
+  // The load must fail and no module of that name may be found afterwards.
+  EXPECT_TRUE(!dll);
+  EXPECT_TRUE(!::GetModuleHandleW(kDllLeaf.get()));
+}
+
+TEST(TestDllBlocklist, BlockDllByVersion)
+{
+  // The load must fail and no module of that name may be found afterwards.
+  constexpr auto kDllLeaf = u"TestDllBlocklist_MatchByVersion.dll"_ns;
+  const nsString fullPath = GetFullPath(kDllLeaf);
+  nsModuleHandle dll(::LoadLibraryW(fullPath.get()));
+
+  EXPECT_TRUE(!dll);
+  EXPECT_TRUE(!::GetModuleHandleW(kDllLeaf.get()));
+}
+
+TEST(TestDllBlocklist, AllowDllByVersion)
+{
+  // The load must succeed and the module must be found by name afterwards.
+  constexpr auto kDllLeaf = u"TestDllBlocklist_AllowByVersion.dll"_ns;
+  const nsString fullPath = GetFullPath(kDllLeaf);
+  nsModuleHandle dll(::LoadLibraryW(fullPath.get()));
+
+  EXPECT_TRUE(!!dll);
+  EXPECT_TRUE(!!::GetModuleHandleW(kDllLeaf.get()));
+}
+
+TEST(TestDllBlocklist, NoOpEntryPoint)
+{
+  // This DLL's DllMain contains MOZ_RELEASE_ASSERT(0), so a successful load
+  // shows that the module was loaded without running DllMain.
+  constexpr auto kDllLeaf = u"TestDllBlocklist_NoOpEntryPoint.dll"_ns;
+  const nsString fullPath = GetFullPath(kDllLeaf);
+
+  nsModuleHandle dll(::LoadLibraryW(fullPath.get()));
+
+#if defined(MOZ_ASAN)
+  // ASAN builds use mozglue's blocklist, which ignores
+  // REDIRECT_TO_NOOP_ENTRYPOINT, so here LoadLibraryW is expected
+  // to fail instead.
+  EXPECT_TRUE(!dll);
+  EXPECT_TRUE(!::GetModuleHandleW(kDllLeaf.get()));
+#else
+  EXPECT_TRUE(!!dll);
+  EXPECT_TRUE(!!::GetModuleHandleW(kDllLeaf.get()));
+#endif
+}
+
+#define DLL_BLOCKLIST_ENTRY(name, ...) {name, __VA_ARGS__},
+#define DLL_BLOCKLIST_STRING_TYPE const char*
+#include "mozilla/WindowsDllBlocklistLegacyDefs.h"
+
+TEST(TestDllBlocklist, BlocklistIntegrity)
+{
+  nsTArray<DLL_BLOCKLIST_STRING_TYPE> seenNames;
+  DECLARE_POINTER_TO_FIRST_DLL_BLOCKLIST_ENTRY(pFirst);
+  DECLARE_POINTER_TO_LAST_DLL_BLOCKLIST_ENTRY(pLast);
+
+  // The final entry is expected to have no name, version or flags.
+  EXPECT_FALSE(pLast->mName || pLast->mMaxVersion || pLast->mFlags);
+
+  const size_t entryCount = mozilla::ArrayLength(gWindowsDllBlocklist) - 1;
+  for (size_t idx = 0; idx < entryCount; ++idx) {
+    auto entry = pFirst + idx;
+    // Every other entry needs a name longer than three characters...
+    EXPECT_TRUE(!!entry->mName);
+    EXPECT_GT(strlen(entry->mName), 3);
+
+    // ...that contains no ASCII uppercase letters...
+    for (auto ch = entry->mName; *ch != 0; ++ch) {
+      EXPECT_FALSE(*ch >= 'A' && *ch <= 'Z');
+    }
+
+    // ...and that differs, ignoring case, from every name seen so far.
+    for (auto&& seen : seenNames) {
+      EXPECT_NE(stricmp(seen, entry->mName), 0);
+    }
+    seenNames.AppendElement(entry->mName);
+  }
+}
+
+TEST(TestDllBlocklist, BlockThreadWithLoadLibraryEntryPoint)
+{
+  // Only supported on Nightly
+#if defined(NIGHTLY_BUILD)
+  using ThreadProc = unsigned(__stdcall*)(void*);
+  constexpr auto kDllLeaf = u"TestDllBlocklist_MatchByVersion.dll"_ns;
+
+  nsString widePath = GetFullPath(kDllLeaf);
+  EXPECT_FALSE(widePath.IsEmpty());
+
+  // Use LoadLibraryW itself as the thread entry point, with the DLL path as
+  // its argument.  The thread is expected to finish with a zero exit code.
+  nsAutoHandle wideThread(reinterpret_cast<HANDLE>(
+      _beginthreadex(nullptr, 0, reinterpret_cast<ThreadProc>(&::LoadLibraryW),
+                     (void*)widePath.get(), 0, nullptr)));
+  EXPECT_TRUE(!!wideThread);
+  EXPECT_EQ(::WaitForSingleObject(wideThread, INFINITE), WAIT_OBJECT_0);
+
+  DWORD exitCode;
+  EXPECT_TRUE(::GetExitCodeThread(wideThread, &exitCode) && !exitCode);
+  EXPECT_TRUE(!::GetModuleHandleW(kDllLeaf.get()));
+
+  // Same check with LoadLibraryA and a narrow copy of the path.
+  const NS_LossyConvertUTF16toASCII narrowPath(widePath);
+  EXPECT_FALSE(narrowPath.IsEmpty());
+
+  nsAutoHandle narrowThread(reinterpret_cast<HANDLE>(
+      _beginthreadex(nullptr, 0, reinterpret_cast<ThreadProc>(&::LoadLibraryA),
+                     (void*)narrowPath.get(), 0, nullptr)));
+  EXPECT_TRUE(!!narrowThread);
+  EXPECT_EQ(::WaitForSingleObject(narrowThread, INFINITE), WAIT_OBJECT_0);
+  EXPECT_TRUE(::GetExitCodeThread(narrowThread, &exitCode) && !exitCode);
+  EXPECT_TRUE(!::GetModuleHandleW(kDllLeaf.get()));
+#endif  // defined(NIGHTLY_BUILD)
+}
diff --git a/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.cpp b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.cpp
new file mode 100644
index 0000000000..7bd936296e
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.cpp
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID) { return TRUE; }  // no-op
diff --git a/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.rc b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.rc
new file mode 100644
index 0000000000..f56aa099ff
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/TestDllBlocklist_AllowByVersion.rc
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <winver.h>
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+  FILEVERSION    5,5,5,6
+  PRODUCTVERSION 5,5,5,1
+  FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+  FILEFLAGS 0x1L
+#else
+  FILEFLAGS 0x0L
+#endif
+  FILEOS VOS__WINDOWS32
+  FILETYPE VFT_DLL
+  FILESUBTYPE 0x0L
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "040904e4"
+    BEGIN
+      VALUE "CompanyName", "mozilla.org"
+      VALUE "FileDescription", L"Test DLL"
+      VALUE "FileVersion", "1.0"
+      VALUE "InternalName", "Test DLL"
+      VALUE "OriginalFilename", "TestDllBlocklist_AllowByVersion.dll"
+      VALUE "ProductName", "Test DLL"
+      VALUE "ProductVersion", "1.0"
+    END
+  END
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0409, 1252
+  END
+END
diff --git a/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/moz.build b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/moz.build
new file mode 100644
index 0000000000..0987cdde1a
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_AllowByVersion/moz.build
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIST_INSTALL = False
+
+SharedLibrary("TestDllBlocklist_AllowByVersion")
+
+UNIFIED_SOURCES = [
+    "TestDllBlocklist_AllowByVersion.cpp",
+]
+
+RCFILE = "TestDllBlocklist_AllowByVersion.rc"
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    TEST_HARNESS_FILES.gtest += ["!TestDllBlocklist_AllowByVersion.dll"]
diff --git a/mozglue/tests/gtest/TestDllBlocklist_MatchByName/TestDllBlocklist_MatchByName.cpp b/mozglue/tests/gtest/TestDllBlocklist_MatchByName/TestDllBlocklist_MatchByName.cpp
new file mode 100644
index 0000000000..7bd936296e
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_MatchByName/TestDllBlocklist_MatchByName.cpp
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID) { return TRUE; }  // no-op
diff --git a/mozglue/tests/gtest/TestDllBlocklist_MatchByName/moz.build b/mozglue/tests/gtest/TestDllBlocklist_MatchByName/moz.build
new file mode 100644
index 0000000000..f34931898a
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_MatchByName/moz.build
@@ -0,0 +1,15 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIST_INSTALL = False
+
+SharedLibrary("TestDllBlocklist_MatchByName")
+
+UNIFIED_SOURCES = [
+    "TestDllBlocklist_MatchByName.cpp",
+]
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    TEST_HARNESS_FILES.gtest += ["!TestDllBlocklist_MatchByName.dll"]
diff --git a/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.cpp b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.cpp
new file mode 100644
index 0000000000..7bd936296e
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.cpp
@@ -0,0 +1,7 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID) { return TRUE; }  // no-op
diff --git a/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.rc b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.rc
new file mode 100644
index 0000000000..7390c1cb34
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/TestDllBlocklist_MatchByVersion.rc
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <winver.h>
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+  FILEVERSION    5,5,5,5
+  PRODUCTVERSION 5,5,5,1
+  FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+  FILEFLAGS 0x1L
+#else
+  FILEFLAGS 0x0L
+#endif
+  FILEOS VOS__WINDOWS32
+  FILETYPE VFT_DLL
+  FILESUBTYPE 0x0L
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "040904e4"
+    BEGIN
+      VALUE "CompanyName", "mozilla.org"
+      VALUE "FileDescription", L"Test DLL"
+      VALUE "FileVersion", "1.0"
+      VALUE "InternalName", "Test DLL"
+      VALUE "OriginalFilename", "TestDllBlocklist_MatchByVersion.dll"
+      VALUE "ProductName", "Test DLL"
+      VALUE "ProductVersion", "1.0"
+    END
+  END
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0409, 1252
+  END
+END
diff --git a/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/moz.build b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/moz.build
new file mode 100644
index 0000000000..38e10524c7
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_MatchByVersion/moz.build
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIST_INSTALL = False
+
+SharedLibrary("TestDllBlocklist_MatchByVersion")
+
+UNIFIED_SOURCES = [
+    "TestDllBlocklist_MatchByVersion.cpp",
+]
+
+RCFILE = "TestDllBlocklist_MatchByVersion.rc"
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    TEST_HARNESS_FILES.gtest += ["!TestDllBlocklist_MatchByVersion.dll"]
diff --git a/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.cpp b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.cpp
new file mode 100644
index 0000000000..2505b8b700
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.cpp
@@ -0,0 +1,12 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+#include "mozilla/Assertions.h"
+
+BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID) {
+  MOZ_RELEASE_ASSERT(0);  // Running this entry point at all is a failure.
+  return TRUE;
+}
diff --git a/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.rc b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.rc
new file mode 100644
index 0000000000..7c79dac373
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/TestDllBlocklist_NoOpEntryPoint.rc
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <winver.h>
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+  FILEVERSION    5,5,5,5
+  PRODUCTVERSION 5,5,5,1
+  FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+  FILEFLAGS 0x1L
+#else
+  FILEFLAGS 0x0L
+#endif
+  FILEOS VOS__WINDOWS32
+  FILETYPE VFT_DLL
+  FILESUBTYPE 0x0L
+BEGIN
+  BLOCK "StringFileInfo"
+  BEGIN
+    BLOCK "040904e4"
+    BEGIN
+      VALUE "CompanyName", "mozilla.org"
+      VALUE "FileDescription", L"Test DLL"
+      VALUE "FileVersion", "1.0"
+      VALUE "InternalName", "Test DLL"
+      VALUE "OriginalFilename", "TestDllBlocklist_NoOpEntryPoint.dll"
+      VALUE "ProductName", "Test DLL"
+      VALUE "ProductVersion", "1.0"
+    END
+  END
+  BLOCK "VarFileInfo"
+  BEGIN
+    VALUE "Translation", 0x0409, 1252
+  END
+END
diff --git a/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/moz.build b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/moz.build
new file mode 100644
index 0000000000..57fae737c4
--- /dev/null
+++ b/mozglue/tests/gtest/TestDllBlocklist_NoOpEntryPoint/moz.build
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIST_INSTALL = False
+
+SharedLibrary("TestDllBlocklist_NoOpEntryPoint")
+
+UNIFIED_SOURCES = [
+    "TestDllBlocklist_NoOpEntryPoint.cpp",
+]
+
+RCFILE = "TestDllBlocklist_NoOpEntryPoint.rc"
+
+if CONFIG["COMPILE_ENVIRONMENT"]:
+    TEST_HARNESS_FILES.gtest += ["!TestDllBlocklist_NoOpEntryPoint.dll"]
diff --git a/mozglue/tests/gtest/TestNativeNtGTest.cpp b/mozglue/tests/gtest/TestNativeNtGTest.cpp
new file mode 100644
index 0000000000..e0f0a343a7
--- /dev/null
+++ b/mozglue/tests/gtest/TestNativeNtGTest.cpp
@@ -0,0 +1,20 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+
+#include "gtest/gtest.h"
+
+#include "mozilla/NativeNt.h"
+
+TEST(TestNativeNtGTest, GenerateDependentModuleSet)
+{
+  mozilla::nt::PEHeaders thisExe(::GetModuleHandleW(nullptr));
+  auto deps = thisExe.GenerateDependentModuleSet();
+  EXPECT_NE(deps.GetEntry(u"mozglue.dll"_ns), nullptr);
+  EXPECT_NE(deps.GetEntry(u"MOZGLUE.dll"_ns), nullptr);  // other casing too
+  EXPECT_EQ(deps.GetEntry(u"xxx.dll"_ns), nullptr);  // must not be present
+}
diff --git a/mozglue/tests/gtest/moz.build b/mozglue/tests/gtest/moz.build
new file mode 100644
index 0000000000..5a5e2d8ac5
--- /dev/null
+++ b/mozglue/tests/gtest/moz.build
@@ -0,0 +1,17 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+    "TestDLLBlocklist.cpp",
+    "TestNativeNtGTest.cpp",
+]
+
+FINAL_LIBRARY = "xul-gtest"
+
+TEST_DIRS += [
+    "TestDllBlocklist_AllowByVersion",
+    "TestDllBlocklist_MatchByName",
+    "TestDllBlocklist_MatchByVersion",
+    "TestDllBlocklist_NoOpEntryPoint",
+]
diff --git a/mozglue/tests/interceptor/AssemblyPayloads.h b/mozglue/tests/interceptor/AssemblyPayloads.h
new file mode 100644
index 0000000000..f053e161b0
--- /dev/null
+++ b/mozglue/tests/interceptor/AssemblyPayloads.h
@@ -0,0 +1,194 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+/* These assembly functions represent patterns that were already hooked by
+ * another application before our detour.
+ */
+
+#ifndef mozilla_AssemblyPayloads_h
+#define mozilla_AssemblyPayloads_h
+
+#define PADDING_256_NOP                                              \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;" \
+  "nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;"
+
+extern "C" {
+
+#if defined(__clang__)
+#  if defined(_M_X64)
+constexpr uintptr_t JumpDestination = 0x7fff00000000;
+
+__declspec(dllexport) __attribute__((naked)) void MovPushRet() {
+  asm volatile(
+      "mov %0, %%rax;"  // rax = JumpDestination
+      "push %%rax;"     // push it where a return address would be...
+      "ret;"            // ...so that ret transfers control to it
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void MovRaxJump() {
+  asm volatile(
+      "mov %0, %%rax;"  // rax = JumpDestination
+      "jmpq *%%rax;"    // indirect jump through rax
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void DoubleJump() {
+  asm volatile(
+      "jmp label1;"  // first jump: forward, past the padding
+
+      "label2:"
+      "mov %0, %%rax;"  // rax = JumpDestination
+      "jmpq *%%rax;"    // final indirect jump through rax
+
+      // 0x100 bytes padding to generate jmp rel32 instead of jmp rel8
+      PADDING_256_NOP
+
+      "label1:"
+      "jmp label2;"  // second jump: back to the mov/jmp pair above
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void NearJump() {
+  asm volatile(
+      "jae label3;"  // three conditional jumps, all forward past the padding
+      "je  label3;"
+      "jne label3;"
+
+      "label4:"
+      "mov %0, %%rax;"  // rax = JumpDestination
+      "jmpq *%%rax;"    // final indirect jump through rax
+
+      // 0x100 bytes padding to generate jae rel32 instead of jae rel8
+      PADDING_256_NOP
+
+      "label3:"
+      "jmp label4;"  // back to the mov/jmp pair above
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void OpcodeFF() {
+  // Skip PUSH (FF /6) because clang prefers Opcode 50+rd
+  // to translate PUSH r64 rather than Opcode FF.
+  asm volatile(
+      "incl %eax;"    // FF /0
+      "decl %ebx;"    // FF /1
+      "call *%rcx;"   // FF /2
+      "jmp *(%rip);"  // Indirect jump to 0xcccccccc`cccccccc
+      "int $3;int $3;int $3;int $3;"  // eight int3 bytes (0xcc) follow the
+      "int $3;int $3;int $3;int $3;");  // jmp; they are the pointer it reads
+}
+
+__declspec(dllexport) __attribute__((naked)) void IndirectCall() {
+  asm volatile(
+      "call *(%rip);"  // Indirect call to 0x90909090`90909090
+      "nop;nop;nop;nop;nop;nop;nop;nop;"  // eight nops: the pointer it reads
+      "ret;");
+}
+
+__declspec(dllexport) __attribute__((naked)) void MovImm64() {
+  asm volatile(
+      "mov $0x1234567812345678, %r10;"  // load a 64-bit immediate into r10
+      "nop;nop;nop");                   // followed by three nops
+}
+
+#  elif defined(_M_IX86)
+constexpr uintptr_t JumpDestination = 0x7fff0000;
+
+__declspec(dllexport) __attribute__((naked)) void PushRet() {
+  asm volatile(
+      "push %0;"  // push JumpDestination where a return address would be...
+      "ret;"      // ...so that ret transfers control to it
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void MovEaxJump() {
+  asm volatile(
+      "mov %0, %%eax;"  // eax = JumpDestination
+      "jmp *%%eax;"     // indirect jump through eax
+      :
+      : "i"(JumpDestination));
+}
+
+__declspec(dllexport) __attribute__((naked)) void Opcode83() {
+  asm volatile(
+      "xor $0x42, %eax;"       // expected encoding: 83 /6 ib
+      "cmpl $1, 0xc(%ebp);");  // expected encoding: 83 /7 ib
+}
+
+__declspec(dllexport) __attribute__((naked)) void LockPrefix() {
+  // Test an instruction with a LOCK prefix (0xf0) at a non-zero offset
+  asm volatile(
+      "push $0x7c;"        // plain push first, so the prefix is not at offset 0
+      "lock push $0x7c;");  // the LOCK-prefixed instruction under test
+}
+
+__declspec(dllexport) __attribute__((naked)) void LooksLikeLockPrefix() {
+  // This is for a regression scenario of bug 1625452, where we double-counted
+  // the offset in CountPrefixBytes.  When we count prefix bytes in front of
+  // the 2nd PUSH located at offset 2, we mistakenly started counting from
+  // the byte 0xf0 at offset 4, which is considered as LOCK, thus we try to
+  // detour the next byte 0xcc and it fails.
+  //
+  // 0: 6a7c       push 7Ch
+  // 2: 68ccf00000 push 0F0CCh
+  //
+  asm volatile(
+      "push $0x7c;"          // offset 0 in the listing above
+      "push $0x0000f0cc;");  // offset 2; its immediate holds the 0xf0 byte
+}
+
+__declspec(dllexport) __attribute__((naked)) void DoubleJump() {
+  asm volatile(
+      "jmp label1;"  // first jump: forward, past the padding
+
+      "label2:"
+      "mov %0, %%eax;"  // eax = JumpDestination
+      "jmp *%%eax;"     // final indirect jump through eax
+
+      // 0x100 bytes padding to generate jmp rel32 instead of jmp rel8
+      PADDING_256_NOP
+
+      "label1:"
+      "jmp label2;"  // second jump: back to the mov/jmp pair above
+      :
+      : "i"(JumpDestination));
+}
+#  endif
+
+#  if !defined(_M_ARM64)
+__declspec(dllexport) __attribute__((naked)) void UnsupportedOp() {
+  asm volatile(
+      "ud2;"  // the function starts with ud2...
+      "nop;nop;nop;nop;nop;nop;nop;nop;"  // ...followed by sixteen nops
+      "nop;nop;nop;nop;nop;nop;nop;nop;");
+}
+#  endif  // !defined(_M_ARM64)
+
+#endif  // defined(__clang__)
+
+}  // extern "C"
+
+#endif  // mozilla_AssemblyPayloads_h
diff --git a/mozglue/tests/interceptor/TestDllInterceptor.cpp b/mozglue/tests/interceptor/TestDllInterceptor.cpp
new file mode 100644
index 0000000000..a5e0de2885
--- /dev/null
+++ b/mozglue/tests/interceptor/TestDllInterceptor.cpp
@@ -0,0 +1,1105 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <shlobj.h>
+#include <stdio.h>
+#include <commdlg.h>
+#define SECURITY_WIN32
+#include <security.h>
+#include <wininet.h>
+#include <schnlsp.h>
+#include <winternl.h>
+#include <processthreadsapi.h>
+
+#include "AssemblyPayloads.h"
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/WindowsVersion.h"
+#include "nsWindowsDllInterceptor.h"
+#include "nsWindowsHelpers.h"
+
+NTSTATUS NTAPI NtFlushBuffersFile(HANDLE, PIO_STATUS_BLOCK);
+NTSTATUS NTAPI NtReadFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID,
+                          PIO_STATUS_BLOCK, PVOID, ULONG, PLARGE_INTEGER,
+                          PULONG);
+NTSTATUS NTAPI NtReadFileScatter(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID,
+                                 PIO_STATUS_BLOCK, PFILE_SEGMENT_ELEMENT, ULONG,
+                                 PLARGE_INTEGER, PULONG);
+NTSTATUS NTAPI NtWriteFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID,
+                           PIO_STATUS_BLOCK, PVOID, ULONG, PLARGE_INTEGER,
+                           PULONG);
+NTSTATUS NTAPI NtWriteFileGather(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID,
+                                 PIO_STATUS_BLOCK, PFILE_SEGMENT_ELEMENT, ULONG,
+                                 PLARGE_INTEGER, PULONG);
+NTSTATUS NTAPI NtQueryFullAttributesFile(POBJECT_ATTRIBUTES, PVOID);
+NTSTATUS NTAPI LdrLoadDll(PWCHAR filePath, PULONG flags,
+                          PUNICODE_STRING moduleFileName, PHANDLE handle);
+NTSTATUS NTAPI LdrUnloadDll(HMODULE);
+
+NTSTATUS NTAPI NtMapViewOfSection(
+    HANDLE aSection, HANDLE aProcess, PVOID* aBaseAddress, ULONG_PTR aZeroBits,
+    SIZE_T aCommitSize, PLARGE_INTEGER aSectionOffset, PSIZE_T aViewSize,
+    SECTION_INHERIT aInheritDisposition, ULONG aAllocationType,
+    ULONG aProtectionFlags);
+
+// These pointers are disguised as PVOID to avoid pulling in obscure headers
+PVOID NTAPI LdrResolveDelayLoadedAPI(PVOID, PVOID, PVOID, PVOID, PVOID, ULONG);
+void CALLBACK ProcessCaretEvents(HWINEVENTHOOK, DWORD, HWND, LONG, LONG, DWORD,
+                                 DWORD);
+void __fastcall BaseThreadInitThunk(BOOL aIsInitialThread, void* aStartAddress,
+                                    void* aThreadParam);
+
+BOOL WINAPI ApiSetQueryApiSetPresence(PCUNICODE_STRING, PBOOLEAN);
+
+#if (_WIN32_WINNT < 0x0602)
+BOOL WINAPI
+SetProcessMitigationPolicy(PROCESS_MITIGATION_POLICY aMitigationPolicy,
+                           PVOID aBuffer, SIZE_T aBufferLen);
+#endif  // (_WIN32_WINNT < 0x0602)
+
+using namespace mozilla;
+
+struct payload {  // three 64-bit words exchanged with the hooked test exports
+  UINT64 a;
+  UINT64 b;
+  UINT64 c;
+  // Member-wise equality: true only when all three words match.
+  bool operator==(const payload& rhs) const {
+    return !(a != rhs.a || b != rhs.b || c != rhs.c);
+  }
+};
+
+extern "C" __declspec(dllexport) __declspec(noinline) payload
+    rotatePayload(payload p) {  // exported; wmain hooks it by name
+  UINT64 tmp = p.a;  // rotate the fields by one: (a, b, c) -> (b, c, a)
+  p.a = p.b;
+  p.b = p.c;
+  p.c = tmp;
+  return p;
+}
+
+// payloadNotHooked is the target function for a test that expects a negative
+// result.  We cannot use rotatePayload for that purpose because our detour
+// cannot hook a function that is already detoured.  Keep it always unhooked.
+extern "C" __declspec(dllexport) __declspec(noinline) payload
+    payloadNotHooked(payload p) {
+  // Do something different from rotatePayload to avoid ICF (code folding).
+  p.a ^= p.b;
+  p.b ^= p.c;
+  p.c ^= p.a;  // uses the p.a that was already updated above
+  return p;
+}
+
+static bool patched_func_called = false;
+
+static WindowsDllInterceptor::FuncHookType<decltype(&rotatePayload)>
+    orig_rotatePayload;
+
+static WindowsDllInterceptor::FuncHookType<decltype(&payloadNotHooked)>
+    orig_payloadNotHooked;
+
+static payload patched_rotatePayload(payload p) {  // hook body used by tests
+  patched_func_called = true;    // record that the hook actually ran
+  return orig_rotatePayload(p);  // then forward through orig_rotatePayload
+}
+
+// Invoke aFunc, expanding the elements of the tuple aArgs as its arguments.
+template <typename OrigFuncT, typename... Args,
+          typename ArgTuple = Tuple<Args...>, size_t... Indices>
+decltype(auto) Apply(OrigFuncT& aFunc, ArgTuple&& aArgs,
+                     std::index_sequence<Indices...>) {  // tag deduces Indices
+  return aFunc(Get<Indices>(std::forward<ArgTuple>(aArgs))...);
+}
+
+#define DEFINE_TEST_FUNCTION(calling_convention)                               \
+  template <typename R, typename... Args, typename... TestArgs>                \
+  bool TestFunction(R(calling_convention* aFunc)(Args...), bool (*aPred)(R),   \
+                    TestArgs&&... aArgs) {                                     \
+    using ArgTuple = Tuple<Args...>;                                           \
+    using Indices = std::index_sequence_for<Args...>;                          \
+    ArgTuple fakeArgs{std::forward<TestArgs>(aArgs)...};                       \
+    patched_func_called = false;                                               \
+    return aPred(Apply(aFunc, std::forward<ArgTuple>(fakeArgs), Indices())) && \
+           patched_func_called;                                                \
+  }                                                                            \
+                                                                               \
+  /* Overload for functions whose return is void */                            \
+  template <typename PredT, typename... Args, typename... TestArgs>            \
+  bool TestFunction(void(calling_convention * aFunc)(Args...), PredT,          \
+                    TestArgs&&... aArgs) {                                     \
+    using ArgTuple = Tuple<Args...>;                                           \
+    using Indices = std::index_sequence_for<Args...>;                          \
+    ArgTuple fakeArgs{std::forward<TestArgs>(aArgs)...};                       \
+    patched_func_called = false;                                               \
+    Apply(aFunc, std::forward<ArgTuple>(fakeArgs), Indices());                 \
+    return patched_func_called;                                                \
+  }
+
+// C++11 allows empty arguments to macros. clang works just fine. MSVC does the
+// right thing, but it also throws up warning C4003.
+#if defined(_MSC_VER) && !defined(__clang__)
+DEFINE_TEST_FUNCTION(__cdecl)
+#else
+DEFINE_TEST_FUNCTION()
+#endif
+
+#ifdef _M_IX86
+DEFINE_TEST_FUNCTION(__stdcall)
+DEFINE_TEST_FUNCTION(__fastcall)
+#endif  // _M_IX86
+
+// Test the hooked function against the supplied predicate.
+// aOrigFunc is called through TestFunction; the check passes only when aPred
+// accepts the return value (the predicate is not consulted for functions
+// returning void) and the hook set patched_func_called.  The outcome is
+// printed for aFuncName/aDllName and stdout is flushed for both results, so
+// a failure line is not lost in the stdio buffer if the process dies later.
+template <typename OrigFuncT, typename PredicateT, typename... Args>
+bool CheckHook(OrigFuncT& aOrigFunc, const char* aDllName,
+               const char* aFuncName, PredicateT&& aPred, Args&&... aArgs) {
+  const bool ok = TestFunction(aOrigFunc, std::forward<PredicateT>(aPred),
+                               std::forward<Args>(aArgs)...);
+  printf(ok ? "TEST-PASS | WindowsDllInterceptor | "
+              "Executed hooked function %s from %s\n"
+            : "TEST-FAILED | WindowsDllInterceptor | "
+              "Failed to execute hooked function %s from %s\n",
+         aFuncName, aDllName);
+  fflush(stdout);
+  return ok;
+}
+
+struct InterceptorFunction {  // one patchable hook thunk in executable memory
+  static const size_t EXEC_MEMBLOCK_SIZE = 64 * 1024;  // 64K
+
+  static InterceptorFunction& Create() {
+    // Make sure the executable memory is allocated
+    if (!sBlock) {
+      Init();
+    }
+    MOZ_RELEASE_ASSERT(sBlock);  // VirtualAlloc may fail in release builds too
+
+    // Make sure we aren't making more functions than we allocated room for
+    MOZ_RELEASE_ASSERT((sNumInstances + 1) * sizeof(InterceptorFunction) <=
+                       EXEC_MEMBLOCK_SIZE);
+
+    // Grab the next InterceptorFunction from executable memory
+    InterceptorFunction& ret = *reinterpret_cast<InterceptorFunction*>(
+        sBlock + (sNumInstances++ * sizeof(InterceptorFunction)));
+
+    // Set the InterceptorFunction to the code template.
+    auto funcCode = &ret[0];
+    memcpy(funcCode, sInterceptorTemplate, TemplateLength);
+
+    // Fill in the patched_func_called pointer in the template.
+    auto pfPtr = reinterpret_cast<bool**>(&ret[PatchedFuncCalledIndex]);
+    *pfPtr = &patched_func_called;
+    return ret;
+  }
+
+  uint8_t& operator[](size_t i) { return mFuncCode[i]; }  // byte i of the code
+
+  uint8_t* GetFunction() { return mFuncCode; }  // start of this copy's code
+
+  void SetStub(uintptr_t aStub) {  // fills the stub-address slot of the code
+    auto pfPtr = reinterpret_cast<uintptr_t*>(&mFuncCode[StubFuncIndex]);
+    *pfPtr = aStub;
+  }
+
+ private:
+  // We intercept functions with short machine-code functions that set a boolean
+  // and run the stub that launches the original function.  Each instance
+  // holds the code for one of those interceptor functions.  We cannot
+  // free this memory until the test shuts down.
+  // The templates have spots for the address of patched_func_called
+  // and for the address of the stub function.  Their indices in the byte
+  // array are given as constants below and they appear as blocks of
+  // 0xff bytes in the templates.
+#if defined(_M_X64)
+  //  0: 48 b8 ff ff ff ff ff ff ff ff    movabs rax, &patched_func_called
+  //  a: c6 00 01                         mov    BYTE PTR [rax],0x1
+  //  d: 48 b8 ff ff ff ff ff ff ff ff    movabs rax, &stub_func_ptr
+  // 17: ff e0                            jmp    rax
+  static constexpr uint8_t sInterceptorTemplate[] = {
+      0x48, 0xB8, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xC6, 0x00, 0x01, 0x48, 0xB8, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0};
+  static const size_t PatchedFuncCalledIndex = 0x2;
+  static const size_t StubFuncIndex = 0xf;
+#elif defined(_M_IX86)
+  // 0: c6 05 ff ff ff ff 01     mov    BYTE PTR &patched_func_called, 0x1
+  // 7: 68 ff ff ff ff           push   &stub_func_ptr
+  // c: c3                       ret
+  static constexpr uint8_t sInterceptorTemplate[] = {
+      0xC6, 0x05, 0xFF, 0xFF, 0xFF, 0xFF, 0x01,
+      0x68, 0xFF, 0xFF, 0xFF, 0xFF, 0xC3};
+  static const size_t PatchedFuncCalledIndex = 0x2;
+  static const size_t StubFuncIndex = 0x8;
+#elif defined(_M_ARM64)
+  //  0: 31 00 80 52    movz w17, #0x1
+  //  4: 90 00 00 58    ldr  x16, #16
+  //  8: 11 02 00 39    strb w17, [x16]
+  //  c: 90 00 00 58    ldr  x16, #16
+  // 10: 00 02 1F D6    br   x16
+  // 14: &patched_func_called
+  // 1c: &stub_func_ptr
+  static constexpr uint8_t sInterceptorTemplate[] = {
+      0x31, 0x00, 0x80, 0x52, 0x90, 0x00, 0x00, 0x58, 0x11, 0x02, 0x00, 0x39,
+      0x90, 0x00, 0x00, 0x58, 0x00, 0x02, 0x1F, 0xD6, 0xFF, 0xFF, 0xFF, 0xFF,
+      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+  static const size_t PatchedFuncCalledIndex = 0x14;
+  static const size_t StubFuncIndex = 0x1c;
+#else
+#  error "Missing template for architecture"
+#endif
+
+  static const size_t TemplateLength = sizeof(sInterceptorTemplate);
+  uint8_t mFuncCode[TemplateLength];  // this instance's copy of the template
+
+  InterceptorFunction() = delete;  // instances only come from Create()
+  InterceptorFunction(const InterceptorFunction&) = delete;
+  InterceptorFunction& operator=(const InterceptorFunction&) = delete;
+
+  static void Init() {  // allocates the block that all instances live in
+    MOZ_ASSERT(!sBlock);
+    sBlock = reinterpret_cast<uint8_t*>(
+        ::VirtualAlloc(nullptr, EXEC_MEMBLOCK_SIZE, MEM_RESERVE | MEM_COMMIT,
+                       PAGE_EXECUTE_READWRITE));
+  }
+
+  static uint8_t* sBlock;        // base of the executable block, null until Init
+  static size_t sNumInstances;   // number of instances handed out by Create()
+};
+
+uint8_t* InterceptorFunction::sBlock = nullptr;
+size_t InterceptorFunction::sNumInstances = 0;
+
+constexpr uint8_t InterceptorFunction::sInterceptorTemplate[];
+
+// Installs a hook on |func| exported by |dll|, reports the result on stdout
+// and, unless |aPred| is null, calls the export to check that the hook fires.
+template <typename OrigFuncT, size_t N, typename PredicateT, typename... Args>
+bool TestHook(const char (&dll)[N], const char* func, PredicateT&& aPred,
+              Args&&... aArgs) {
+  auto orig_func(
+      mozilla::MakeUnique<WindowsDllInterceptor::FuncHookType<OrigFuncT>>());
+  // Copy all N chars of the narrow DLL name into a wide buffer for the W APIs.
+  wchar_t dllW[N];
+  std::copy(std::begin(dll), std::end(dll), std::begin(dllW));
+
+  WindowsDllInterceptor TestIntercept;
+  TestIntercept.Init(dll);
+
+  InterceptorFunction& thunk = InterceptorFunction::Create();
+  const bool successful = orig_func->Set(
+      TestIntercept, func, reinterpret_cast<OrigFuncT>(thunk.GetFunction()));
+
+  if (!successful) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to hook %s from "
+        "%s\n",
+        func, dll);
+    fflush(stdout);
+
+    // Print out the function's bytes so that we can easily analyze the error.
+    nsModuleHandle mod(::LoadLibraryW(dllW));
+    FARPROC funcAddr = ::GetProcAddress(mod, func);
+    if (!funcAddr) {
+      return false;
+    }
+
+    const uint32_t kNumBytesToDump =
+        WindowsDllInterceptor::GetWorstCaseRequiredBytesToPatch();
+    printf("\tFirst %u bytes of function:\n\t", kNumBytesToDump);
+
+    auto code = reinterpret_cast<const uint8_t*>(funcAddr);
+    for (uint32_t i = 0; i < kNumBytesToDump; ++i) {
+      // Bytes are space-separated; the last one is followed by a newline.
+      printf("%02hhX%c", code[i], (i + 1 < kNumBytesToDump) ? ' ' : '\n');
+    }
+    fflush(stdout);
+    return false;
+  }
+
+  thunk.SetStub(reinterpret_cast<uintptr_t>(orig_func->GetStub()));
+  printf("TEST-PASS | WindowsDllInterceptor | Could hook %s from %s\n", func,
+         dll);
+  fflush(stdout);
+  if (!aPred) {
+    printf(
+        "TEST-SKIPPED | WindowsDllInterceptor | "
+        "Will not attempt to execute patched %s.\n",
+        func);
+    fflush(stdout);
+    return true;
+  }
+
+  // Test the DLL function we just hooked.
+  HMODULE module = ::LoadLibraryW(dllW);
+  FARPROC funcAddr = ::GetProcAddress(module, func);
+  if (!funcAddr) {
+    return false;
+  }
+
+  return CheckHook(reinterpret_cast<OrigFuncT&>(funcAddr), dll, func,
+                   std::forward<PredicateT>(aPred),
+                   std::forward<Args>(aArgs)...);
+}
+
+// Detour the function with SetDetour() and optionally attempt calling it; any
+// extra arguments (see TEST_DETOUR_PARAMS) are forwarded to the hooked call.
+template <typename OrigFuncT, size_t N, typename PredicateT, typename... Args>
+bool TestDetour(const char (&dll)[N], const char* func, PredicateT&& aPred,
+                Args&&... aArgs) {
+  auto orig_func(
+      mozilla::MakeUnique<WindowsDllInterceptor::FuncHookType<OrigFuncT>>());
+  wchar_t dllW[N];
+  std::copy(std::begin(dll), std::end(dll), std::begin(dllW));
+
+  WindowsDllInterceptor TestIntercept;
+  TestIntercept.Init(dll);
+
+  InterceptorFunction& thunk = InterceptorFunction::Create();
+  const bool successful = orig_func->SetDetour(
+      TestIntercept, func, reinterpret_cast<OrigFuncT>(thunk.GetFunction()));
+  if (!successful) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to detour %s "
+        "from %s\n",
+        func, dll);
+    fflush(stdout);
+    return false;
+  }
+
+  thunk.SetStub(reinterpret_cast<uintptr_t>(orig_func->GetStub()));
+  printf("TEST-PASS | WindowsDllInterceptor | Could detour %s from %s\n",
+         func, dll);
+  fflush(stdout);
+  if (!aPred) {
+    printf(
+        "TEST-SKIPPED | WindowsDllInterceptor | "
+        "Will not attempt to execute patched %s.\n",
+        func);
+    fflush(stdout);
+    return true;
+  }
+
+  // Test the DLL function we just hooked.
+  HMODULE module = ::LoadLibraryW(dllW);
+  FARPROC funcAddr = ::GetProcAddress(module, func);
+  if (!funcAddr) {
+    return false;
+  }
+
+  return CheckHook(reinterpret_cast<OrigFuncT&>(funcAddr), dll, func,
+                   std::forward<PredicateT>(aPred),
+                   std::forward<Args>(aArgs)...);
+}
+
+// If a function pointer's type returns void*, this template converts that type
+// to return uintptr_t instead, for the purposes of predicates.
+template <typename FuncT>
+struct SubstituteForVoidPtr {
+  using Type = FuncT;  // primary template: any other type is left unchanged
+};
+
+template <typename... Args>
+struct SubstituteForVoidPtr<void* (*)(Args...)> {  // default convention
+  using Type = uintptr_t (*)(Args...);
+};
+
+#ifdef _M_IX86
+template <typename... Args>
+struct SubstituteForVoidPtr<void*(__stdcall*)(Args...)> {  // x86 __stdcall
+  using Type = uintptr_t(__stdcall*)(Args...);
+};
+
+template <typename... Args>
+struct SubstituteForVoidPtr<void*(__fastcall*)(Args...)> {  // x86 __fastcall
+  using Type = uintptr_t(__fastcall*)(Args...);
+};
+#endif  // _M_IX86
+
+// Exposes the return type of a function pointer type as ReturnType<F>::Type
+template <typename FuncT>
+struct ReturnType;  // declared only; defined below for function pointers
+
+template <typename R, typename... Args>
+struct ReturnType<R (*)(Args...)> {  // default calling convention
+  using Type = R;
+};
+
+#ifdef _M_IX86
+template <typename R, typename... Args>
+struct ReturnType<R(__stdcall*)(Args...)> {  // x86 __stdcall
+  using Type = R;
+};
+
+template <typename R, typename... Args>
+struct ReturnType<R(__fastcall*)(Args...)> {  // x86 __fastcall
+  using Type = R;
+};
+#endif  // _M_IX86
+
+// Predicates that may be supplied during tests
+template <typename FuncT>
+struct Predicates {
+  using ArgType = typename ReturnType<FuncT>::Type;  // FuncT's return type
+
+  template <ArgType CompVal>
+  static bool Equals(ArgType aValue) {  // accepts only a result == CompVal
+    return CompVal == aValue;
+  }
+
+  template <ArgType CompVal>
+  static bool NotEquals(ArgType aValue) {  // accepts any result != CompVal
+    return CompVal != aValue;
+  }
+
+  template <ArgType CompVal>
+  static bool Ignore(ArgType aValue) {  // accepts every result; CompVal unused
+    return true;
+  }
+};
+
+// Functions that return void have no result to check, so these specializations
+// only provide Ignore. Use nullptr as the value to compare against.
+template <typename... Args>
+struct Predicates<void (*)(Args...)> {
+  template <nullptr_t DummyVal>
+  static bool Ignore() {
+    return true;
+  }
+};
+
+#ifdef _M_IX86
+template <typename... Args>
+struct Predicates<void(__stdcall*)(Args...)> {
+  template <nullptr_t DummyVal>
+  static bool Ignore() {
+    return true;
+  }
+};
+
+template <typename... Args>
+struct Predicates<void(__fastcall*)(Args...)> {
+  template <nullptr_t DummyVal>
+  static bool Ignore() {
+    return true;
+  }
+};
+#endif  // _M_IX86
+
+// The standard test. Hook |func|, and then try executing it with all-zero
+// arguments, using |pred| and |comp| to determine whether the call executed
+// successfully. In general, you want to set pred and comp such that they
+// return true when the function returns whatever value is expected with
+// all-zero arguments.
+//
+// Note: When |func| returns void, you must supply |Ignore| and |nullptr| as the
+// |pred| and |comp| arguments, respectively.
+#define TEST_HOOK(dll, func, pred, comp) \
+  TestHook<decltype(&func)>(dll, #func,  \
+                            &Predicates<decltype(&func)>::pred<comp>)
+
+// We need to special-case functions that return INVALID_HANDLE_VALUE
+// (ie, CreateFile). Our template machinery for comparing values doesn't work
+// with integer constants passed as pointers (well, it works on MSVC, but not
+// clang, because that is not standard-compliant).
+#define TEST_HOOK_FOR_INVALID_HANDLE_VALUE(dll, func)                   \
+  TestHook<SubstituteForVoidPtr<decltype(&func)>::Type>(                \
+      dll, #func,                                                       \
+      &Predicates<SubstituteForVoidPtr<decltype(&func)>::Type>::Equals< \
+          uintptr_t(-1)>)
+
+// This variant allows you to explicitly supply arguments to the hooked function
+// during testing. You want to provide arguments that produce the conditions
+// that induce the function to return a value that is accepted by your
+// predicate.
+#define TEST_HOOK_PARAMS(dll, func, pred, comp, ...) \
+  TestHook<decltype(&func)>(                         \
+      dll, #func, &Predicates<decltype(&func)>::pred<comp>, __VA_ARGS__)
+
+// This is for cases when we want to hook |func|, but it is unsafe to attempt
+// to execute the function in the context of a test.
+#define TEST_HOOK_SKIP_EXEC(dll, func)                                        \
+  TestHook<decltype(&func)>(                                                  \
+      dll, #func,                                                             \
+      reinterpret_cast<bool (*)(typename ReturnType<decltype(&func)>::Type)>( \
+          NULL))
+
+// The following three variants are identical to the previous macros, except
+// that they forcibly use a detour on 32-bit Windows. On 64-bit Windows, these
+// macros are identical to their TEST_HOOK variants.
+#define TEST_DETOUR(dll, func, pred, comp) \
+  TestDetour<decltype(&func)>(dll, #func,  \
+                              &Predicates<decltype(&func)>::pred<comp>)
+
+#define TEST_DETOUR_PARAMS(dll, func, pred, comp, ...) \
+  TestDetour<decltype(&func)>(                         \
+      dll, #func, &Predicates<decltype(&func)>::pred<comp>, __VA_ARGS__)
+
+#define TEST_DETOUR_SKIP_EXEC(dll, func)                                      \
+  TestDetour<decltype(&func)>(                                                \
+      dll, #func,                                                             \
+      reinterpret_cast<bool (*)(typename ReturnType<decltype(&func)>::Type)>( \
+          NULL))
+
+// Runs TestHook only when |cond| holds; otherwise logs a skip and passes.
+template <typename OrigFuncT, size_t N, typename PredicateT, typename... Args>
+bool MaybeTestHook(const bool cond, const char (&dll)[N], const char* func,
+                   PredicateT&& aPred, Args&&... aArgs) {
+  if (cond) {
+    return TestHook<OrigFuncT>(dll, func, std::forward<PredicateT>(aPred),
+                               std::forward<Args>(aArgs)...);
+  }
+  printf(
+      "TEST-SKIPPED | WindowsDllInterceptor | Skipped hook test for %s from "
+      "%s\n",
+      func, dll);
+  fflush(stdout);
+  return true;
+}
+
+// Like TEST_HOOK, but the test is only executed when cond is true.
+#define MAYBE_TEST_HOOK(cond, dll, func, pred, comp) \
+  MaybeTestHook<decltype(&func)>(cond, dll, #func,   \
+                                 &Predicates<decltype(&func)>::pred<comp>)
+
+#define MAYBE_TEST_HOOK_PARAMS(cond, dll, func, pred, comp, ...) \
+  MaybeTestHook<decltype(&func)>(                                \
+      cond, dll, #func, &Predicates<decltype(&func)>::pred<comp>, __VA_ARGS__)
+
+#define MAYBE_TEST_HOOK_SKIP_EXEC(cond, dll, func)                            \
+  MaybeTestHook<decltype(&func)>(                                             \
+      cond, dll, #func,                                                       \
+      reinterpret_cast<bool (*)(typename ReturnType<decltype(&func)>::Type)>( \
+          NULL))
+
+// True when tiptsf.dll (Win8+) could be loaded; hooks on it are tested later.
+bool ShouldTestTipTsf() {
+  if (!IsWin8OrLater()) {
+    return false;
+  }
+
+  mozilla::DynamicallyLinkedFunctionPtr<decltype(&SHGetKnownFolderPath)>
+      pSHGetKnownFolderPath(L"shell32.dll", "SHGetKnownFolderPath");
+  if (!pSHGetKnownFolderPath) {
+    return false;
+  }
+
+  PWSTR commonFilesPath = nullptr;
+  const HRESULT hr = pSHGetKnownFolderPath(FOLDERID_ProgramFilesCommon, 0,
+                                           nullptr, &commonFilesPath);
+  // Only build the path if it fits: unchecked wcscpy/wcscat could overrun it.
+  static const wchar_t kTipTsf[] = L"\\Microsoft Shared\\Ink\\tiptsf.dll";
+  wchar_t fullPath[MAX_PATH + 1] = {};
+  const bool havePath = SUCCEEDED(hr) && commonFilesPath &&
+                        wcslen(commonFilesPath) + wcslen(kTipTsf) <= MAX_PATH;
+  if (havePath) {
+    wcscpy(fullPath, commonFilesPath);
+    wcscat(fullPath, kTipTsf);
+  }
+  // Free the buffer whether or not SHGetKnownFolderPath succeeded.
+  CoTaskMemFree(commonFilesPath);
+  // On success the module is leaked so that it stays loaded for the test.
+  return havePath && LoadLibraryW(fullPath) != nullptr;
+}
+
+static const wchar_t gEmptyUnicodeStringLiteral[] = L"";
+static UNICODE_STRING gEmptyUnicodeString;
+static BOOLEAN gIsPresent;
+
+// True if ApiSetQueryApiSetPresence resolves; then also sets up its test input
+bool HasApiSetQueryApiSetPresence() {
+  mozilla::DynamicallyLinkedFunctionPtr<decltype(&ApiSetQueryApiSetPresence)>
+      apiSetQuery(L"Api-ms-win-core-apiquery-l1-1-0.dll",
+                  "ApiSetQueryApiSetPresence");
+  const bool available = !!apiSetQuery;
+  if (available) {
+    // Prepare gEmptyUnicodeString for the test
+    ::RtlInitUnicodeString(&gEmptyUnicodeString, gEmptyUnicodeStringLiteral);
+  }
+  return available;
+}
+
+// Set this to true to test function unhooking.
+const bool ShouldTestUnhookFunction = false;
+
+#if defined(_M_X64) || defined(_M_ARM64)
+
+// Use VMSharingPolicyUnique for the ShortInterceptor, as it needs to
+// reserve its trampoline memory in a special location.
+using ShortInterceptor = mozilla::interceptor::WindowsDllInterceptor<
+    mozilla::interceptor::VMSharingPolicyUnique<
+        mozilla::interceptor::MMPolicyInProcess>>;
+
+static ShortInterceptor::FuncHookType<decltype(&::NtMapViewOfSection)>
+    orig_NtMapViewOfSection;
+
+#endif  // defined(_M_X64) || defined(_M_ARM64)
+
+bool TestShortDetour() {  // x64/ARM64 only; trivially passes elsewhere
+#if defined(_M_X64) || defined(_M_ARM64)
+  auto pNtMapViewOfSection = reinterpret_cast<decltype(&::NtMapViewOfSection)>(
+      ::GetProcAddress(::GetModuleHandleW(L"ntdll.dll"), "NtMapViewOfSection"));
+  if (!pNtMapViewOfSection) {
+    printf(
+        "TEST-FAILED | WindowsDllInterceptor | "
+        "Failed to resolve ntdll!NtMapViewOfSection\n");
+    fflush(stdout);
+    return false;
+  }
+
+  {  // Scope for shortInterceptor
+    ShortInterceptor shortInterceptor;
+    shortInterceptor.TestOnlyDetourInit(
+        L"ntdll.dll",
+        mozilla::interceptor::DetourFlags::eTestOnlyForceShortPatch);
+
+    InterceptorFunction& interceptorFunc = InterceptorFunction::Create();
+    if (!orig_NtMapViewOfSection.SetDetour(
+            shortInterceptor, "NtMapViewOfSection",
+            reinterpret_cast<decltype(&::NtMapViewOfSection)>(
+                interceptorFunc.GetFunction()))) {
+      printf(
+          "TEST-FAILED | WindowsDllInterceptor | "
+          "Failed to hook ntdll!NtMapViewOfSection via 10-byte patch\n");
+      fflush(stdout);
+      return false;
+    }
+
+    interceptorFunc.SetStub(
+        reinterpret_cast<uintptr_t>(orig_NtMapViewOfSection.GetStub()));
+
+    auto pred =  // any NTSTATUS is accepted; only the hook call is checked
+        &Predicates<decltype(&::NtMapViewOfSection)>::Ignore<((NTSTATUS)0)>;
+
+    if (!CheckHook(pNtMapViewOfSection, "ntdll.dll", "NtMapViewOfSection",
+                   pred)) {
+      // CheckHook has already printed the error message for us
+      return false;
+    }
+  }
+
+  // Now ensure that our hook cleanup worked
+  if (ShouldTestUnhookFunction) {  // constant flag declared earlier in the file
+    NTSTATUS status =  // all-null arguments: the call is expected to fail
+        pNtMapViewOfSection(nullptr, nullptr, nullptr, 0, 0, nullptr, nullptr,
+                            ((SECTION_INHERIT)0), 0, 0);
+    if (NT_SUCCESS(status)) {
+      printf(
+          "TEST-FAILED | WindowsDllInterceptor | "
+          "Unexpected successful call to ntdll!NtMapViewOfSection after "
+          "removing short-patched hook\n");
+      fflush(stdout);
+      return false;
+    }
+
+    printf(
+        "TEST-PASS | WindowsDllInterceptor | "
+        "Successfully unhooked ntdll!NtMapViewOfSection via short patch\n");
+    fflush(stdout);
+  }
+
+  return true;
+#else
+  return true;
+#endif
+}
+
+constexpr uintptr_t NoStubAddressCheck = 0;  // do not compare the stub address
+constexpr uintptr_t ExpectedFail = 1;        // hooking is expected to fail
+struct TestCase {
+  const char* mFunctionName;  // name of the function to hook
+  uintptr_t mExpectedStub;    // expected stub address, or a sentinel above
+  bool mPatchedOnce;          // set once the function has been hooked
+  explicit TestCase(const char* aFunctionName, uintptr_t aExpectedStub)
+      : mFunctionName(aFunctionName),
+        mExpectedStub(aExpectedStub),
+        mPatchedOnce(false) {}
+} g_AssemblyTestCases[] = {
+#if defined(__clang__)
+// We disable these testcases because the code coverage instrumentation injects
+// code in a way that WindowsDllInterceptor doesn't understand.
+#  ifndef MOZ_CODE_COVERAGE
+#    if defined(_M_X64)
+    // Since we have PatchIfTargetIsRecognizedTrampoline for x64, we expect the
+    // original jump destination is returned as a stub.
+    TestCase("MovPushRet", JumpDestination),
+    TestCase("MovRaxJump", JumpDestination),
+    TestCase("DoubleJump", JumpDestination),
+
+    // Passing NoStubAddressCheck as the following testcases return
+    // a trampoline address instead of the original destination.
+    TestCase("NearJump", NoStubAddressCheck),
+    TestCase("OpcodeFF", NoStubAddressCheck),
+    TestCase("IndirectCall", NoStubAddressCheck),
+    TestCase("MovImm64", NoStubAddressCheck),
+#    elif defined(_M_IX86)
+    // Skip the stub address check as we always generate a trampoline for x86.
+    TestCase("PushRet", NoStubAddressCheck),
+    TestCase("MovEaxJump", NoStubAddressCheck),
+    TestCase("DoubleJump", NoStubAddressCheck),
+    TestCase("Opcode83", NoStubAddressCheck),
+    TestCase("LockPrefix", NoStubAddressCheck),
+    TestCase("LooksLikeLockPrefix", NoStubAddressCheck),
+#    endif
+#    if !defined(DEBUG)
+    // Skip on Debug build because it hits MOZ_ASSERT_UNREACHABLE.
+    TestCase("UnsupportedOp", ExpectedFail),
+#    endif  // !defined(DEBUG)
+#  endif    // MOZ_CODE_COVERAGE
+#endif      // defined(__clang__)
+};
+
+template <typename InterceptorType>
+bool TestAssemblyFunctions() {  // hooks each entry of g_AssemblyTestCases
+  static const auto patchedFunction = []() { patched_func_called = true; };
+
+  InterceptorType interceptor;
+  interceptor.Init("TestDllInterceptor.exe");
+
+  for (auto& testCase : g_AssemblyTestCases) {
+    if (testCase.mExpectedStub == NoStubAddressCheck && testCase.mPatchedOnce) {
+      // For the testcases with NoStubAddressCheck, we revert a hook by
+      // jumping into the original stub, which is not detourable again.
+      continue;
+    }
+
+    typename InterceptorType::template FuncHookType<void (*)()> hook;
+    bool result =
+        hook.Set(interceptor, testCase.mFunctionName, patchedFunction);
+    if (testCase.mExpectedStub == ExpectedFail) {
+      if (result) {  // the hook must not succeed for an ExpectedFail case
+        printf(
+            "TEST-FAILED | WindowsDllInterceptor | "
+            "Unexpectedly succeeded to detour %s.\n",
+            testCase.mFunctionName);
+        return false;
+      }
+#if defined(NIGHTLY_BUILD)
+      const Maybe<DetourError>& maybeError = interceptor.GetLastDetourError();
+      if (maybeError.isNothing()) {
+        printf(
+            "TEST-FAILED | WindowsDllInterceptor | "
+            "DetourError was not set on detour error.\n");
+        return false;
+      }
+      if (maybeError.ref().mErrorCode !=
+          DetourResultCode::DETOUR_PATCHER_CREATE_TRAMPOLINE_ERROR) {
+        printf(
+            "TEST-FAILED | WindowsDllInterceptor | "
+            "A wrong detour errorcode was set on detour error.\n");
+        return false;
+      }
+#endif  // defined(NIGHTLY_BUILD)
+      printf("TEST-PASS | WindowsDllInterceptor | %s\n",
+             testCase.mFunctionName);
+      continue;
+    }
+
+    if (!result) {
+      printf(
+          "TEST-FAILED | WindowsDllInterceptor | "
+          "Failed to detour %s.\n",
+          testCase.mFunctionName);
+      return false;
+    }
+
+    testCase.mPatchedOnce = true;  // read by the skip check at the loop top
+
+    const auto actualStub = reinterpret_cast<uintptr_t>(hook.GetStub());
+    if (testCase.mExpectedStub != NoStubAddressCheck &&
+        actualStub != testCase.mExpectedStub) {
+      printf(
+          "TEST-FAILED | WindowsDllInterceptor | "
+          "Wrong stub was backed up for %s: %zx\n",
+          testCase.mFunctionName, actualStub);
+      return false;
+    }
+
+    patched_func_called = false;  // reset so this call's effect is observable
+
+    auto originalFunction = reinterpret_cast<void (*)()>(
+        GetProcAddress(GetModuleHandleW(nullptr), testCase.mFunctionName));
+    originalFunction();  // NOTE(review): the lookup result is not null-checked
+
+    if (!patched_func_called) {
+      printf(
+          "TEST-FAILED | WindowsDllInterceptor | "
+          "Hook from %s was not called\n",
+          testCase.mFunctionName);
+      return false;
+    }
+
+    printf("TEST-PASS | WindowsDllInterceptor | %s\n", testCase.mFunctionName);
+  }
+
+  return true;
+}
+
+bool TestDynamicCodePolicy() {  // hooking must fail once dynamic code is off
+  if (!IsWin8Point1OrLater()) {
+    // Skip if a platform does not support this policy.
+    return true;
+  }
+
+  PROCESS_MITIGATION_DYNAMIC_CODE_POLICY policy = {};
+  policy.ProhibitDynamicCode = true;
+
+  mozilla::DynamicallyLinkedFunctionPtr<decltype(&SetProcessMitigationPolicy)>
+      pSetProcessMitigationPolicy(L"kernel32.dll",
+                                  "SetProcessMitigationPolicy");
+  if (!pSetProcessMitigationPolicy) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | "
+        "SetProcessMitigationPolicy does not exist.\n");
+    fflush(stdout);
+    return false;
+  }
+
+  // NOTE(review): presumably this stays in force for the rest of the process.
+  if (!pSetProcessMitigationPolicy(ProcessDynamicCodePolicy, &policy,
+                                   sizeof(policy))) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | "
+        "Fail to enable ProcessDynamicCodePolicy.\n");
+    fflush(stdout);
+    return false;
+  }
+
+  WindowsDllInterceptor ExeIntercept;
+  ExeIntercept.Init("TestDllInterceptor.exe");
+
+  // Make sure we fail to hook a function if ProcessDynamicCodePolicy is on
+  // because we cannot create an executable trampoline region.
+  if (orig_payloadNotHooked.Set(ExeIntercept, "payloadNotHooked",
+                                &patched_rotatePayload)) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | "
+        "ProcessDynamicCodePolicy is not working.\n");
+    fflush(stdout);
+    return false;
+  }
+
+  printf(
+      "TEST-PASS | WindowsDllInterceptor | "
+      "Successfully passed TestDynamicCodePolicy.\n");
+  fflush(stdout);
+  return true;
+}
+
+extern "C" int wmain(int argc, wchar_t* argv[]) {
+  LARGE_INTEGER start;
+  QueryPerformanceCounter(&start);
+
+  // Sanity-check one hook/unhook cycle on rotatePayload first.  Skipped for
+  // code coverage builds: the instrumentation injects code in rotatePayload
+  // in a way that WindowsDllInterceptor doesn't understand.
+#ifndef MOZ_CODE_COVERAGE
+  payload initial = {0x12345678, 0xfc4e9d31, 0x87654321};
+  payload p0, p1;
+  ZeroMemory(&p0, sizeof(p0));
+  ZeroMemory(&p1, sizeof(p1));
+
+  p0 = rotatePayload(initial);
+
+  {
+    WindowsDllInterceptor ExeIntercept;
+    ExeIntercept.Init("TestDllInterceptor.exe");
+    if (orig_rotatePayload.Set(ExeIntercept, "rotatePayload",
+                               &patched_rotatePayload)) {
+      printf("TEST-PASS | WindowsDllInterceptor | Hook added\n");
+      fflush(stdout);
+    } else {
+      printf(
+          "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Failed to add "
+          "hook\n");
+      fflush(stdout);
+      return 1;
+    }
+
+    p1 = rotatePayload(initial);
+
+    if (patched_func_called) {
+      printf("TEST-PASS | WindowsDllInterceptor | Hook called\n");
+      fflush(stdout);
+    } else {
+      printf(
+          "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Hook was not "
+          "called\n");
+      fflush(stdout);
+      return 1;
+    }
+
+    if (p0 == p1) {
+      printf("TEST-PASS | WindowsDllInterceptor | Hook works properly\n");
+      fflush(stdout);
+    } else {
+      printf(
+          "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Hook didn't return "
+          "the right information\n");
+      fflush(stdout);
+      return 1;
+    }
+  }
+
+  patched_func_called = false;  // Reset; ExeIntercept is out of scope now.
+  ZeroMemory(&p1, sizeof(p1));
+
+  p1 = rotatePayload(initial);
+
+  if (ShouldTestUnhookFunction != patched_func_called) {
+    printf(
+        "TEST-PASS | WindowsDllInterceptor | Hook was %scalled after "
+        "unregistration\n",
+        ShouldTestUnhookFunction ? "not " : "");
+    fflush(stdout);
+  } else {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Hook was %scalled "
+        "after unregistration\n",
+        ShouldTestUnhookFunction ? "" : "not ");
+    fflush(stdout);
+    return 1;
+  }
+
+  if (p0 == p1) {
+    printf(
+        "TEST-PASS | WindowsDllInterceptor | Original function worked "
+        "properly\n");
+    fflush(stdout);
+  } else {
+    printf(
+        "TEST-UNEXPECTED-FAIL | WindowsDllInterceptor | Original function "
+        "didn't return the right information\n");
+    fflush(stdout);
+    return 1;
+  }
+#endif
+
+  CredHandle credHandle;  // Zeroed handle for the sspicli.dll hooks below.
+  memset(&credHandle, 0, sizeof(CredHandle));
+  OBJECT_ATTRIBUTES attributes = {};  // NtQueryFullAttributesFile argument.
+
+  // NB: These tests should be ordered such that lower-level APIs are tested
+  // before higher-level APIs.
+  if (TestShortDetour() &&
+  // Run <ShortInterceptor> first because <WindowsDllInterceptor>
+  // does not clean up hooks.
+#if defined(_M_X64)
+      TestAssemblyFunctions<ShortInterceptor>() &&
+#endif
+      TestAssemblyFunctions<WindowsDllInterceptor>() &&
+#ifdef _M_IX86
+      // We keep this test to hook complex code on x86. (Bug 850957)
+      TEST_HOOK("ntdll.dll", NtFlushBuffersFile, NotEquals, 0) &&
+#endif
+      TEST_HOOK("ntdll.dll", NtCreateFile, NotEquals, 0) &&
+      TEST_HOOK("ntdll.dll", NtReadFile, NotEquals, 0) &&
+      TEST_HOOK("ntdll.dll", NtReadFileScatter, NotEquals, 0) &&
+      TEST_HOOK("ntdll.dll", NtWriteFile, NotEquals, 0) &&
+      TEST_HOOK("ntdll.dll", NtWriteFileGather, NotEquals, 0) &&
+      TEST_HOOK_PARAMS("ntdll.dll", NtQueryFullAttributesFile, NotEquals, 0,
+                       &attributes, nullptr) &&
+      TEST_DETOUR_SKIP_EXEC("ntdll.dll", LdrLoadDll) &&
+      TEST_HOOK("ntdll.dll", LdrUnloadDll, NotEquals, 0) &&
+      MAYBE_TEST_HOOK_SKIP_EXEC(IsWin8OrLater(), "ntdll.dll",
+                                LdrResolveDelayLoadedAPI) &&
+      MAYBE_TEST_HOOK_PARAMS(HasApiSetQueryApiSetPresence(),
+                             "Api-ms-win-core-apiquery-l1-1-0.dll",
+                             ApiSetQueryApiSetPresence, Equals, FALSE,
+                             &gEmptyUnicodeString, &gIsPresent) &&
+      TEST_HOOK("kernelbase.dll", QueryDosDeviceW, Equals, 0) &&
+      TEST_HOOK("kernel32.dll", GetFileAttributesW, Equals,
+                INVALID_FILE_ATTRIBUTES) &&
+#if !defined(_M_ARM64)
+#  ifndef MOZ_ASAN
+      // Bug 733892: toolkit/crashreporter/nsExceptionHandler.cpp
+      // This fails on ASan because the ASan runtime already hooked this
+      // function
+      TEST_HOOK("kernel32.dll", SetUnhandledExceptionFilter, Ignore, nullptr) &&
+#  endif
+#endif  // !defined(_M_ARM64)
+#ifdef _M_IX86
+      TEST_HOOK_FOR_INVALID_HANDLE_VALUE("kernel32.dll", CreateFileW) &&
+#endif
+#if !defined(_M_ARM64)
+      TEST_HOOK_FOR_INVALID_HANDLE_VALUE("kernel32.dll", CreateFileA) &&
+#endif  // !defined(_M_ARM64)
+#if !defined(_M_ARM64)
+      TEST_HOOK("kernel32.dll", TlsAlloc, NotEquals, TLS_OUT_OF_INDEXES) &&
+      TEST_HOOK_PARAMS("kernel32.dll", TlsFree, Equals, FALSE,
+                       TLS_OUT_OF_INDEXES) &&
+      TEST_HOOK("kernel32.dll", CloseHandle, Equals, FALSE) &&
+      TEST_HOOK("kernel32.dll", DuplicateHandle, Equals, FALSE) &&
+#endif  // !defined(_M_ARM64)
+      TEST_DETOUR_SKIP_EXEC("kernel32.dll", BaseThreadInitThunk) &&
+#if defined(_M_X64) || defined(_M_ARM64)
+      MAYBE_TEST_HOOK(!IsWin8OrLater(), "kernel32.dll",
+                      RtlInstallFunctionTableCallback, Equals, FALSE) &&
+      TEST_HOOK("user32.dll", GetKeyState, Ignore, 0) &&  // see Bug 1316415
+#endif
+      TEST_HOOK("user32.dll", GetWindowInfo, Equals, FALSE) &&
+      TEST_HOOK("user32.dll", TrackPopupMenu, Equals, FALSE) &&
+      TEST_DETOUR("user32.dll", CreateWindowExW, Equals, nullptr) &&
+      TEST_HOOK("user32.dll", InSendMessageEx, Equals, ISMEX_NOSEND) &&
+      TEST_HOOK("user32.dll", SendMessageTimeoutW, Equals, 0) &&
+      TEST_HOOK("user32.dll", SetCursorPos, NotEquals, FALSE) &&
+#if !defined(_M_ARM64)
+      TEST_HOOK("imm32.dll", ImmGetContext, Equals, nullptr) &&
+#endif  // !defined(_M_ARM64)
+      TEST_HOOK("imm32.dll", ImmGetCompositionStringW, Ignore, 0) &&
+      TEST_HOOK_SKIP_EXEC("imm32.dll", ImmSetCandidateWindow) &&
+      TEST_HOOK("imm32.dll", ImmNotifyIME, Equals, 0) &&
+      TEST_HOOK("comdlg32.dll", GetSaveFileNameW, Ignore, FALSE) &&
+      TEST_HOOK("comdlg32.dll", GetOpenFileNameW, Ignore, FALSE) &&
+#if defined(_M_X64)
+      TEST_HOOK("comdlg32.dll", PrintDlgW, Ignore, 0) &&
+#endif
+      MAYBE_TEST_HOOK(ShouldTestTipTsf(), "tiptsf.dll", ProcessCaretEvents,
+                      Ignore, nullptr) &&
+      TEST_HOOK("wininet.dll", InternetOpenA, NotEquals, nullptr) &&
+      TEST_HOOK("wininet.dll", InternetCloseHandle, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", InternetConnectA, Equals, nullptr) &&
+      TEST_HOOK("wininet.dll", InternetQueryDataAvailable, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", InternetReadFile, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", InternetWriteFile, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", InternetSetOptionA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", HttpAddRequestHeadersA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", HttpOpenRequestA, Equals, nullptr) &&
+      TEST_HOOK("wininet.dll", HttpQueryInfoA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", HttpSendRequestA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", HttpSendRequestExA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", HttpEndRequestA, Equals, FALSE) &&
+      TEST_HOOK("wininet.dll", InternetQueryOptionA, Equals, FALSE) &&
+      TEST_HOOK("sspicli.dll", AcquireCredentialsHandleA, NotEquals,
+                SEC_E_OK) &&
+      TEST_HOOK_PARAMS("sspicli.dll", QueryCredentialsAttributesA, Equals,
+                       SEC_E_INVALID_HANDLE, &credHandle, 0, nullptr) &&
+      TEST_HOOK_PARAMS("sspicli.dll", FreeCredentialsHandle, Equals,
+                       SEC_E_INVALID_HANDLE, &credHandle) &&
+      // Run TestDynamicCodePolicy() at the end because the policy is
+      // irreversible.
+      TestDynamicCodePolicy()) {
+    printf("TEST-PASS | WindowsDllInterceptor | all checks passed\n");
+
+    LARGE_INTEGER end, freq;
+    QueryPerformanceCounter(&end);
+
+    QueryPerformanceFrequency(&freq);
+
+    LARGE_INTEGER result;
+    result.QuadPart = end.QuadPart - start.QuadPart;
+    result.QuadPart *= 1000000;  // Scale to microseconds before dividing.
+    result.QuadPart /= freq.QuadPart;
+
+    printf("Elapsed time: %lld microseconds\n", result.QuadPart);
+
+    return 0;  // Every check in the chain above passed.
+  }
+
+  return 1;  // At least one check in the chain above failed.
+}
diff --git a/mozglue/tests/interceptor/TestDllInterceptor.exe.manifest b/mozglue/tests/interceptor/TestDllInterceptor.exe.manifest
new file mode 100644
index 0000000000..11287012c5
--- /dev/null
+++ b/mozglue/tests/interceptor/TestDllInterceptor.exe.manifest
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1"
+          manifestVersion="1.0"
+          xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
+  <assemblyIdentity type="win32"
+                    name="TestDllInterceptor"
+                    version="1.0.0.0" />
+  <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
+    <application>
+      <!-- Need this to use functions in WindowsVersion.h -->
+      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> <!-- Win10 -->
+      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> <!-- Win8.1 -->
+      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> <!-- Win8 -->
+      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> <!-- Win7 -->
+    </application>
+  </compatibility>
+</assembly>
diff --git a/mozglue/tests/interceptor/TestDllInterceptorCrossProcess.cpp b/mozglue/tests/interceptor/TestDllInterceptorCrossProcess.cpp
new file mode 100644
index 0000000000..5bba4b1f8c
--- /dev/null
+++ b/mozglue/tests/interceptor/TestDllInterceptorCrossProcess.cpp
@@ -0,0 +1,159 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Attributes.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/CmdLineAndEnvUtils.h"
+#include "nsWindowsDllInterceptor.h"
+#include "nsWindowsHelpers.h"
+
+#include <string>
+
+using std::wstring;
+
+extern "C" __declspec(dllexport) int ReturnResult() { return 2; }
+
+static mozilla::CrossProcessDllInterceptor::FuncHookType<decltype(
+    &ReturnResult)>
+    gOrigReturnResult;  // Hook slot; ReturnResultHook() calls through it.
+
+static int ReturnResultHook() {
+  // Replacement for ReturnResult(): calls through the hook slot and maps
+  // "the original still returns 2" to 0, and any other value to 3.
+  const bool originalStillWorks = (gOrigReturnResult() == 2);
+
+  return originalStillWorks ? 0 : 3;
+}
+
+int ParentMain(int argc, wchar_t* argv[]) {
+  // Writes a ready-made harness line to stdout and yields the failing exit
+  // code, so that most error paths collapse into one return statement.
+  const auto fail = [](const char* aLogLine) {
+    fputs(aLogLine, stdout);
+    return 1;
+  };
+
+  mozilla::SetArgv0ToFullBinaryPath(argv);
+
+  // The child lives in a kill-on-close job: should this parent bail out
+  // early, closing the job handle takes the child process down with it.
+  nsAutoHandle job(::CreateJobObjectW(nullptr, nullptr));
+  if (!job) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Job creation "
+        "failed\n");
+  }
+
+  JOBOBJECT_EXTENDED_LIMIT_INFORMATION limits = {};
+  limits.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE;
+
+  if (!::SetInformationJobObject(job.get(), JobObjectExtendedLimitInformation,
+                                 &limits, sizeof(limits))) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Job config "
+        "failed\n");
+  }
+
+  wchar_t childFlag[] = L"-child";
+  wchar_t* childArgs[] = {argv[0], childFlag};
+
+  mozilla::UniquePtr<wchar_t[]> childCmdLine(
+      mozilla::MakeCommandLine(mozilla::ArrayLength(childArgs), childArgs));
+
+  // Start the child suspended so the hook can be installed before it runs.
+  STARTUPINFOW startupInfo = {sizeof(startupInfo)};
+  PROCESS_INFORMATION procInfo;
+  if (!::CreateProcessW(argv[0], childCmdLine.get(), nullptr, nullptr, FALSE,
+                        CREATE_SUSPENDED, nullptr, nullptr, &startupInfo,
+                        &procInfo)) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Failed to spawn "
+        "child process\n");
+  }
+
+  nsAutoHandle childProcess(procInfo.hProcess);
+  nsAutoHandle childMainThread(procInfo.hThread);
+
+  if (!::AssignProcessToJobObject(job.get(), childProcess.get())) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Failed to assign "
+        "child process to job\n");
+    ::TerminateProcess(childProcess.get(), 1);
+    return 1;
+  }
+
+  mozilla::nt::CrossExecTransferManager transferMgr(childProcess);
+  if (!transferMgr) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | "
+        "CrossExecTransferManager instantiation failed.\n");
+  }
+
+  mozilla::CrossProcessDllInterceptor interceptor(childProcess.get());
+  interceptor.Init("TestDllInterceptorCrossProcess.exe");
+
+  if (!gOrigReturnResult.Set(transferMgr, interceptor, "ReturnResult",
+                             &ReturnResultHook)) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Failed to add "
+        "hook\n");
+  }
+
+  printf("TEST-PASS | DllInterceptorCrossProcess | Hook added\n");
+
+  if (::ResumeThread(childMainThread.get()) == static_cast<DWORD>(-1)) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Failed to resume "
+        "child thread\n");
+  }
+
+  // Wait without a timeout when a debugger is attached to either process.
+  BOOL childDebugged;
+  const bool debuggerAttached =
+      ::IsDebuggerPresent() ||
+      (::CheckRemoteDebuggerPresent(childProcess.get(), &childDebugged) &&
+       childDebugged);
+  const DWORD timeoutMs = debuggerAttached ? INFINITE : 60000;
+
+  if (::WaitForSingleObject(childProcess.get(), timeoutMs) != WAIT_OBJECT_0) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Child process "
+        "failed to finish\n");
+  }
+
+  DWORD childExitCode;
+  if (!::GetExitCodeProcess(childProcess.get(), &childExitCode)) {
+    return fail(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Failed to obtain "
+        "child process exit code\n");
+  }
+
+  if (childExitCode) {
+    printf(
+        "TEST-UNEXPECTED-FAIL | DllInterceptorCrossProcess | Child process "
+        "exit code is %lu instead of 0\n",
+        childExitCode);
+    return 1;
+  }
+
+  printf(
+      "TEST-PASS | DllInterceptorCrossProcess | Child process exit code is "
+      "zero\n");
+  return 0;
+}
+
+extern "C" int wmain(int argc, wchar_t* argv[]) {
+  // No extra argument: this is the parent, which spawns and hooks the child.
+  if (argc <= 1) {
+    return ParentMain(argc, argv);
+  }
+
+  // clang keeps inlining a direct ReturnResult() call no matter what, so look
+  // the export up with GetProcAddress and call it through the pointer.
+  const auto exportedEntry = reinterpret_cast<decltype(&ReturnResult)>(
+      ::GetProcAddress(::GetModuleHandleW(nullptr), "ReturnResult"));
+  return exportedEntry();
+}
diff --git a/mozglue/tests/interceptor/TestIATPatcher.cpp b/mozglue/tests/interceptor/TestIATPatcher.cpp
new file mode 100644
index 0000000000..4dfb81ee9d
--- /dev/null
+++ b/mozglue/tests/interceptor/TestIATPatcher.cpp
@@ -0,0 +1,121 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Assertions.h"
+#include "mozilla/DynamicallyLinkedFunctionPtr.h"
+#include "nsWindowsDllInterceptor.h"
+#include "nsWindowsHelpers.h"
+
+#include <shlwapi.h>
+
+static int NormalImport() { return ::GetSystemMetrics(SM_CYCAPTION); }
+
+static bool DelayLoadImport() {  // UrlIsW comes from delay-loaded shlwapi.
+  return !!::UrlIsW(L"http://example.com/", URLIS_FILEURL);
+}
+
+static mozilla::WindowsIATPatcher::FuncHookType<decltype(&::GetSystemMetrics)>
+    gGetSystemMetricsHook;  // wmain expects Set() on this one to succeed.
+
+static mozilla::WindowsIATPatcher::FuncHookType<decltype(&::MessageBoxA)>
+    gMessageBoxAHook;  // Never imported by the test; Set() must fail.
+
+static mozilla::WindowsIATPatcher::FuncHookType<decltype(&::UrlIsW)> gUrlIsHook;
+
+static bool gGetSystemMetricsHookCalled = false;
+
+static int WINAPI GetSystemMetricsHook(int aIndex) {
+  MOZ_DIAGNOSTIC_ASSERT(aIndex == SM_CYCAPTION);
+  gGetSystemMetricsHookCalled = true;  // Checked by wmain after the call.
+  return 0;  // wmain treats a non-zero NormalImport() result as failure.
+}
+
+static bool gUrlIsHookCalled = false;
+
+static BOOL WINAPI UrlIsWHook(PCWSTR aUrl, URLIS aFlags) {
+  gUrlIsHookCalled = true;  // wmain expects this flag to stay false.
+  return TRUE;  // wmain fails if DelayLoadImport() ever observes this.
+}
+
+static HMODULE GetStrongReferenceToExeModule() {
+  // A null module name asks GetModuleHandleExW for the executable itself;
+  // nullptr is returned to the caller when the lookup fails.
+  HMODULE exeModule = nullptr;
+  const BOOL found = ::GetModuleHandleExW(0, nullptr, &exeModule);
+
+  return found ? exeModule : nullptr;
+}
+
+#define PRINT_FAIL(msg) printf("TEST-UNEXPECTED-FAIL | IATPatcher | " msg "\n")
+
+extern "C" int wmain(int argc, wchar_t* argv[]) {
+  nsModuleHandle exeForMetricsHook(GetStrongReferenceToExeModule());
+  if (!exeForMetricsHook) {
+    PRINT_FAIL("Failed obtaining HMODULE for executable");
+    return 1;
+  }
+  // Case 1: GetSystemMetrics is a regular import, so patching must succeed.
+  if (!gGetSystemMetricsHook.Set(exeForMetricsHook, "user32.dll",
+                                 "GetSystemMetrics", &GetSystemMetricsHook)) {
+    PRINT_FAIL("Failed setting GetSystemMetrics hook");
+    return 1;
+  }
+  // The hook returns 0, so a non-zero result means the hook was bypassed.
+  if (NormalImport() || !gGetSystemMetricsHookCalled) {
+    PRINT_FAIL("GetSystemMetrics hook was not called");
+    return 1;
+  }
+
+  static const mozilla::StaticDynamicallyLinkedFunctionPtr<decltype(
+      &::GetSystemMetrics)>
+      realGetSystemMetrics(L"user32.dll", "GetSystemMetrics");
+  if (!realGetSystemMetrics) {
+    PRINT_FAIL("Failed resolving real GetSystemMetrics pointer");
+    return 1;
+  }
+  // The stub kept by the hook has to be the function user32.dll exports.
+  if (gGetSystemMetricsHook.GetStub() != realGetSystemMetrics) {
+    PRINT_FAIL(
+        "GetSystemMetrics hook stub pointer does not match real "
+        "GetSystemMetrics pointer");
+    return 1;
+  }
+
+  nsModuleHandle exeForMessageBoxHook(GetStrongReferenceToExeModule());
+  if (!exeForMessageBoxHook) {
+    PRINT_FAIL("Failed obtaining HMODULE for executable");
+    return 1;
+  }
+
+  // Case 2: the test never calls MessageBoxA and therefore never imports it,
+  // so setting this hook is expected to fail.
+  if (gMessageBoxAHook.Set(exeForMessageBoxHook, "user32.dll", "MessageBoxA",
+                           nullptr)) {
+    PRINT_FAIL("Setting MessageBoxA hook succeeded when it should have failed");
+    return 1;
+  }
+  nsModuleHandle exeForUrlIsHook(GetStrongReferenceToExeModule());
+  if (!exeForUrlIsHook) {
+    PRINT_FAIL("Failed obtaining HMODULE for executable");
+    return 1;
+  }
+
+  // Case 3: UrlIsW is a delay-loaded import, which is not supported; both
+  // installing the hook and ever running it are expected NOT to happen.
+
+  if (gUrlIsHook.Set(exeForUrlIsHook, "shlwapi.dll", "UrlIsW", &UrlIsWHook)) {
+    PRINT_FAIL("gUrlIsHook.Set should have failed");
+    return 1;
+  }
+
+  if (DelayLoadImport() || gUrlIsHookCalled) {
+    PRINT_FAIL("gUrlIsHook should not have been called");
+    return 1;
+  }
+
+  printf("TEST-PASS | IATPatcher | All tests passed.\n");
+  return 0;
+}
diff --git a/mozglue/tests/interceptor/TestMMPolicy.cpp b/mozglue/tests/interceptor/TestMMPolicy.cpp
new file mode 100644
index 0000000000..9bb50f683b
--- /dev/null
+++ b/mozglue/tests/interceptor/TestMMPolicy.cpp
@@ -0,0 +1,198 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsWindowsDllInterceptor.h"
+
+#include <functional>
+
+mozilla::interceptor::MMPolicyInProcess gPolicy;
+
+void DepleteVirtualAddress(
+    uint8_t* aStart, size_t aSize,
+    const std::function<void(void*)>& aPostAllocCallback) {
+  // Reserves as much of [aStart, aStart + aSize) as possible, passing each
+  // reservation to aPostAllocCallback; a refused range is halved and retried.
+  const uintptr_t granularity = gPolicy.GetAllocGranularity();
+  if (aStart == nullptr || aSize < granularity) {
+    return;
+  }
+
+  uint8_t* alignedStart = reinterpret_cast<uint8_t*>(
+      (((reinterpret_cast<uintptr_t>(aStart) - 1) / granularity) + 1) *
+      granularity);
+  aSize -= (alignedStart - aStart);
+  if (auto p = VirtualAlloc(alignedStart, aSize, MEM_RESERVE, PAGE_NOACCESS)) {
+    aPostAllocCallback(p);
+    return;
+  }
+
+  // aSize is now relative to alignedStart, so split from there, not aStart.
+  const size_t halfSize = (aSize >> 1) & ~(granularity - 1);
+  if (halfSize != 0) {
+    DepleteVirtualAddress(alignedStart, halfSize, aPostAllocCallback);
+    DepleteVirtualAddress(alignedStart + halfSize, aSize - halfSize,
+                          aPostAllocCallback);
+  }
+}
+
+bool ValidateFreeRegion(LPVOID aRegion, size_t aDesiredLen) {
+  // Accepts aRegion only if it is the granularity-aligned base of a free
+  // region that can really be reserved and committed for aDesiredLen bytes.
+  // Each rejection is logged; a successful trial allocation is released.
+  MEMORY_BASIC_INFORMATION regionInfo;
+  if (VirtualQuery(aRegion, &regionInfo, sizeof(regionInfo)) !=
+      sizeof(regionInfo)) {
+    printf("TEST-FAILED | TestMMPolicy | VirtualQuery(%p) failed - %08lx\n",
+           aRegion, GetLastError());
+    return false;
+  }
+
+  if (regionInfo.State != MEM_FREE) {
+    printf("TEST-FAILED | TestMMPolicy | %p is not within a free region\n",
+           aRegion);
+    return false;
+  }
+
+  const auto regionBase = reinterpret_cast<uintptr_t>(regionInfo.BaseAddress);
+  if (aRegion != regionInfo.BaseAddress ||
+      regionBase % gPolicy.GetAllocGranularity()) {
+    printf("TEST-FAILED | TestMMPolicy | %p is not a region's start address\n",
+           aRegion);
+    return false;
+  }
+
+  // Prove the region is usable by actually allocating it ...
+  LPVOID trial = VirtualAlloc(aRegion, aDesiredLen, MEM_RESERVE | MEM_COMMIT,
+                              PAGE_READWRITE);
+  if (!trial) {
+    printf(
+        "TEST-FAILED | TestMMPolicy | "
+        "VirtualAlloc(%p) failed - %08lx\n",
+        aRegion, GetLastError());
+    return false;
+  }
+
+  // ... and hand it back right away so the region is free again.
+  if (!VirtualFree(trial, 0, MEM_RELEASE)) {
+    printf(
+        "TEST-FAILED | TestMMPolicy | "
+        "VirtualFree(%p) failed - %08lx\n",
+        trial, GetLastError());
+    return false;
+  }
+
+  return true;
+}
+
+bool TestFindRegion() {  // Returns false only on a definite failure.
+  // Skip the near-null addresses: start at max(granularity, 0x1000000).
+  uint8_t* minAddr = reinterpret_cast<uint8_t*>(
+      std::max(gPolicy.GetAllocGranularity(), 0x1000000ul));
+  // 64bit address space is too large to deplete.  32bit space is enough.
+  uint8_t* maxAddr = reinterpret_cast<uint8_t*>(std::min(
+      gPolicy.GetMaxUserModeAddress(), static_cast<uintptr_t>(0xffffffff)));
+
+  // Keep one of the regions we allocate so that we can release it later.
+  void* lastResort = nullptr;
+
+  // Reserve all free regions in the range [minAddr, maxAddr]
+  for (uint8_t* address = minAddr; address <= maxAddr;) {
+    MEMORY_BASIC_INFORMATION mbi;
+    if (VirtualQuery(address, &mbi, sizeof(mbi)) != sizeof(mbi)) {
+      printf(
+          "TEST-FAILED | TestMMPolicy | "
+          "VirtualQuery(%p) failed - %08lx\n",
+          address, GetLastError());
+      break;  // Stop scanning; carry on with what has been reserved so far.
+    }
+
+    address = reinterpret_cast<uint8_t*>(mbi.BaseAddress);
+    if (mbi.State == MEM_FREE) {
+      DepleteVirtualAddress(address, mbi.RegionSize,
+                            [&lastResort](void* aAllocated) {
+                              // Pick the first address we allocate to make sure
+                              // FindRegion scans the full range.
+                              if (!lastResort) {
+                                lastResort = aAllocated;
+                              }
+                            });
+    }
+
+    address += mbi.RegionSize;  // Step to the region that follows this one.
+  }
+
+  if (!lastResort) {
+    printf(
+        "TEST-SKIPPED | TestMMPolicy | "
+        "No free region in [%p - %p].  Skipping the testcase.\n",
+        minAddr, maxAddr);
+    return true;
+  }
+
+  // Make sure there are no free regions
+  PVOID freeRegion =
+      gPolicy.FindRegion(GetCurrentProcess(), 1, minAddr, maxAddr);
+  if (freeRegion) {
+    if (reinterpret_cast<uintptr_t>(freeRegion) %
+        gPolicy.GetAllocGranularity()) {
+      printf(
+          "TEST-FAILED | TestMMPolicy | "
+          "MMPolicyBase::FindRegion returned an unaligned address %p.\n",
+          freeRegion);
+      return false;
+    }
+
+    printf(
+        "TEST-SKIPPED | TestMMPolicy | "
+        "%p was freed after depletion.  Skipping the testcase.\n",
+        freeRegion);
+    return true;
+  }
+
+  // Free one region, and thus we can expect FindRegion finds this region
+  if (!VirtualFree(lastResort, 0, MEM_RELEASE)) {
+    printf(
+        "TEST-FAILED | TestMMPolicy | "
+        "VirtualFree(%p) failed - %08lx\n",
+        lastResort, GetLastError());
+    return false;
+  }
+  printf("The region starting from %p has been freed.\n", lastResort);
+
+  // Run the function several times because it uses a random number inside
+  // and its result is nondeterministic.
+  for (int i = 0; i < 50; ++i) {
+    // Because one region was freed, a desire up to one region
+    // should be fulfilled.
+    const size_t desiredLengths[] = {1, gPolicy.GetAllocGranularity()};
+
+    for (auto desiredLen : desiredLengths) {
+      freeRegion =
+          gPolicy.FindRegion(GetCurrentProcess(), desiredLen, minAddr, maxAddr);
+      if (!freeRegion) {
+        printf(
+            "TEST-FAILED | TestMMPolicy | "
+            "Failed to find a free region.\n");
+        return false;
+      }
+
+      if (!ValidateFreeRegion(freeRegion, desiredLen)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+extern "C" int wmain(int argc, wchar_t* argv[]) {
+  // Process exit status: 0 if TestFindRegion() succeeded, 1 otherwise.
+  if (TestFindRegion()) {
+    printf("TEST-PASS | TestMMPolicy | All tests passed.\n");
+    return 0;
+  }
+  return 1;
+}
diff --git a/mozglue/tests/interceptor/moz.build b/mozglue/tests/interceptor/moz.build
new file mode 100644
index 0000000000..c179125cff
--- /dev/null
+++ b/mozglue/tests/interceptor/moz.build
@@ -0,0 +1,40 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+GeckoCppUnitTests(
+    [
+        "TestDllInterceptor",
+        "TestIATPatcher",
+        "TestMMPolicy",
+    ],
+    linkage=None,
+)
+
+if CONFIG["OS_TARGET"] == "WINNT" and CONFIG["CPU_ARCH"] in ("x86", "x86_64"):
+    # Cross-process interceptors not yet supported on aarch64
+    GeckoCppUnitTests(
+        [
+            "TestDllInterceptorCrossProcess",
+        ],
+        linkage=None,
+    )
+
+OS_LIBS += [
+    "ntdll",
+    "ole32",
+    "shlwapi",
+    "user32",
+]
+
+DELAYLOAD_DLLS += [  # TestIATPatcher exercises a delay-loaded shlwapi import.
+    "shlwapi.dll",
+]
+
+if CONFIG["OS_TARGET"] == "WINNT" and CONFIG["CC_TYPE"] in ("gcc", "clang"):
+    # This allows us to use wmain as the entry point on mingw
+    LDFLAGS += [
+        "-municode",
+    ]
diff --git a/mozglue/tests/moz.build b/mozglue/tests/moz.build
new file mode 100644
index 0000000000..472e9d315d
--- /dev/null
+++ b/mozglue/tests/moz.build
@@ -0,0 +1,51 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DisableStlWrapping()
+
+# Important: for CppUnitTests to be run, they also need to be added
+# to testing/cppunittest.ini.
+
+GeckoCppUnitTests(
+    [
+        "ShowSSEConfig",
+    ],
+    linkage=None,
+)
+
+CppUnitTests(
+    [
+        "TestBaseProfiler",
+        "TestPrintf",
+    ]
+)
+
+with Files("TestBaseProfiler.cpp"):
+    BUG_COMPONENT = ("Core", "Gecko Profiler")
+
+if CONFIG["OS_ARCH"] == "WINNT":
+    GeckoCppUnitTests(
+        [
+            "TestNativeNt",
+            "TestPEExportSection",
+            "TestTimeStampWin",
+        ],
+        linkage=None,
+    )
+    TEST_DIRS += [  # Windows-only test subdirectories.
+        "interceptor",
+        "gtest",
+    ]
+    OS_LIBS += [
+        "ntdll",
+        "version",
+    ]
+
+if CONFIG["OS_TARGET"] == "WINNT" and CONFIG["CC_TYPE"] in ("gcc", "clang"):
+    # This allows us to use wmain as the entry point on mingw
+    LDFLAGS += [
+        "-municode",
+    ]